1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2011, Joyent, Inc. All rights reserved. 25 * Copyright (c) 2012 by Delphix. All rights reserved. 26 */ 27 28 /* 29 * DTrace - Dynamic Tracing for Solaris 30 * 31 * This is the implementation of the Solaris Dynamic Tracing framework 32 * (DTrace). The user-visible interface to DTrace is described at length in 33 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace 34 * library, the in-kernel DTrace framework, and the DTrace providers are 35 * described in the block comments in the <sys/dtrace.h> header file. The 36 * internal architecture of DTrace is described in the block comments in the 37 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace 38 * implementation very much assume mastery of all of these sources; if one has 39 * an unanswered question about the implementation, one should consult them 40 * first. 
41 * 42 * The functions here are ordered roughly as follows: 43 * 44 * - Probe context functions 45 * - Probe hashing functions 46 * - Non-probe context utility functions 47 * - Matching functions 48 * - Provider-to-Framework API functions 49 * - Probe management functions 50 * - DIF object functions 51 * - Format functions 52 * - Predicate functions 53 * - ECB functions 54 * - Buffer functions 55 * - Enabling functions 56 * - DOF functions 57 * - Anonymous enabling functions 58 * - Consumer state functions 59 * - Helper functions 60 * - Hook functions 61 * - Driver cookbook functions 62 * 63 * Each group of functions begins with a block comment labelled the "DTrace 64 * [Group] Functions", allowing one to find each block by searching forward 65 * on capital-f functions. 66 */ 67 #include <sys/errno.h> 68 #include <sys/stat.h> 69 #include <sys/modctl.h> 70 #include <sys/conf.h> 71 #include <sys/systm.h> 72 #include <sys/ddi.h> 73 #include <sys/sunddi.h> 74 #include <sys/cpuvar.h> 75 #include <sys/kmem.h> 76 #include <sys/strsubr.h> 77 #include <sys/sysmacros.h> 78 #include <sys/dtrace_impl.h> 79 #include <sys/atomic.h> 80 #include <sys/cmn_err.h> 81 #include <sys/mutex_impl.h> 82 #include <sys/rwlock_impl.h> 83 #include <sys/ctf_api.h> 84 #include <sys/panic.h> 85 #include <sys/priv_impl.h> 86 #include <sys/policy.h> 87 #include <sys/cred_impl.h> 88 #include <sys/procfs_isa.h> 89 #include <sys/taskq.h> 90 #include <sys/mkdev.h> 91 #include <sys/kdi.h> 92 #include <sys/zone.h> 93 #include <sys/socket.h> 94 #include <netinet/in.h> 95 96 /* 97 * DTrace Tunable Variables 98 * 99 * The following variables may be tuned by adding a line to /etc/system that 100 * includes both the name of the DTrace module ("dtrace") and the name of the 101 * variable. 
For example:
 *
 *   set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 *
 * Rates and intervals below are expressed in nanoseconds (NANOSEC is used as
 * the one-second / 1 hz value throughout).
 */
int		dtrace_destructive_disallow = 0;
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);
size_t		dtrace_global_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 1024;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
hrtime_t	dtrace_deadman_interval = NANOSEC;		/* 1 s */
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; /* 10 s */
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;	/* 30 s */
hrtime_t	dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; /* 60 s */

/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so: it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
*/
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */

/*
 * DTrace Internal Variables
 *
 * File-scope (static) state private to this module.
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
static int		dtrace_dynvar_failclean; /* dynvars failed to clean */

/*
 * DTrace Locking
 *
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
static kmutex_t		dtrace_lock;		/* probe state lock */
static kmutex_t		dtrace_provider_lock;	/* provider state lock */
static kmutex_t		dtrace_meta_lock;	/* meta-provider state lock */

/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
*/
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};

/*
 * Do-nothing function.  It is cast to each of the void-returning operation
 * signatures used in dtrace_provider_ops below.
 */
static void
dtrace_nullop(void)
{}

/*
 * Do-nothing function that always returns 0.  It fills the one int-returning
 * slot in dtrace_provider_ops below.
 */
static int
dtrace_enable_nullop(void)
{
	return (0);
}

/*
 * Operations vector for DTrace acting as its own provider:  every populated
 * slot is one of the two no-op functions above, and three slots are NULL.
 * (The slot names are declared with dtrace_pops_t, which is not visible in
 * this file.)
 */
static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};

static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */

/*
 * DTrace Helper Tracing Variables
 *
 * Helper tracing defaults to enabled on DEBUG kernels and disabled otherwise.
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char	*dtrace_helptrace_buffer;
int	dtrace_helptrace_bufsize = 512 * 1024;

#ifdef DEBUG
int	dtrace_helptrace_enabled = 1;
#else
int	dtrace_helptrace_enabled = 0;
#endif

/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.
The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static kmutex_t dtrace_errlock;
#endif

/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */

/*
 * The hash macros below locate fields inside a probe by byte offset, using
 * the offsets stored in the hash (dth_stroffs, dth_nextoffs, dth_prevoffs):
 *
 *   DTRACE_HASHSTR   hashes the string pointed to at offset dth_stroffs
 *   DTRACE_HASHNEXT  yields the address of the pointer at offset dth_nextoffs
 *   DTRACE_HASHPREV  yields the address of the pointer at offset dth_prevoffs
 *   DTRACE_HASHEQ    is true when the two probes' strings at dth_stroffs
 *                    compare equal with strcmp()
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)

/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... 
[ keyn ]  [ variable-key ]  [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 *
 * Note that this macro expands to a bare brace-enclosed block that declares
 * its own locals (intr, actv) and assigns the computed key to 'where'.
 */
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}

/*
 * Byte-swap helpers, each built from two applications of the next narrower
 * one.  The argument is expanded (and therefore evaluated) multiple times.
 */
#define	DT_BSWAP_8(x)	((x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))

/* Mask selecting the low 32 bits of a 64-bit value. */
#define	DT_MASK_LO 0x00000000FFFFFFFFULL

/*
 * Store (type)what at byte offset 'offset' from 'tomax'.  Note that the
 * expansion already ends in a semicolon.
 */
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);

/*
 * On non-x86, if addr is not a multiple of size (size must be a power of two
 * for the mask test to be meaningful), set CPU_DTRACE_BADALIGN in *flags,
 * record addr as the illegal value for the current CPU, and return 0 from the
 * _enclosing_ function.  On x86 this expands to nothing.
 */
#ifndef __x86
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (size - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif

/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by baseaddr, basesz.  We take care to
 * avoid problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
	((testaddr) - (baseaddr) < (basesz) && \
	(testaddr) + (testsz) - (baseaddr) <= (basesz) && \
	(testaddr) + (testsz) >= (testaddr))

/*
 * Test whether alloc_sz bytes will fit in the scratch region.  
We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz) \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
	(mstate)->dtms_scratch_ptr >= (alloc_sz))

/*
 * Generate dtrace_load<bits>(), a safe load of a <bits>-wide unsigned value
 * from addr.  The generated function:
 *
 *  - applies DTRACE_ALIGNCHECK to addr;
 *  - returns 0 with CPU_DTRACE_BADADDR set (and addr recorded as the illegal
 *    value) if [addr, addr + size) overlaps any entry in dtrace_toxrange[];
 *  - otherwise performs the load with CPU_DTRACE_NOFAULT set around it, and
 *    returns the loaded value, or 0 if CPU_DTRACE_FAULT is set afterwards.
 */
#define	DTRACE_LOADFUNC(bits)						\
/*CSTYLED*/								\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	/*CSTYLED*/							\
	uint##bits##_t rval;						\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	/*CSTYLED*/							\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0);		\
}

/* Pointer-sized safe load:  64-bit under _LP64, 32-bit otherwise. */
#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif

#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_FAIL	-1
#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
/* True when the probe's function name (dtpr_func) is non-empty. */
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64

/*
 * Map a set of CPU_DTRACE_* flags to a single DTRACEFLT_* code.  The flags
 * are tested in the order written, so when several are set the first match
 * wins; DTRACEFLT_UNKNOWN is the result when none of them is set.
 */
#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ?  DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ?  DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ?  DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ?  DTRACEFLT_BADSTACK :		\
	DTRACEFLT_UNKNOWN)

/* True when the action is a DIF expression whose return type is a string. */
#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)

/* Forward declarations for static functions used before their definition. */
static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static void dtrace_enabling_reap(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);

/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
485 * As a result, functions called from probe context may only call other DTrace 486 * support functions -- they may not interact at all with the system at large. 487 * (Note that the ASSERT macro is made probe-context safe by redefining it in 488 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary 489 * loads are to be performed from probe context, they _must_ be in terms of 490 * the safe dtrace_load*() variants. 491 * 492 * Some functions in this block are not actually called from probe context; 493 * for these functions, there will be a comment above the function reading 494 * "Note: not called from probe context." 495 */ 496 void 497 dtrace_panic(const char *format, ...) 498 { 499 va_list alist; 500 501 va_start(alist, format); 502 dtrace_vpanic(format, alist); 503 va_end(alist); 504 } 505 506 int 507 dtrace_assfail(const char *a, const char *f, int l) 508 { 509 dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); 510 511 /* 512 * We just need something here that even the most clever compiler 513 * cannot optimize away. 514 */ 515 return (a[(uintptr_t)f]); 516 } 517 518 /* 519 * Atomically increment a specified error counter from probe context. 520 */ 521 static void 522 dtrace_error(uint32_t *counter) 523 { 524 /* 525 * Most counters stored to in probe context are per-CPU counters. 526 * However, there are some error conditions that are sufficiently 527 * arcane that they don't merit per-CPU storage. If these counters 528 * are incremented concurrently on different CPUs, scalability will be 529 * adversely affected -- but we don't expect them to be white-hot in a 530 * correctly constructed enabling... 531 */ 532 uint32_t oval, nval; 533 534 do { 535 oval = *counter; 536 537 if ((nval = oval + 1) == 0) { 538 /* 539 * If the counter would wrap, set it to 1 -- assuring 540 * that the counter is never zero when we have seen 541 * errors. 
(The counter must be 32-bits because we 542 * aren't guaranteed a 64-bit compare&swap operation.) 543 * To save this code both the infamy of being fingered 544 * by a priggish news story and the indignity of being 545 * the target of a neo-puritan witch trial, we're 546 * carefully avoiding any colorful description of the 547 * likelihood of this condition -- but suffice it to 548 * say that it is only slightly more likely than the 549 * overflow of predicate cache IDs, as discussed in 550 * dtrace_predicate_create(). 551 */ 552 nval = 1; 553 } 554 } while (dtrace_cas32(counter, oval, nval) != oval); 555 } 556 557 /* 558 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a 559 * uint8_t, a uint16_t, a uint32_t and a uint64_t. 560 */ 561 DTRACE_LOADFUNC(8) 562 DTRACE_LOADFUNC(16) 563 DTRACE_LOADFUNC(32) 564 DTRACE_LOADFUNC(64) 565 566 static int 567 dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) 568 { 569 if (dest < mstate->dtms_scratch_base) 570 return (0); 571 572 if (dest + size < dest) 573 return (0); 574 575 if (dest + size > mstate->dtms_scratch_ptr) 576 return (0); 577 578 return (1); 579 } 580 581 static int 582 dtrace_canstore_statvar(uint64_t addr, size_t sz, 583 dtrace_statvar_t **svars, int nsvars) 584 { 585 int i; 586 587 for (i = 0; i < nsvars; i++) { 588 dtrace_statvar_t *svar = svars[i]; 589 590 if (svar == NULL || svar->dtsv_size == 0) 591 continue; 592 593 if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) 594 return (1); 595 } 596 597 return (0); 598 } 599 600 /* 601 * Check to see if the address is within a memory region to which a store may 602 * be issued. This includes the DTrace scratch areas, and any DTrace variable 603 * region. The caller of dtrace_canstore() is responsible for performing any 604 * alignment checks that are needed before stores are actually executed. 
605 */ 606 static int 607 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 608 dtrace_vstate_t *vstate) 609 { 610 /* 611 * First, check to see if the address is in scratch space... 612 */ 613 if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, 614 mstate->dtms_scratch_size)) 615 return (1); 616 617 /* 618 * Now check to see if it's a dynamic variable. This check will pick 619 * up both thread-local variables and any global dynamically-allocated 620 * variables. 621 */ 622 if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base, 623 vstate->dtvs_dynvars.dtds_size)) { 624 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; 625 uintptr_t base = (uintptr_t)dstate->dtds_base + 626 (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); 627 uintptr_t chunkoffs; 628 629 /* 630 * Before we assume that we can store here, we need to make 631 * sure that it isn't in our metadata -- storing to our 632 * dynamic variable metadata would corrupt our state. For 633 * the range to not include any dynamic variable metadata, 634 * it must: 635 * 636 * (1) Start above the hash table that is at the base of 637 * the dynamic variable space 638 * 639 * (2) Have a starting chunk offset that is beyond the 640 * dtrace_dynvar_t that is at the base of every chunk 641 * 642 * (3) Not span a chunk boundary 643 * 644 */ 645 if (addr < base) 646 return (0); 647 648 chunkoffs = (addr - base) % dstate->dtds_chunksize; 649 650 if (chunkoffs < sizeof (dtrace_dynvar_t)) 651 return (0); 652 653 if (chunkoffs + sz > dstate->dtds_chunksize) 654 return (0); 655 656 return (1); 657 } 658 659 /* 660 * Finally, check the static local and global variables. These checks 661 * take the longest, so we perform them last. 
662 */ 663 if (dtrace_canstore_statvar(addr, sz, 664 vstate->dtvs_locals, vstate->dtvs_nlocals)) 665 return (1); 666 667 if (dtrace_canstore_statvar(addr, sz, 668 vstate->dtvs_globals, vstate->dtvs_nglobals)) 669 return (1); 670 671 return (0); 672 } 673 674 675 /* 676 * Convenience routine to check to see if the address is within a memory 677 * region in which a load may be issued given the user's privilege level; 678 * if not, it sets the appropriate error flags and loads 'addr' into the 679 * illegal value slot. 680 * 681 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement 682 * appropriate memory access protection. 683 */ 684 static int 685 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 686 dtrace_vstate_t *vstate) 687 { 688 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 689 690 /* 691 * If we hold the privilege to read from kernel memory, then 692 * everything is readable. 693 */ 694 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 695 return (1); 696 697 /* 698 * You can obviously read that which you can store. 699 */ 700 if (dtrace_canstore(addr, sz, mstate, vstate)) 701 return (1); 702 703 /* 704 * We're allowed to read from our own string table. 705 */ 706 if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab, 707 mstate->dtms_difo->dtdo_strlen)) 708 return (1); 709 710 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); 711 *illval = addr; 712 return (0); 713 } 714 715 /* 716 * Convenience routine to check to see if a given string is within a memory 717 * region in which a load may be issued given the user's privilege level; 718 * this exists so that we don't need to issue unnecessary dtrace_strlen() 719 * calls in the event that the user has all privileges. 
720 */ 721 static int 722 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 723 dtrace_vstate_t *vstate) 724 { 725 size_t strsz; 726 727 /* 728 * If we hold the privilege to read from kernel memory, then 729 * everything is readable. 730 */ 731 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 732 return (1); 733 734 strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz); 735 if (dtrace_canload(addr, strsz, mstate, vstate)) 736 return (1); 737 738 return (0); 739 } 740 741 /* 742 * Convenience routine to check to see if a given variable is within a memory 743 * region in which a load may be issued given the user's privilege level. 744 */ 745 static int 746 dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate, 747 dtrace_vstate_t *vstate) 748 { 749 size_t sz; 750 ASSERT(type->dtdt_flags & DIF_TF_BYREF); 751 752 /* 753 * If we hold the privilege to read from kernel memory, then 754 * everything is readable. 755 */ 756 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 757 return (1); 758 759 if (type->dtdt_kind == DIF_TYPE_STRING) 760 sz = dtrace_strlen(src, 761 vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1; 762 else 763 sz = type->dtdt_size; 764 765 return (dtrace_canload((uintptr_t)src, sz, mstate, vstate)); 766 } 767 768 /* 769 * Compare two strings using safe loads. 
770 */ 771 static int 772 dtrace_strncmp(char *s1, char *s2, size_t limit) 773 { 774 uint8_t c1, c2; 775 volatile uint16_t *flags; 776 777 if (s1 == s2 || limit == 0) 778 return (0); 779 780 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 781 782 do { 783 if (s1 == NULL) { 784 c1 = '\0'; 785 } else { 786 c1 = dtrace_load8((uintptr_t)s1++); 787 } 788 789 if (s2 == NULL) { 790 c2 = '\0'; 791 } else { 792 c2 = dtrace_load8((uintptr_t)s2++); 793 } 794 795 if (c1 != c2) 796 return (c1 - c2); 797 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT)); 798 799 return (0); 800 } 801 802 /* 803 * Compute strlen(s) for a string using safe memory accesses. The additional 804 * len parameter is used to specify a maximum length to ensure completion. 805 */ 806 static size_t 807 dtrace_strlen(const char *s, size_t lim) 808 { 809 uint_t len; 810 811 for (len = 0; len != lim; len++) { 812 if (dtrace_load8((uintptr_t)s++) == '\0') 813 break; 814 } 815 816 return (len); 817 } 818 819 /* 820 * Check if an address falls within a toxic region. 821 */ 822 static int 823 dtrace_istoxic(uintptr_t kaddr, size_t size) 824 { 825 uintptr_t taddr, tsize; 826 int i; 827 828 for (i = 0; i < dtrace_toxranges; i++) { 829 taddr = dtrace_toxrange[i].dtt_base; 830 tsize = dtrace_toxrange[i].dtt_limit - taddr; 831 832 if (kaddr - taddr < tsize) { 833 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 834 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr; 835 return (1); 836 } 837 838 if (taddr - kaddr < size) { 839 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 840 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr; 841 return (1); 842 } 843 } 844 845 return (0); 846 } 847 848 /* 849 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe 850 * memory specified by the DIF program. The dst is assumed to be safe memory 851 * that we can store to directly because it is managed by DTrace. As with 852 * standard bcopy, overlapping copies are handled properly. 
853 */ 854 static void 855 dtrace_bcopy(const void *src, void *dst, size_t len) 856 { 857 if (len != 0) { 858 uint8_t *s1 = dst; 859 const uint8_t *s2 = src; 860 861 if (s1 <= s2) { 862 do { 863 *s1++ = dtrace_load8((uintptr_t)s2++); 864 } while (--len != 0); 865 } else { 866 s2 += len; 867 s1 += len; 868 869 do { 870 *--s1 = dtrace_load8((uintptr_t)--s2); 871 } while (--len != 0); 872 } 873 } 874 } 875 876 /* 877 * Copy src to dst using safe memory accesses, up to either the specified 878 * length, or the point that a nul byte is encountered. The src is assumed to 879 * be unsafe memory specified by the DIF program. The dst is assumed to be 880 * safe memory that we can store to directly because it is managed by DTrace. 881 * Unlike dtrace_bcopy(), overlapping regions are not handled. 882 */ 883 static void 884 dtrace_strcpy(const void *src, void *dst, size_t len) 885 { 886 if (len != 0) { 887 uint8_t *s1 = dst, c; 888 const uint8_t *s2 = src; 889 890 do { 891 *s1++ = c = dtrace_load8((uintptr_t)s2++); 892 } while (--len != 0 && c != '\0'); 893 } 894 } 895 896 /* 897 * Copy src to dst, deriving the size and type from the specified (BYREF) 898 * variable type. The src is assumed to be unsafe memory specified by the DIF 899 * program. The dst is assumed to be DTrace variable memory that is of the 900 * specified type; we assume that we can store to directly. 901 */ 902 static void 903 dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type) 904 { 905 ASSERT(type->dtdt_flags & DIF_TF_BYREF); 906 907 if (type->dtdt_kind == DIF_TYPE_STRING) { 908 dtrace_strcpy(src, dst, type->dtdt_size); 909 } else { 910 dtrace_bcopy(src, dst, type->dtdt_size); 911 } 912 } 913 914 /* 915 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be 916 * unsafe memory specified by the DIF program. The s2 data is assumed to be 917 * safe memory that we can access directly because it is managed by DTrace. 
918 */ 919 static int 920 dtrace_bcmp(const void *s1, const void *s2, size_t len) 921 { 922 volatile uint16_t *flags; 923 924 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 925 926 if (s1 == s2) 927 return (0); 928 929 if (s1 == NULL || s2 == NULL) 930 return (1); 931 932 if (s1 != s2 && len != 0) { 933 const uint8_t *ps1 = s1; 934 const uint8_t *ps2 = s2; 935 936 do { 937 if (dtrace_load8((uintptr_t)ps1++) != *ps2++) 938 return (1); 939 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); 940 } 941 return (0); 942 } 943 944 /* 945 * Zero the specified region using a simple byte-by-byte loop. Note that this 946 * is for safe DTrace-managed memory only. 947 */ 948 static void 949 dtrace_bzero(void *dst, size_t len) 950 { 951 uchar_t *cp; 952 953 for (cp = dst; len != 0; len--) 954 *cp++ = 0; 955 } 956 957 static void 958 dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) 959 { 960 uint64_t result[2]; 961 962 result[0] = addend1[0] + addend2[0]; 963 result[1] = addend1[1] + addend2[1] + 964 (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0); 965 966 sum[0] = result[0]; 967 sum[1] = result[1]; 968 } 969 970 /* 971 * Shift the 128-bit value in a by b. If b is positive, shift left. 972 * If b is negative, shift right. 
973 */ 974 static void 975 dtrace_shift_128(uint64_t *a, int b) 976 { 977 uint64_t mask; 978 979 if (b == 0) 980 return; 981 982 if (b < 0) { 983 b = -b; 984 if (b >= 64) { 985 a[0] = a[1] >> (b - 64); 986 a[1] = 0; 987 } else { 988 a[0] >>= b; 989 mask = 1LL << (64 - b); 990 mask -= 1; 991 a[0] |= ((a[1] & mask) << (64 - b)); 992 a[1] >>= b; 993 } 994 } else { 995 if (b >= 64) { 996 a[1] = a[0] << (b - 64); 997 a[0] = 0; 998 } else { 999 a[1] <<= b; 1000 mask = a[0] >> (64 - b); 1001 a[1] |= mask; 1002 a[0] <<= b; 1003 } 1004 } 1005 } 1006 1007 /* 1008 * The basic idea is to break the 2 64-bit values into 4 32-bit values, 1009 * use native multiplication on those, and then re-combine into the 1010 * resulting 128-bit value. 1011 * 1012 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = 1013 * hi1 * hi2 << 64 + 1014 * hi1 * lo2 << 32 + 1015 * hi2 * lo1 << 32 + 1016 * lo1 * lo2 1017 */ 1018 static void 1019 dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product) 1020 { 1021 uint64_t hi1, hi2, lo1, lo2; 1022 uint64_t tmp[2]; 1023 1024 hi1 = factor1 >> 32; 1025 hi2 = factor2 >> 32; 1026 1027 lo1 = factor1 & DT_MASK_LO; 1028 lo2 = factor2 & DT_MASK_LO; 1029 1030 product[0] = lo1 * lo2; 1031 product[1] = hi1 * hi2; 1032 1033 tmp[0] = hi1 * lo2; 1034 tmp[1] = 0; 1035 dtrace_shift_128(tmp, 32); 1036 dtrace_add_128(product, tmp, product); 1037 1038 tmp[0] = hi2 * lo1; 1039 tmp[1] = 0; 1040 dtrace_shift_128(tmp, 32); 1041 dtrace_add_128(product, tmp, product); 1042 } 1043 1044 /* 1045 * This privilege check should be used by actions and subroutines to 1046 * verify that the user credentials of the process that enabled the 1047 * invoking ECB match the target credentials 1048 */ 1049 static int 1050 dtrace_priv_proc_common_user(dtrace_state_t *state) 1051 { 1052 cred_t *cr, *s_cr = state->dts_cred.dcr_cred; 1053 1054 /* 1055 * We should always have a non-NULL state cred here, since if cred 1056 * is null (anonymous tracing), we fast-path bypass this routine. 
1057 */ 1058 ASSERT(s_cr != NULL); 1059 1060 if ((cr = CRED()) != NULL && 1061 s_cr->cr_uid == cr->cr_uid && 1062 s_cr->cr_uid == cr->cr_ruid && 1063 s_cr->cr_uid == cr->cr_suid && 1064 s_cr->cr_gid == cr->cr_gid && 1065 s_cr->cr_gid == cr->cr_rgid && 1066 s_cr->cr_gid == cr->cr_sgid) 1067 return (1); 1068 1069 return (0); 1070 } 1071 1072 /* 1073 * This privilege check should be used by actions and subroutines to 1074 * verify that the zone of the process that enabled the invoking ECB 1075 * matches the target credentials 1076 */ 1077 static int 1078 dtrace_priv_proc_common_zone(dtrace_state_t *state) 1079 { 1080 cred_t *cr, *s_cr = state->dts_cred.dcr_cred; 1081 1082 /* 1083 * We should always have a non-NULL state cred here, since if cred 1084 * is null (anonymous tracing), we fast-path bypass this routine. 1085 */ 1086 ASSERT(s_cr != NULL); 1087 1088 if ((cr = CRED()) != NULL && 1089 s_cr->cr_zone == cr->cr_zone) 1090 return (1); 1091 1092 return (0); 1093 } 1094 1095 /* 1096 * This privilege check should be used by actions and subroutines to 1097 * verify that the process has not setuid or changed credentials. 
1098 */ 1099 static int 1100 dtrace_priv_proc_common_nocd() 1101 { 1102 proc_t *proc; 1103 1104 if ((proc = ttoproc(curthread)) != NULL && 1105 !(proc->p_flag & SNOCD)) 1106 return (1); 1107 1108 return (0); 1109 } 1110 1111 static int 1112 dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate) 1113 { 1114 int action = state->dts_cred.dcr_action; 1115 1116 if (!(mstate->dtms_access & DTRACE_ACCESS_PROC)) 1117 goto bad; 1118 1119 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && 1120 dtrace_priv_proc_common_zone(state) == 0) 1121 goto bad; 1122 1123 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) && 1124 dtrace_priv_proc_common_user(state) == 0) 1125 goto bad; 1126 1127 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) && 1128 dtrace_priv_proc_common_nocd() == 0) 1129 goto bad; 1130 1131 return (1); 1132 1133 bad: 1134 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; 1135 1136 return (0); 1137 } 1138 1139 static int 1140 dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate) 1141 { 1142 if (mstate->dtms_access & DTRACE_ACCESS_PROC) { 1143 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) 1144 return (1); 1145 1146 if (dtrace_priv_proc_common_zone(state) && 1147 dtrace_priv_proc_common_user(state) && 1148 dtrace_priv_proc_common_nocd()) 1149 return (1); 1150 } 1151 1152 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; 1153 1154 return (0); 1155 } 1156 1157 static int 1158 dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate) 1159 { 1160 if ((mstate->dtms_access & DTRACE_ACCESS_PROC) && 1161 (state->dts_cred.dcr_action & DTRACE_CRA_PROC)) 1162 return (1); 1163 1164 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; 1165 1166 return (0); 1167 } 1168 1169 static int 1170 dtrace_priv_kernel(dtrace_state_t *state) 1171 { 1172 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) 1173 return (1); 1174 1175 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= 
CPU_DTRACE_KPRIV; 1176 1177 return (0); 1178 } 1179 1180 static int 1181 dtrace_priv_kernel_destructive(dtrace_state_t *state) 1182 { 1183 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) 1184 return (1); 1185 1186 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; 1187 1188 return (0); 1189 } 1190 1191 /* 1192 * Determine if the dte_cond of the specified ECB allows for processing of 1193 * the current probe to continue. Note that this routine may allow continued 1194 * processing, but with access(es) stripped from the mstate's dtms_access 1195 * field. 1196 */ 1197 static int 1198 dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, 1199 dtrace_ecb_t *ecb) 1200 { 1201 dtrace_probe_t *probe = ecb->dte_probe; 1202 dtrace_provider_t *prov = probe->dtpr_provider; 1203 dtrace_pops_t *pops = &prov->dtpv_pops; 1204 int mode = DTRACE_MODE_NOPRIV_DROP; 1205 1206 ASSERT(ecb->dte_cond); 1207 1208 if (pops->dtps_mode != NULL) { 1209 mode = pops->dtps_mode(prov->dtpv_arg, 1210 probe->dtpr_id, probe->dtpr_arg); 1211 1212 ASSERT((mode & DTRACE_MODE_USER) || 1213 (mode & DTRACE_MODE_KERNEL)); 1214 ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || 1215 (mode & DTRACE_MODE_NOPRIV_DROP)); 1216 } 1217 1218 /* 1219 * If the dte_cond bits indicate that this consumer is only allowed to 1220 * see user-mode firings of this probe, call the provider's dtps_mode() 1221 * entry point to check that the probe was fired while in a user 1222 * context. If that's not the case, use the policy specified by the 1223 * provider to determine if we drop the probe or merely restrict 1224 * operation. 1225 */ 1226 if (ecb->dte_cond & DTRACE_COND_USERMODE) { 1227 ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); 1228 1229 if (!(mode & DTRACE_MODE_USER)) { 1230 if (mode & DTRACE_MODE_NOPRIV_DROP) 1231 return (0); 1232 1233 mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; 1234 } 1235 } 1236 1237 /* 1238 * This is more subtle than it looks. 
We have to be absolutely certain 1239 * that CRED() isn't going to change out from under us so it's only 1240 * legit to examine that structure if we're in constrained situations. 1241 * Currently, the only times we'll this check is if a non-super-user 1242 * has enabled the profile or syscall providers -- providers that 1243 * allow visibility of all processes. For the profile case, the check 1244 * above will ensure that we're examining a user context. 1245 */ 1246 if (ecb->dte_cond & DTRACE_COND_OWNER) { 1247 cred_t *cr; 1248 cred_t *s_cr = state->dts_cred.dcr_cred; 1249 proc_t *proc; 1250 1251 ASSERT(s_cr != NULL); 1252 1253 if ((cr = CRED()) == NULL || 1254 s_cr->cr_uid != cr->cr_uid || 1255 s_cr->cr_uid != cr->cr_ruid || 1256 s_cr->cr_uid != cr->cr_suid || 1257 s_cr->cr_gid != cr->cr_gid || 1258 s_cr->cr_gid != cr->cr_rgid || 1259 s_cr->cr_gid != cr->cr_sgid || 1260 (proc = ttoproc(curthread)) == NULL || 1261 (proc->p_flag & SNOCD)) { 1262 if (mode & DTRACE_MODE_NOPRIV_DROP) 1263 return (0); 1264 1265 mstate->dtms_access &= ~DTRACE_ACCESS_PROC; 1266 } 1267 } 1268 1269 /* 1270 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not 1271 * in our zone, check to see if our mode policy is to restrict rather 1272 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC 1273 * and DTRACE_ACCESS_ARGS 1274 */ 1275 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { 1276 cred_t *cr; 1277 cred_t *s_cr = state->dts_cred.dcr_cred; 1278 1279 ASSERT(s_cr != NULL); 1280 1281 if ((cr = CRED()) == NULL || 1282 s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { 1283 if (mode & DTRACE_MODE_NOPRIV_DROP) 1284 return (0); 1285 1286 mstate->dtms_access &= 1287 ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); 1288 } 1289 } 1290 1291 return (1); 1292 } 1293 1294 /* 1295 * Note: not called from probe context. This function is called 1296 * asynchronously (and at a regular interval) from outside of probe context to 1297 * clean the dirty dynamic variable lists on all CPUs. 
Dynamic variable 1298 * cleaning is explained in detail in <sys/dtrace_impl.h>. 1299 */ 1300 void 1301 dtrace_dynvar_clean(dtrace_dstate_t *dstate) 1302 { 1303 dtrace_dynvar_t *dirty; 1304 dtrace_dstate_percpu_t *dcpu; 1305 dtrace_dynvar_t **rinsep; 1306 int i, j, work = 0; 1307 1308 for (i = 0; i < NCPU; i++) { 1309 dcpu = &dstate->dtds_percpu[i]; 1310 rinsep = &dcpu->dtdsc_rinsing; 1311 1312 /* 1313 * If the dirty list is NULL, there is no dirty work to do. 1314 */ 1315 if (dcpu->dtdsc_dirty == NULL) 1316 continue; 1317 1318 if (dcpu->dtdsc_rinsing != NULL) { 1319 /* 1320 * If the rinsing list is non-NULL, then it is because 1321 * this CPU was selected to accept another CPU's 1322 * dirty list -- and since that time, dirty buffers 1323 * have accumulated. This is a highly unlikely 1324 * condition, but we choose to ignore the dirty 1325 * buffers -- they'll be picked up a future cleanse. 1326 */ 1327 continue; 1328 } 1329 1330 if (dcpu->dtdsc_clean != NULL) { 1331 /* 1332 * If the clean list is non-NULL, then we're in a 1333 * situation where a CPU has done deallocations (we 1334 * have a non-NULL dirty list) but no allocations (we 1335 * also have a non-NULL clean list). We can't simply 1336 * move the dirty list into the clean list on this 1337 * CPU, yet we also don't want to allow this condition 1338 * to persist, lest a short clean list prevent a 1339 * massive dirty list from being cleaned (which in 1340 * turn could lead to otherwise avoidable dynamic 1341 * drops). To deal with this, we look for some CPU 1342 * with a NULL clean list, NULL dirty list, and NULL 1343 * rinsing list -- and then we borrow this CPU to 1344 * rinse our dirty list. 
1345 */ 1346 for (j = 0; j < NCPU; j++) { 1347 dtrace_dstate_percpu_t *rinser; 1348 1349 rinser = &dstate->dtds_percpu[j]; 1350 1351 if (rinser->dtdsc_rinsing != NULL) 1352 continue; 1353 1354 if (rinser->dtdsc_dirty != NULL) 1355 continue; 1356 1357 if (rinser->dtdsc_clean != NULL) 1358 continue; 1359 1360 rinsep = &rinser->dtdsc_rinsing; 1361 break; 1362 } 1363 1364 if (j == NCPU) { 1365 /* 1366 * We were unable to find another CPU that 1367 * could accept this dirty list -- we are 1368 * therefore unable to clean it now. 1369 */ 1370 dtrace_dynvar_failclean++; 1371 continue; 1372 } 1373 } 1374 1375 work = 1; 1376 1377 /* 1378 * Atomically move the dirty list aside. 1379 */ 1380 do { 1381 dirty = dcpu->dtdsc_dirty; 1382 1383 /* 1384 * Before we zap the dirty list, set the rinsing list. 1385 * (This allows for a potential assertion in 1386 * dtrace_dynvar(): if a free dynamic variable appears 1387 * on a hash chain, either the dirty list or the 1388 * rinsing list for some CPU must be non-NULL.) 1389 */ 1390 *rinsep = dirty; 1391 dtrace_membar_producer(); 1392 } while (dtrace_casptr(&dcpu->dtdsc_dirty, 1393 dirty, NULL) != dirty); 1394 } 1395 1396 if (!work) { 1397 /* 1398 * We have no work to do; we can simply return. 1399 */ 1400 return; 1401 } 1402 1403 dtrace_sync(); 1404 1405 for (i = 0; i < NCPU; i++) { 1406 dcpu = &dstate->dtds_percpu[i]; 1407 1408 if (dcpu->dtdsc_rinsing == NULL) 1409 continue; 1410 1411 /* 1412 * We are now guaranteed that no hash chain contains a pointer 1413 * into this dirty list; we can make it clean. 1414 */ 1415 ASSERT(dcpu->dtdsc_clean == NULL); 1416 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing; 1417 dcpu->dtdsc_rinsing = NULL; 1418 } 1419 1420 /* 1421 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make 1422 * sure that all CPUs have seen all of the dtdsc_clean pointers. 
1423 * This prevents a race whereby a CPU incorrectly decides that 1424 * the state should be something other than DTRACE_DSTATE_CLEAN 1425 * after dtrace_dynvar_clean() has completed. 1426 */ 1427 dtrace_sync(); 1428 1429 dstate->dtds_state = DTRACE_DSTATE_CLEAN; 1430 } 1431 1432 /* 1433 * Depending on the value of the op parameter, this function looks-up, 1434 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an 1435 * allocation is requested, this function will return a pointer to a 1436 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no 1437 * variable can be allocated. If NULL is returned, the appropriate counter 1438 * will be incremented. 1439 */ 1440 dtrace_dynvar_t * 1441 dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, 1442 dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, 1443 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) 1444 { 1445 uint64_t hashval = DTRACE_DYNHASH_VALID; 1446 dtrace_dynhash_t *hash = dstate->dtds_hash; 1447 dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL; 1448 processorid_t me = CPU->cpu_id, cpu = me; 1449 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me]; 1450 size_t bucket, ksize; 1451 size_t chunksize = dstate->dtds_chunksize; 1452 uintptr_t kdata, lock, nstate; 1453 uint_t i; 1454 1455 ASSERT(nkeys != 0); 1456 1457 /* 1458 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time" 1459 * algorithm. For the by-value portions, we perform the algorithm in 1460 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a 1461 * bit, and seems to have only a minute effect on distribution. For 1462 * the by-reference data, we perform "One-at-a-time" iterating (safely) 1463 * over each referenced byte. It's painful to do this, but it's much 1464 * better than pathological hash distribution. The efficacy of the 1465 * hashing algorithm (and a comparison with other algorithms) may be 1466 * found by running the ::dtrace_dynstat MDB dcmd. 
1467 */ 1468 for (i = 0; i < nkeys; i++) { 1469 if (key[i].dttk_size == 0) { 1470 uint64_t val = key[i].dttk_value; 1471 1472 hashval += (val >> 48) & 0xffff; 1473 hashval += (hashval << 10); 1474 hashval ^= (hashval >> 6); 1475 1476 hashval += (val >> 32) & 0xffff; 1477 hashval += (hashval << 10); 1478 hashval ^= (hashval >> 6); 1479 1480 hashval += (val >> 16) & 0xffff; 1481 hashval += (hashval << 10); 1482 hashval ^= (hashval >> 6); 1483 1484 hashval += val & 0xffff; 1485 hashval += (hashval << 10); 1486 hashval ^= (hashval >> 6); 1487 } else { 1488 /* 1489 * This is incredibly painful, but it beats the hell 1490 * out of the alternative. 1491 */ 1492 uint64_t j, size = key[i].dttk_size; 1493 uintptr_t base = (uintptr_t)key[i].dttk_value; 1494 1495 if (!dtrace_canload(base, size, mstate, vstate)) 1496 break; 1497 1498 for (j = 0; j < size; j++) { 1499 hashval += dtrace_load8(base + j); 1500 hashval += (hashval << 10); 1501 hashval ^= (hashval >> 6); 1502 } 1503 } 1504 } 1505 1506 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) 1507 return (NULL); 1508 1509 hashval += (hashval << 3); 1510 hashval ^= (hashval >> 11); 1511 hashval += (hashval << 15); 1512 1513 /* 1514 * There is a remote chance (ideally, 1 in 2^31) that our hashval 1515 * comes out to be one of our two sentinel hash values. If this 1516 * actually happens, we set the hashval to be a value known to be a 1517 * non-sentinel value. 1518 */ 1519 if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK) 1520 hashval = DTRACE_DYNHASH_VALID; 1521 1522 /* 1523 * Yes, it's painful to do a divide here. If the cycle count becomes 1524 * important here, tricks can be pulled to reduce it. (However, it's 1525 * critical that hash collisions be kept to an absolute minimum; 1526 * they're much more painful than a divide.) It's better to have a 1527 * solution that generates few collisions and still keeps things 1528 * relatively simple. 
1529 */ 1530 bucket = hashval % dstate->dtds_hashsize; 1531 1532 if (op == DTRACE_DYNVAR_DEALLOC) { 1533 volatile uintptr_t *lockp = &hash[bucket].dtdh_lock; 1534 1535 for (;;) { 1536 while ((lock = *lockp) & 1) 1537 continue; 1538 1539 if (dtrace_casptr((void *)lockp, 1540 (void *)lock, (void *)(lock + 1)) == (void *)lock) 1541 break; 1542 } 1543 1544 dtrace_membar_producer(); 1545 } 1546 1547 top: 1548 prev = NULL; 1549 lock = hash[bucket].dtdh_lock; 1550 1551 dtrace_membar_consumer(); 1552 1553 start = hash[bucket].dtdh_chain; 1554 ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK || 1555 start->dtdv_hashval != DTRACE_DYNHASH_FREE || 1556 op != DTRACE_DYNVAR_DEALLOC)); 1557 1558 for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) { 1559 dtrace_tuple_t *dtuple = &dvar->dtdv_tuple; 1560 dtrace_key_t *dkey = &dtuple->dtt_key[0]; 1561 1562 if (dvar->dtdv_hashval != hashval) { 1563 if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) { 1564 /* 1565 * We've reached the sink, and therefore the 1566 * end of the hash chain; we can kick out of 1567 * the loop knowing that we have seen a valid 1568 * snapshot of state. 1569 */ 1570 ASSERT(dvar->dtdv_next == NULL); 1571 ASSERT(dvar == &dtrace_dynhash_sink); 1572 break; 1573 } 1574 1575 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) { 1576 /* 1577 * We've gone off the rails: somewhere along 1578 * the line, one of the members of this hash 1579 * chain was deleted. Note that we could also 1580 * detect this by simply letting this loop run 1581 * to completion, as we would eventually hit 1582 * the end of the dirty list. However, we 1583 * want to avoid running the length of the 1584 * dirty list unnecessarily (it might be quite 1585 * long), so we catch this as early as 1586 * possible by detecting the hash marker. In 1587 * this case, we simply set dvar to NULL and 1588 * break; the conditional after the loop will 1589 * send us back to top. 
1590 */ 1591 dvar = NULL; 1592 break; 1593 } 1594 1595 goto next; 1596 } 1597 1598 if (dtuple->dtt_nkeys != nkeys) 1599 goto next; 1600 1601 for (i = 0; i < nkeys; i++, dkey++) { 1602 if (dkey->dttk_size != key[i].dttk_size) 1603 goto next; /* size or type mismatch */ 1604 1605 if (dkey->dttk_size != 0) { 1606 if (dtrace_bcmp( 1607 (void *)(uintptr_t)key[i].dttk_value, 1608 (void *)(uintptr_t)dkey->dttk_value, 1609 dkey->dttk_size)) 1610 goto next; 1611 } else { 1612 if (dkey->dttk_value != key[i].dttk_value) 1613 goto next; 1614 } 1615 } 1616 1617 if (op != DTRACE_DYNVAR_DEALLOC) 1618 return (dvar); 1619 1620 ASSERT(dvar->dtdv_next == NULL || 1621 dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE); 1622 1623 if (prev != NULL) { 1624 ASSERT(hash[bucket].dtdh_chain != dvar); 1625 ASSERT(start != dvar); 1626 ASSERT(prev->dtdv_next == dvar); 1627 prev->dtdv_next = dvar->dtdv_next; 1628 } else { 1629 if (dtrace_casptr(&hash[bucket].dtdh_chain, 1630 start, dvar->dtdv_next) != start) { 1631 /* 1632 * We have failed to atomically swing the 1633 * hash table head pointer, presumably because 1634 * of a conflicting allocation on another CPU. 1635 * We need to reread the hash chain and try 1636 * again. 1637 */ 1638 goto top; 1639 } 1640 } 1641 1642 dtrace_membar_producer(); 1643 1644 /* 1645 * Now set the hash value to indicate that it's free. 1646 */ 1647 ASSERT(hash[bucket].dtdh_chain != dvar); 1648 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; 1649 1650 dtrace_membar_producer(); 1651 1652 /* 1653 * Set the next pointer to point at the dirty list, and 1654 * atomically swing the dirty pointer to the newly freed dvar. 1655 */ 1656 do { 1657 next = dcpu->dtdsc_dirty; 1658 dvar->dtdv_next = next; 1659 } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next); 1660 1661 /* 1662 * Finally, unlock this hash bucket. 
1663 */ 1664 ASSERT(hash[bucket].dtdh_lock == lock); 1665 ASSERT(lock & 1); 1666 hash[bucket].dtdh_lock++; 1667 1668 return (NULL); 1669 next: 1670 prev = dvar; 1671 continue; 1672 } 1673 1674 if (dvar == NULL) { 1675 /* 1676 * If dvar is NULL, it is because we went off the rails: 1677 * one of the elements that we traversed in the hash chain 1678 * was deleted while we were traversing it. In this case, 1679 * we assert that we aren't doing a dealloc (deallocs lock 1680 * the hash bucket to prevent themselves from racing with 1681 * one another), and retry the hash chain traversal. 1682 */ 1683 ASSERT(op != DTRACE_DYNVAR_DEALLOC); 1684 goto top; 1685 } 1686 1687 if (op != DTRACE_DYNVAR_ALLOC) { 1688 /* 1689 * If we are not to allocate a new variable, we want to 1690 * return NULL now. Before we return, check that the value 1691 * of the lock word hasn't changed. If it has, we may have 1692 * seen an inconsistent snapshot. 1693 */ 1694 if (op == DTRACE_DYNVAR_NOALLOC) { 1695 if (hash[bucket].dtdh_lock != lock) 1696 goto top; 1697 } else { 1698 ASSERT(op == DTRACE_DYNVAR_DEALLOC); 1699 ASSERT(hash[bucket].dtdh_lock == lock); 1700 ASSERT(lock & 1); 1701 hash[bucket].dtdh_lock++; 1702 } 1703 1704 return (NULL); 1705 } 1706 1707 /* 1708 * We need to allocate a new dynamic variable. The size we need is the 1709 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the 1710 * size of any auxiliary key data (rounded up to 8-byte alignment) plus 1711 * the size of any referred-to data (dsize). We then round the final 1712 * size up to the chunksize for allocation. 1713 */ 1714 for (ksize = 0, i = 0; i < nkeys; i++) 1715 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); 1716 1717 /* 1718 * This should be pretty much impossible, but could happen if, say, 1719 * strange DIF specified the tuple. 
Ideally, this should be an 1720 * assertion and not an error condition -- but that requires that the 1721 * chunksize calculation in dtrace_difo_chunksize() be absolutely 1722 * bullet-proof. (That is, it must not be able to be fooled by 1723 * malicious DIF.) Given the lack of backwards branches in DIF, 1724 * solving this would presumably not amount to solving the Halting 1725 * Problem -- but it still seems awfully hard. 1726 */ 1727 if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) + 1728 ksize + dsize > chunksize) { 1729 dcpu->dtdsc_drops++; 1730 return (NULL); 1731 } 1732 1733 nstate = DTRACE_DSTATE_EMPTY; 1734 1735 do { 1736 retry: 1737 free = dcpu->dtdsc_free; 1738 1739 if (free == NULL) { 1740 dtrace_dynvar_t *clean = dcpu->dtdsc_clean; 1741 void *rval; 1742 1743 if (clean == NULL) { 1744 /* 1745 * We're out of dynamic variable space on 1746 * this CPU. Unless we have tried all CPUs, 1747 * we'll try to allocate from a different 1748 * CPU. 1749 */ 1750 switch (dstate->dtds_state) { 1751 case DTRACE_DSTATE_CLEAN: { 1752 void *sp = &dstate->dtds_state; 1753 1754 if (++cpu >= NCPU) 1755 cpu = 0; 1756 1757 if (dcpu->dtdsc_dirty != NULL && 1758 nstate == DTRACE_DSTATE_EMPTY) 1759 nstate = DTRACE_DSTATE_DIRTY; 1760 1761 if (dcpu->dtdsc_rinsing != NULL) 1762 nstate = DTRACE_DSTATE_RINSING; 1763 1764 dcpu = &dstate->dtds_percpu[cpu]; 1765 1766 if (cpu != me) 1767 goto retry; 1768 1769 (void) dtrace_cas32(sp, 1770 DTRACE_DSTATE_CLEAN, nstate); 1771 1772 /* 1773 * To increment the correct bean 1774 * counter, take another lap. 1775 */ 1776 goto retry; 1777 } 1778 1779 case DTRACE_DSTATE_DIRTY: 1780 dcpu->dtdsc_dirty_drops++; 1781 break; 1782 1783 case DTRACE_DSTATE_RINSING: 1784 dcpu->dtdsc_rinsing_drops++; 1785 break; 1786 1787 case DTRACE_DSTATE_EMPTY: 1788 dcpu->dtdsc_drops++; 1789 break; 1790 } 1791 1792 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP); 1793 return (NULL); 1794 } 1795 1796 /* 1797 * The clean list appears to be non-empty. 
We want to 1798 * move the clean list to the free list; we start by 1799 * moving the clean pointer aside. 1800 */ 1801 if (dtrace_casptr(&dcpu->dtdsc_clean, 1802 clean, NULL) != clean) { 1803 /* 1804 * We are in one of two situations: 1805 * 1806 * (a) The clean list was switched to the 1807 * free list by another CPU. 1808 * 1809 * (b) The clean list was added to by the 1810 * cleansing cyclic. 1811 * 1812 * In either of these situations, we can 1813 * just reattempt the free list allocation. 1814 */ 1815 goto retry; 1816 } 1817 1818 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); 1819 1820 /* 1821 * Now we'll move the clean list to our free list. 1822 * It's impossible for this to fail: the only way 1823 * the free list can be updated is through this 1824 * code path, and only one CPU can own the clean list. 1825 * Thus, it would only be possible for this to fail if 1826 * this code were racing with dtrace_dynvar_clean(). 1827 * (That is, if dtrace_dynvar_clean() updated the clean 1828 * list, and we ended up racing to update the free 1829 * list.) This race is prevented by the dtrace_sync() 1830 * in dtrace_dynvar_clean() -- which flushes the 1831 * owners of the clean lists out before resetting 1832 * the clean lists. 1833 */ 1834 dcpu = &dstate->dtds_percpu[me]; 1835 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); 1836 ASSERT(rval == NULL); 1837 goto retry; 1838 } 1839 1840 dvar = free; 1841 new_free = dvar->dtdv_next; 1842 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free); 1843 1844 /* 1845 * We have now allocated a new chunk. We copy the tuple keys into the 1846 * tuple array and copy any referenced key data into the data space 1847 * following the tuple array. As we do this, we relocate dttk_value 1848 * in the final tuple to point to the key data address in the chunk. 
1849 */ 1850 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys]; 1851 dvar->dtdv_data = (void *)(kdata + ksize); 1852 dvar->dtdv_tuple.dtt_nkeys = nkeys; 1853 1854 for (i = 0; i < nkeys; i++) { 1855 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i]; 1856 size_t kesize = key[i].dttk_size; 1857 1858 if (kesize != 0) { 1859 dtrace_bcopy( 1860 (const void *)(uintptr_t)key[i].dttk_value, 1861 (void *)kdata, kesize); 1862 dkey->dttk_value = kdata; 1863 kdata += P2ROUNDUP(kesize, sizeof (uint64_t)); 1864 } else { 1865 dkey->dttk_value = key[i].dttk_value; 1866 } 1867 1868 dkey->dttk_size = kesize; 1869 } 1870 1871 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE); 1872 dvar->dtdv_hashval = hashval; 1873 dvar->dtdv_next = start; 1874 1875 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start) 1876 return (dvar); 1877 1878 /* 1879 * The cas has failed. Either another CPU is adding an element to 1880 * this hash chain, or another CPU is deleting an element from this 1881 * hash chain. The simplest way to deal with both of these cases 1882 * (though not necessarily the most efficient) is to free our 1883 * allocated block and tail-call ourselves. Note that the free is 1884 * to the dirty list and _not_ to the free list. This is to prevent 1885 * races with allocators, above. 
1886 */ 1887 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; 1888 1889 dtrace_membar_producer(); 1890 1891 do { 1892 free = dcpu->dtdsc_dirty; 1893 dvar->dtdv_next = free; 1894 } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); 1895 1896 return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate)); 1897 } 1898 1899 /*ARGSUSED*/ 1900 static void 1901 dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) 1902 { 1903 if ((int64_t)nval < (int64_t)*oval) 1904 *oval = nval; 1905 } 1906 1907 /*ARGSUSED*/ 1908 static void 1909 dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) 1910 { 1911 if ((int64_t)nval > (int64_t)*oval) 1912 *oval = nval; 1913 } 1914 1915 static void 1916 dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr) 1917 { 1918 int i, zero = DTRACE_QUANTIZE_ZEROBUCKET; 1919 int64_t val = (int64_t)nval; 1920 1921 if (val < 0) { 1922 for (i = 0; i < zero; i++) { 1923 if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) { 1924 quanta[i] += incr; 1925 return; 1926 } 1927 } 1928 } else { 1929 for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) { 1930 if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) { 1931 quanta[i - 1] += incr; 1932 return; 1933 } 1934 } 1935 1936 quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr; 1937 return; 1938 } 1939 1940 ASSERT(0); 1941 } 1942 1943 static void 1944 dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) 1945 { 1946 uint64_t arg = *lquanta++; 1947 int32_t base = DTRACE_LQUANTIZE_BASE(arg); 1948 uint16_t step = DTRACE_LQUANTIZE_STEP(arg); 1949 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); 1950 int32_t val = (int32_t)nval, level; 1951 1952 ASSERT(step != 0); 1953 ASSERT(levels != 0); 1954 1955 if (val < base) { 1956 /* 1957 * This is an underflow. 1958 */ 1959 lquanta[0] += incr; 1960 return; 1961 } 1962 1963 level = (val - base) / step; 1964 1965 if (level < levels) { 1966 lquanta[level + 1] += incr; 1967 return; 1968 } 1969 1970 /* 1971 * This is an overflow. 
1972 */ 1973 lquanta[levels + 1] += incr; 1974 } 1975 1976 static int 1977 dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, 1978 uint16_t high, uint16_t nsteps, int64_t value) 1979 { 1980 int64_t this = 1, last, next; 1981 int base = 1, order; 1982 1983 ASSERT(factor <= nsteps); 1984 ASSERT(nsteps % factor == 0); 1985 1986 for (order = 0; order < low; order++) 1987 this *= factor; 1988 1989 /* 1990 * If our value is less than our factor taken to the power of the 1991 * low order of magnitude, it goes into the zeroth bucket. 1992 */ 1993 if (value < (last = this)) 1994 return (0); 1995 1996 for (this *= factor; order <= high; order++) { 1997 int nbuckets = this > nsteps ? nsteps : this; 1998 1999 if ((next = this * factor) < this) { 2000 /* 2001 * We should not generally get log/linear quantizations 2002 * with a high magnitude that allows 64-bits to 2003 * overflow, but we nonetheless protect against this 2004 * by explicitly checking for overflow, and clamping 2005 * our value accordingly. 2006 */ 2007 value = this - 1; 2008 } 2009 2010 if (value < this) { 2011 /* 2012 * If our value lies within this order of magnitude, 2013 * determine its position by taking the offset within 2014 * the order of magnitude, dividing by the bucket 2015 * width, and adding to our (accumulated) base. 2016 */ 2017 return (base + (value - last) / (this / nbuckets)); 2018 } 2019 2020 base += nbuckets - (nbuckets / factor); 2021 last = this; 2022 this = next; 2023 } 2024 2025 /* 2026 * Our value is greater than or equal to our factor taken to the 2027 * power of one plus the high magnitude -- return the top bucket. 
2028 */ 2029 return (base); 2030 } 2031 2032 static void 2033 dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) 2034 { 2035 uint64_t arg = *llquanta++; 2036 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); 2037 uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); 2038 uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); 2039 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); 2040 2041 llquanta[dtrace_aggregate_llquantize_bucket(factor, 2042 low, high, nsteps, nval)] += incr; 2043 } 2044 2045 /*ARGSUSED*/ 2046 static void 2047 dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) 2048 { 2049 data[0]++; 2050 data[1] += nval; 2051 } 2052 2053 /*ARGSUSED*/ 2054 static void 2055 dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) 2056 { 2057 int64_t snval = (int64_t)nval; 2058 uint64_t tmp[2]; 2059 2060 data[0]++; 2061 data[1] += nval; 2062 2063 /* 2064 * What we want to say here is: 2065 * 2066 * data[2] += nval * nval; 2067 * 2068 * But given that nval is 64-bit, we could easily overflow, so 2069 * we do this as 128-bit arithmetic. 2070 */ 2071 if (snval < 0) 2072 snval = -snval; 2073 2074 dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); 2075 dtrace_add_128(data + 2, tmp, data + 2); 2076 } 2077 2078 /*ARGSUSED*/ 2079 static void 2080 dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) 2081 { 2082 *oval = *oval + 1; 2083 } 2084 2085 /*ARGSUSED*/ 2086 static void 2087 dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) 2088 { 2089 *oval += nval; 2090 } 2091 2092 /* 2093 * Aggregate given the tuple in the principal data buffer, and the aggregating 2094 * action denoted by the specified dtrace_aggregation_t. The aggregation 2095 * buffer is specified as the buf parameter. This routine does not return 2096 * failure; if there is no space in the aggregation buffer, the data will be 2097 * dropped, and a corresponding counter incremented. 
*/
/*
 * agg    - the aggregation being applied
 * dbuf   - principal buffer holding the tuple that was just traced
 * offset - offset within dbuf at which the traced data begins
 * buf    - aggregation buffer; if NULL, nothing is done
 * expr   - value handed to the aggregating function
 * arg    - optional argument to the aggregating function; forced to 1 when
 *          the aggregation takes no argument
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	uintptr_t offs;

	if (buf == NULL)
		return;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize(), lquantize() and llquantize()
		 * take additional arguments, and they have the same
		 * semantics: an increment value that defaults to 1 when not
		 * present.  If additional aggregating actions take
		 * arguments, the setting of the default argument value will
		 * presumably have to become more sophisticated...
		 */
		arg = 1;
	}

	/*
	 * size is the length of the key (everything from the aggregation's
	 * base up to its own record); fsize additionally includes the
	 * aggregation's data.
	 */
	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * Nothing has been stored in this buffer yet, so the hash
		 * table must be set up first.
		 *
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets.  If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		/*
		 * The hash table sits immediately below the metastructure;
		 * keys are subsequently allocated downwards from the bottom
		 * of the hash table (dtagb_free).
		 */
		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm: a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			/*
			 * For strings, only the bytes up to and including
			 * the terminating NUL contribute to the hash.
			 */
			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	/*
	 * Walk the hash chain looking for a key with the same hash value,
	 * the same size and the same tuple contents.
	 */
	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit:  we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 *
	 * NOTE(review): align is sizeof (uint64_t) - 1, so the mask tested
	 * below is (align - 1) == 6; this terminates only if the offset is
	 * always a multiple of sizeof (uint32_t) -- confirm against the
	 * code that maintains dtb_offset.
	 */
	offs = buf->dtb_offset;
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The key structure is carved off the bottom of the free region,
	 * while the key's data is taken from the top of the buffer.
	 */
	/*CONSTCOND*/
	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.  The aggregation ID occupies the first
	 * sizeof (dtrace_aggid_t) bytes of the key.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		/*
		 * Once the terminating NUL has been seen, every remaining
		 * byte of the string's record is cleared.
		 */
		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	/*
	 * Zero the aggregation's data, which follows the key.
	 */
	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	/*
	 * Fill in the key and insert it at the head of its hash chain.
	 */
	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}

/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
2354 */ 2355 static int 2356 dtrace_speculation(dtrace_state_t *state) 2357 { 2358 int i = 0; 2359 dtrace_speculation_state_t current; 2360 uint32_t *stat = &state->dts_speculations_unavail, count; 2361 2362 while (i < state->dts_nspeculations) { 2363 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2364 2365 current = spec->dtsp_state; 2366 2367 if (current != DTRACESPEC_INACTIVE) { 2368 if (current == DTRACESPEC_COMMITTINGMANY || 2369 current == DTRACESPEC_COMMITTING || 2370 current == DTRACESPEC_DISCARDING) 2371 stat = &state->dts_speculations_busy; 2372 i++; 2373 continue; 2374 } 2375 2376 if (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2377 current, DTRACESPEC_ACTIVE) == current) 2378 return (i + 1); 2379 } 2380 2381 /* 2382 * We couldn't find a speculation. If we found as much as a single 2383 * busy speculation buffer, we'll attribute this failure as "busy" 2384 * instead of "unavail". 2385 */ 2386 do { 2387 count = *stat; 2388 } while (dtrace_cas32(stat, count, count + 1) != count); 2389 2390 return (0); 2391 } 2392 2393 /* 2394 * This routine commits an active speculation. If the specified speculation 2395 * is not in a valid state to perform a commit(), this routine will silently do 2396 * nothing. 
* The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 *
 * state - consumer state that owns the speculation
 * cpu   - CPU whose speculative buffer is to be committed
 * which - one-based speculation identifier; 0 is silently ignored, and an
 *         identifier beyond dts_nspeculations sets CPU_DTRACE_ILLOP
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit, slimit;
	dtrace_speculation_state_t current, new;
	intptr_t offs;
	uint64_t timestamp;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	/*
	 * Atomically move the speculation into one of the committing states;
	 * the loop is retried if the state changes underneath us.
	 */
	do {
		current = spec->dtsp_state;

		/*
		 * If the speculation is already COMMITTINGMANY, no state
		 * change is needed; we go straight to copying this CPU's
		 * buffer.
		 */
		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have sufficient space to copy the speculative buffer into the
	 * primary buffer.  First, modify the speculative buffer, filling
	 * in the timestamp of all entries with the current time.  The data
	 * must have the commit() time rather than the time it was traced,
	 * so that all entries in the primary buffer are in timestamp order.
	 */
	timestamp = dtrace_gethrtime();
	saddr = (uintptr_t)src->dtb_tomax;
	slimit = saddr + src->dtb_offset;
	while (saddr < slimit) {
		size_t size;
		dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;

		/*
		 * An EPID of DTRACE_EPIDNONE is not a record; step over
		 * just the EPID itself.
		 */
		if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
			saddr += sizeof (dtrace_epid_t);
			continue;
		}
		ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs);
		size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;

		ASSERT3U(saddr + size, <=, slimit);
		ASSERT3U(size, >=, sizeof (dtrace_rechdr_t));
		ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX);

		DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);

		saddr += size;
	}

	/*
	 * Copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	/*
	 * Reset this CPU's speculative buffer, folding its drop count into
	 * dtb_xamot_drops.
	 */
	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}

/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.
The state of the specified speculation is transitioned 2566 * according to the state transition diagram outlined in <sys/dtrace_impl.h> 2567 */ 2568 static void 2569 dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, 2570 dtrace_specid_t which) 2571 { 2572 dtrace_speculation_t *spec; 2573 dtrace_speculation_state_t current, new; 2574 dtrace_buffer_t *buf; 2575 2576 if (which == 0) 2577 return; 2578 2579 if (which > state->dts_nspeculations) { 2580 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 2581 return; 2582 } 2583 2584 spec = &state->dts_speculations[which - 1]; 2585 buf = &spec->dtsp_buffer[cpu]; 2586 2587 do { 2588 current = spec->dtsp_state; 2589 2590 switch (current) { 2591 case DTRACESPEC_INACTIVE: 2592 case DTRACESPEC_COMMITTINGMANY: 2593 case DTRACESPEC_COMMITTING: 2594 case DTRACESPEC_DISCARDING: 2595 return; 2596 2597 case DTRACESPEC_ACTIVE: 2598 case DTRACESPEC_ACTIVEMANY: 2599 new = DTRACESPEC_DISCARDING; 2600 break; 2601 2602 case DTRACESPEC_ACTIVEONE: 2603 if (buf->dtb_offset != 0) { 2604 new = DTRACESPEC_INACTIVE; 2605 } else { 2606 new = DTRACESPEC_DISCARDING; 2607 } 2608 break; 2609 2610 default: 2611 ASSERT(0); 2612 } 2613 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2614 current, new) != current); 2615 2616 buf->dtb_offset = 0; 2617 buf->dtb_drops = 0; 2618 } 2619 2620 /* 2621 * Note: not called from probe context. This function is called 2622 * asynchronously from cross call context to clean any speculations that are 2623 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be 2624 * transitioned back to the INACTIVE state until all CPUs have cleaned the 2625 * speculation. 
2626 */ 2627 static void 2628 dtrace_speculation_clean_here(dtrace_state_t *state) 2629 { 2630 dtrace_icookie_t cookie; 2631 processorid_t cpu = CPU->cpu_id; 2632 dtrace_buffer_t *dest = &state->dts_buffer[cpu]; 2633 dtrace_specid_t i; 2634 2635 cookie = dtrace_interrupt_disable(); 2636 2637 if (dest->dtb_tomax == NULL) { 2638 dtrace_interrupt_enable(cookie); 2639 return; 2640 } 2641 2642 for (i = 0; i < state->dts_nspeculations; i++) { 2643 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2644 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; 2645 2646 if (src->dtb_tomax == NULL) 2647 continue; 2648 2649 if (spec->dtsp_state == DTRACESPEC_DISCARDING) { 2650 src->dtb_offset = 0; 2651 continue; 2652 } 2653 2654 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) 2655 continue; 2656 2657 if (src->dtb_offset == 0) 2658 continue; 2659 2660 dtrace_speculation_commit(state, cpu, i + 1); 2661 } 2662 2663 dtrace_interrupt_enable(cookie); 2664 } 2665 2666 /* 2667 * Note: not called from probe context. This function is called 2668 * asynchronously (and at a regular interval) to clean any speculations that 2669 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there 2670 * is work to be done, it cross calls all CPUs to perform that work; 2671 * COMMITMANY and DISCARDING speculations may not be transitioned back to the 2672 * INACTIVE state until they have been cleaned by all CPUs. 
2673 */ 2674 static void 2675 dtrace_speculation_clean(dtrace_state_t *state) 2676 { 2677 int work = 0, rv; 2678 dtrace_specid_t i; 2679 2680 for (i = 0; i < state->dts_nspeculations; i++) { 2681 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2682 2683 ASSERT(!spec->dtsp_cleaning); 2684 2685 if (spec->dtsp_state != DTRACESPEC_DISCARDING && 2686 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) 2687 continue; 2688 2689 work++; 2690 spec->dtsp_cleaning = 1; 2691 } 2692 2693 if (!work) 2694 return; 2695 2696 dtrace_xcall(DTRACE_CPUALL, 2697 (dtrace_xcall_t)dtrace_speculation_clean_here, state); 2698 2699 /* 2700 * We now know that all CPUs have committed or discarded their 2701 * speculation buffers, as appropriate. We can now set the state 2702 * to inactive. 2703 */ 2704 for (i = 0; i < state->dts_nspeculations; i++) { 2705 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2706 dtrace_speculation_state_t current, new; 2707 2708 if (!spec->dtsp_cleaning) 2709 continue; 2710 2711 current = spec->dtsp_state; 2712 ASSERT(current == DTRACESPEC_DISCARDING || 2713 current == DTRACESPEC_COMMITTINGMANY); 2714 2715 new = DTRACESPEC_INACTIVE; 2716 2717 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new); 2718 ASSERT(rv == current); 2719 spec->dtsp_cleaning = 0; 2720 } 2721 } 2722 2723 /* 2724 * Called as part of a speculate() to get the speculative buffer associated 2725 * with a given speculation. Returns NULL if the specified speculation is not 2726 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and 2727 * the active CPU is not the specified CPU -- the speculation will be 2728 * atomically transitioned into the ACTIVEMANY state. 
2729 */ 2730 static dtrace_buffer_t * 2731 dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, 2732 dtrace_specid_t which) 2733 { 2734 dtrace_speculation_t *spec; 2735 dtrace_speculation_state_t current, new; 2736 dtrace_buffer_t *buf; 2737 2738 if (which == 0) 2739 return (NULL); 2740 2741 if (which > state->dts_nspeculations) { 2742 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 2743 return (NULL); 2744 } 2745 2746 spec = &state->dts_speculations[which - 1]; 2747 buf = &spec->dtsp_buffer[cpuid]; 2748 2749 do { 2750 current = spec->dtsp_state; 2751 2752 switch (current) { 2753 case DTRACESPEC_INACTIVE: 2754 case DTRACESPEC_COMMITTINGMANY: 2755 case DTRACESPEC_DISCARDING: 2756 return (NULL); 2757 2758 case DTRACESPEC_COMMITTING: 2759 ASSERT(buf->dtb_offset == 0); 2760 return (NULL); 2761 2762 case DTRACESPEC_ACTIVEONE: 2763 /* 2764 * This speculation is currently active on one CPU. 2765 * Check the offset in the buffer; if it's non-zero, 2766 * that CPU must be us (and we leave the state alone). 2767 * If it's zero, assume that we're starting on a new 2768 * CPU -- and change the state to indicate that the 2769 * speculation is active on more than one CPU. 2770 */ 2771 if (buf->dtb_offset != 0) 2772 return (buf); 2773 2774 new = DTRACESPEC_ACTIVEMANY; 2775 break; 2776 2777 case DTRACESPEC_ACTIVEMANY: 2778 return (buf); 2779 2780 case DTRACESPEC_ACTIVE: 2781 new = DTRACESPEC_ACTIVEONE; 2782 break; 2783 2784 default: 2785 ASSERT(0); 2786 } 2787 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2788 current, new) != current); 2789 2790 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY); 2791 return (buf); 2792 } 2793 2794 /* 2795 * Return a string. In the event that the user lacks the privilege to access 2796 * arbitrary kernel memory, we copy the string out to scratch memory so that we 2797 * don't fail access checking. 
2798 * 2799 * dtrace_dif_variable() uses this routine as a helper for various 2800 * builtin values such as 'execname' and 'probefunc.' 2801 */ 2802 uintptr_t 2803 dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, 2804 dtrace_mstate_t *mstate) 2805 { 2806 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 2807 uintptr_t ret; 2808 size_t strsz; 2809 2810 /* 2811 * The easy case: this probe is allowed to read all of memory, so 2812 * we can just return this as a vanilla pointer. 2813 */ 2814 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 2815 return (addr); 2816 2817 /* 2818 * This is the tougher case: we copy the string in question from 2819 * kernel memory into scratch memory and return it that way: this 2820 * ensures that we won't trip up when access checking tests the 2821 * BYREF return value. 2822 */ 2823 strsz = dtrace_strlen((char *)addr, size) + 1; 2824 2825 if (mstate->dtms_scratch_ptr + strsz > 2826 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 2827 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 2828 return (NULL); 2829 } 2830 2831 dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, 2832 strsz); 2833 ret = mstate->dtms_scratch_ptr; 2834 mstate->dtms_scratch_ptr += strsz; 2835 return (ret); 2836 } 2837 2838 /* 2839 * This function implements the DIF emulator's variable lookups. The emulator 2840 * passes a reserved variable identifier and optional built-in array index. 2841 */ 2842 static uint64_t 2843 dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, 2844 uint64_t ndx) 2845 { 2846 /* 2847 * If we're accessing one of the uncached arguments, we'll turn this 2848 * into a reference in the args array. 
2849 */ 2850 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { 2851 ndx = v - DIF_VAR_ARG0; 2852 v = DIF_VAR_ARGS; 2853 } 2854 2855 switch (v) { 2856 case DIF_VAR_ARGS: 2857 if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) { 2858 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= 2859 CPU_DTRACE_KPRIV; 2860 return (0); 2861 } 2862 2863 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); 2864 if (ndx >= sizeof (mstate->dtms_arg) / 2865 sizeof (mstate->dtms_arg[0])) { 2866 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 2867 dtrace_provider_t *pv; 2868 uint64_t val; 2869 2870 pv = mstate->dtms_probe->dtpr_provider; 2871 if (pv->dtpv_pops.dtps_getargval != NULL) 2872 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, 2873 mstate->dtms_probe->dtpr_id, 2874 mstate->dtms_probe->dtpr_arg, ndx, aframes); 2875 else 2876 val = dtrace_getarg(ndx, aframes); 2877 2878 /* 2879 * This is regrettably required to keep the compiler 2880 * from tail-optimizing the call to dtrace_getarg(). 2881 * The condition always evaluates to true, but the 2882 * compiler has no way of figuring that out a priori. 2883 * (None of this would be necessary if the compiler 2884 * could be relied upon to _always_ tail-optimize 2885 * the call to dtrace_getarg() -- but it can't.) 
2886 */ 2887 if (mstate->dtms_probe != NULL) 2888 return (val); 2889 2890 ASSERT(0); 2891 } 2892 2893 return (mstate->dtms_arg[ndx]); 2894 2895 case DIF_VAR_UREGS: { 2896 klwp_t *lwp; 2897 2898 if (!dtrace_priv_proc(state, mstate)) 2899 return (0); 2900 2901 if ((lwp = curthread->t_lwp) == NULL) { 2902 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 2903 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL; 2904 return (0); 2905 } 2906 2907 return (dtrace_getreg(lwp->lwp_regs, ndx)); 2908 } 2909 2910 case DIF_VAR_VMREGS: { 2911 uint64_t rval; 2912 2913 if (!dtrace_priv_kernel(state)) 2914 return (0); 2915 2916 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 2917 2918 rval = dtrace_getvmreg(ndx, 2919 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags); 2920 2921 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 2922 2923 return (rval); 2924 } 2925 2926 case DIF_VAR_CURTHREAD: 2927 if (!dtrace_priv_kernel(state)) 2928 return (0); 2929 return ((uint64_t)(uintptr_t)curthread); 2930 2931 case DIF_VAR_TIMESTAMP: 2932 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { 2933 mstate->dtms_timestamp = dtrace_gethrtime(); 2934 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; 2935 } 2936 return (mstate->dtms_timestamp); 2937 2938 case DIF_VAR_VTIMESTAMP: 2939 ASSERT(dtrace_vtime_references != 0); 2940 return (curthread->t_dtrace_vtime); 2941 2942 case DIF_VAR_WALLTIMESTAMP: 2943 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { 2944 mstate->dtms_walltimestamp = dtrace_gethrestime(); 2945 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP; 2946 } 2947 return (mstate->dtms_walltimestamp); 2948 2949 case DIF_VAR_IPL: 2950 if (!dtrace_priv_kernel(state)) 2951 return (0); 2952 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { 2953 mstate->dtms_ipl = dtrace_getipl(); 2954 mstate->dtms_present |= DTRACE_MSTATE_IPL; 2955 } 2956 return (mstate->dtms_ipl); 2957 2958 case DIF_VAR_EPID: 2959 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID); 2960 return (mstate->dtms_epid); 2961 2962 case DIF_VAR_ID: 2963 
ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 2964 return (mstate->dtms_probe->dtpr_id); 2965 2966 case DIF_VAR_STACKDEPTH: 2967 if (!dtrace_priv_kernel(state)) 2968 return (0); 2969 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { 2970 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 2971 2972 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); 2973 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; 2974 } 2975 return (mstate->dtms_stackdepth); 2976 2977 case DIF_VAR_USTACKDEPTH: 2978 if (!dtrace_priv_proc(state, mstate)) 2979 return (0); 2980 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { 2981 /* 2982 * See comment in DIF_VAR_PID. 2983 */ 2984 if (DTRACE_ANCHORED(mstate->dtms_probe) && 2985 CPU_ON_INTR(CPU)) { 2986 mstate->dtms_ustackdepth = 0; 2987 } else { 2988 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 2989 mstate->dtms_ustackdepth = 2990 dtrace_getustackdepth(); 2991 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 2992 } 2993 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; 2994 } 2995 return (mstate->dtms_ustackdepth); 2996 2997 case DIF_VAR_CALLER: 2998 if (!dtrace_priv_kernel(state)) 2999 return (0); 3000 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { 3001 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 3002 3003 if (!DTRACE_ANCHORED(mstate->dtms_probe)) { 3004 /* 3005 * If this is an unanchored probe, we are 3006 * required to go through the slow path: 3007 * dtrace_caller() only guarantees correct 3008 * results for anchored probes. 3009 */ 3010 pc_t caller[2]; 3011 3012 dtrace_getpcstack(caller, 2, aframes, 3013 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); 3014 mstate->dtms_caller = caller[1]; 3015 } else if ((mstate->dtms_caller = 3016 dtrace_caller(aframes)) == -1) { 3017 /* 3018 * We have failed to do this the quick way; 3019 * we must resort to the slower approach of 3020 * calling dtrace_getpcstack(). 
3021 */ 3022 pc_t caller; 3023 3024 dtrace_getpcstack(&caller, 1, aframes, NULL); 3025 mstate->dtms_caller = caller; 3026 } 3027 3028 mstate->dtms_present |= DTRACE_MSTATE_CALLER; 3029 } 3030 return (mstate->dtms_caller); 3031 3032 case DIF_VAR_UCALLER: 3033 if (!dtrace_priv_proc(state, mstate)) 3034 return (0); 3035 3036 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { 3037 uint64_t ustack[3]; 3038 3039 /* 3040 * dtrace_getupcstack() fills in the first uint64_t 3041 * with the current PID. The second uint64_t will 3042 * be the program counter at user-level. The third 3043 * uint64_t will contain the caller, which is what 3044 * we're after. 3045 */ 3046 ustack[2] = NULL; 3047 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3048 dtrace_getupcstack(ustack, 3); 3049 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3050 mstate->dtms_ucaller = ustack[2]; 3051 mstate->dtms_present |= DTRACE_MSTATE_UCALLER; 3052 } 3053 3054 return (mstate->dtms_ucaller); 3055 3056 case DIF_VAR_PROBEPROV: 3057 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3058 return (dtrace_dif_varstr( 3059 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, 3060 state, mstate)); 3061 3062 case DIF_VAR_PROBEMOD: 3063 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3064 return (dtrace_dif_varstr( 3065 (uintptr_t)mstate->dtms_probe->dtpr_mod, 3066 state, mstate)); 3067 3068 case DIF_VAR_PROBEFUNC: 3069 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3070 return (dtrace_dif_varstr( 3071 (uintptr_t)mstate->dtms_probe->dtpr_func, 3072 state, mstate)); 3073 3074 case DIF_VAR_PROBENAME: 3075 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3076 return (dtrace_dif_varstr( 3077 (uintptr_t)mstate->dtms_probe->dtpr_name, 3078 state, mstate)); 3079 3080 case DIF_VAR_PID: 3081 if (!dtrace_priv_proc(state, mstate)) 3082 return (0); 3083 3084 /* 3085 * Note that we are assuming that an unanchored probe is 3086 * always due to a high-level interrupt. 
(And we're assuming 3087 * that there is only a single high level interrupt.) 3088 */ 3089 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3090 return (pid0.pid_id); 3091 3092 /* 3093 * It is always safe to dereference one's own t_procp pointer: 3094 * it always points to a valid, allocated proc structure. 3095 * Further, it is always safe to dereference the p_pidp member 3096 * of one's own proc structure. (These are truisms becuase 3097 * threads and processes don't clean up their own state -- 3098 * they leave that task to whomever reaps them.) 3099 */ 3100 return ((uint64_t)curthread->t_procp->p_pidp->pid_id); 3101 3102 case DIF_VAR_PPID: 3103 if (!dtrace_priv_proc(state, mstate)) 3104 return (0); 3105 3106 /* 3107 * See comment in DIF_VAR_PID. 3108 */ 3109 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3110 return (pid0.pid_id); 3111 3112 /* 3113 * It is always safe to dereference one's own t_procp pointer: 3114 * it always points to a valid, allocated proc structure. 3115 * (This is true because threads don't clean up their own 3116 * state -- they leave that task to whomever reaps them.) 3117 */ 3118 return ((uint64_t)curthread->t_procp->p_ppid); 3119 3120 case DIF_VAR_TID: 3121 /* 3122 * See comment in DIF_VAR_PID. 3123 */ 3124 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3125 return (0); 3126 3127 return ((uint64_t)curthread->t_tid); 3128 3129 case DIF_VAR_EXECNAME: 3130 if (!dtrace_priv_proc(state, mstate)) 3131 return (0); 3132 3133 /* 3134 * See comment in DIF_VAR_PID. 3135 */ 3136 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3137 return ((uint64_t)(uintptr_t)p0.p_user.u_comm); 3138 3139 /* 3140 * It is always safe to dereference one's own t_procp pointer: 3141 * it always points to a valid, allocated proc structure. 3142 * (This is true because threads don't clean up their own 3143 * state -- they leave that task to whomever reaps them.) 
3144 */ 3145 return (dtrace_dif_varstr( 3146 (uintptr_t)curthread->t_procp->p_user.u_comm, 3147 state, mstate)); 3148 3149 case DIF_VAR_ZONENAME: 3150 if (!dtrace_priv_proc(state, mstate)) 3151 return (0); 3152 3153 /* 3154 * See comment in DIF_VAR_PID. 3155 */ 3156 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3157 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); 3158 3159 /* 3160 * It is always safe to dereference one's own t_procp pointer: 3161 * it always points to a valid, allocated proc structure. 3162 * (This is true because threads don't clean up their own 3163 * state -- they leave that task to whomever reaps them.) 3164 */ 3165 return (dtrace_dif_varstr( 3166 (uintptr_t)curthread->t_procp->p_zone->zone_name, 3167 state, mstate)); 3168 3169 case DIF_VAR_UID: 3170 if (!dtrace_priv_proc(state, mstate)) 3171 return (0); 3172 3173 /* 3174 * See comment in DIF_VAR_PID. 3175 */ 3176 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3177 return ((uint64_t)p0.p_cred->cr_uid); 3178 3179 /* 3180 * It is always safe to dereference one's own t_procp pointer: 3181 * it always points to a valid, allocated proc structure. 3182 * (This is true because threads don't clean up their own 3183 * state -- they leave that task to whomever reaps them.) 3184 * 3185 * Additionally, it is safe to dereference one's own process 3186 * credential, since this is never NULL after process birth. 3187 */ 3188 return ((uint64_t)curthread->t_procp->p_cred->cr_uid); 3189 3190 case DIF_VAR_GID: 3191 if (!dtrace_priv_proc(state, mstate)) 3192 return (0); 3193 3194 /* 3195 * See comment in DIF_VAR_PID. 3196 */ 3197 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3198 return ((uint64_t)p0.p_cred->cr_gid); 3199 3200 /* 3201 * It is always safe to dereference one's own t_procp pointer: 3202 * it always points to a valid, allocated proc structure. 
3203 * (This is true because threads don't clean up their own 3204 * state -- they leave that task to whomever reaps them.) 3205 * 3206 * Additionally, it is safe to dereference one's own process 3207 * credential, since this is never NULL after process birth. 3208 */ 3209 return ((uint64_t)curthread->t_procp->p_cred->cr_gid); 3210 3211 case DIF_VAR_ERRNO: { 3212 klwp_t *lwp; 3213 if (!dtrace_priv_proc(state, mstate)) 3214 return (0); 3215 3216 /* 3217 * See comment in DIF_VAR_PID. 3218 */ 3219 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3220 return (0); 3221 3222 /* 3223 * It is always safe to dereference one's own t_lwp pointer in 3224 * the event that this pointer is non-NULL. (This is true 3225 * because threads and lwps don't clean up their own state -- 3226 * they leave that task to whomever reaps them.) 3227 */ 3228 if ((lwp = curthread->t_lwp) == NULL) 3229 return (0); 3230 3231 return ((uint64_t)lwp->lwp_errno); 3232 } 3233 default: 3234 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 3235 return (0); 3236 } 3237 } 3238 3239 /* 3240 * Emulate the execution of DTrace ID subroutines invoked by the call opcode. 3241 * Notice that we don't bother validating the proper number of arguments or 3242 * their types in the tuple stack. This isn't needed because all argument 3243 * interpretation is safe because of our load safety -- the worst that can 3244 * happen is that a bogus program can obtain bogus results. 
3245 */ 3246 static void 3247 dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, 3248 dtrace_key_t *tupregs, int nargs, 3249 dtrace_mstate_t *mstate, dtrace_state_t *state) 3250 { 3251 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 3252 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 3253 dtrace_vstate_t *vstate = &state->dts_vstate; 3254 3255 union { 3256 mutex_impl_t mi; 3257 uint64_t mx; 3258 } m; 3259 3260 union { 3261 krwlock_t ri; 3262 uintptr_t rw; 3263 } r; 3264 3265 switch (subr) { 3266 case DIF_SUBR_RAND: 3267 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875; 3268 break; 3269 3270 case DIF_SUBR_MUTEX_OWNED: 3271 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 3272 mstate, vstate)) { 3273 regs[rd] = NULL; 3274 break; 3275 } 3276 3277 m.mx = dtrace_load64(tupregs[0].dttk_value); 3278 if (MUTEX_TYPE_ADAPTIVE(&m.mi)) 3279 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; 3280 else 3281 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock); 3282 break; 3283 3284 case DIF_SUBR_MUTEX_OWNER: 3285 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 3286 mstate, vstate)) { 3287 regs[rd] = NULL; 3288 break; 3289 } 3290 3291 m.mx = dtrace_load64(tupregs[0].dttk_value); 3292 if (MUTEX_TYPE_ADAPTIVE(&m.mi) && 3293 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) 3294 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi); 3295 else 3296 regs[rd] = 0; 3297 break; 3298 3299 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: 3300 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 3301 mstate, vstate)) { 3302 regs[rd] = NULL; 3303 break; 3304 } 3305 3306 m.mx = dtrace_load64(tupregs[0].dttk_value); 3307 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); 3308 break; 3309 3310 case DIF_SUBR_MUTEX_TYPE_SPIN: 3311 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 3312 mstate, vstate)) { 3313 regs[rd] = NULL; 3314 break; 3315 } 3316 3317 m.mx = dtrace_load64(tupregs[0].dttk_value); 3318 regs[rd] = MUTEX_TYPE_SPIN(&m.mi); 3319 break; 
3320 3321 case DIF_SUBR_RW_READ_HELD: { 3322 uintptr_t tmp; 3323 3324 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), 3325 mstate, vstate)) { 3326 regs[rd] = NULL; 3327 break; 3328 } 3329 3330 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3331 regs[rd] = _RW_READ_HELD(&r.ri, tmp); 3332 break; 3333 } 3334 3335 case DIF_SUBR_RW_WRITE_HELD: 3336 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), 3337 mstate, vstate)) { 3338 regs[rd] = NULL; 3339 break; 3340 } 3341 3342 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3343 regs[rd] = _RW_WRITE_HELD(&r.ri); 3344 break; 3345 3346 case DIF_SUBR_RW_ISWRITER: 3347 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), 3348 mstate, vstate)) { 3349 regs[rd] = NULL; 3350 break; 3351 } 3352 3353 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3354 regs[rd] = _RW_ISWRITER(&r.ri); 3355 break; 3356 3357 case DIF_SUBR_BCOPY: { 3358 /* 3359 * We need to be sure that the destination is in the scratch 3360 * region -- no other region is allowed. 3361 */ 3362 uintptr_t src = tupregs[0].dttk_value; 3363 uintptr_t dest = tupregs[1].dttk_value; 3364 size_t size = tupregs[2].dttk_value; 3365 3366 if (!dtrace_inscratch(dest, size, mstate)) { 3367 *flags |= CPU_DTRACE_BADADDR; 3368 *illval = regs[rd]; 3369 break; 3370 } 3371 3372 if (!dtrace_canload(src, size, mstate, vstate)) { 3373 regs[rd] = NULL; 3374 break; 3375 } 3376 3377 dtrace_bcopy((void *)src, (void *)dest, size); 3378 break; 3379 } 3380 3381 case DIF_SUBR_ALLOCA: 3382 case DIF_SUBR_COPYIN: { 3383 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 3384 uint64_t size = 3385 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; 3386 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; 3387 3388 /* 3389 * This action doesn't require any credential checks since 3390 * probes will not activate in user contexts to which the 3391 * enabling user does not have permissions. 
3392 */ 3393 3394 /* 3395 * Rounding up the user allocation size could have overflowed 3396 * a large, bogus allocation (like -1ULL) to 0. 3397 */ 3398 if (scratch_size < size || 3399 !DTRACE_INSCRATCH(mstate, scratch_size)) { 3400 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3401 regs[rd] = NULL; 3402 break; 3403 } 3404 3405 if (subr == DIF_SUBR_COPYIN) { 3406 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3407 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); 3408 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3409 } 3410 3411 mstate->dtms_scratch_ptr += scratch_size; 3412 regs[rd] = dest; 3413 break; 3414 } 3415 3416 case DIF_SUBR_COPYINTO: { 3417 uint64_t size = tupregs[1].dttk_value; 3418 uintptr_t dest = tupregs[2].dttk_value; 3419 3420 /* 3421 * This action doesn't require any credential checks since 3422 * probes will not activate in user contexts to which the 3423 * enabling user does not have permissions. 3424 */ 3425 if (!dtrace_inscratch(dest, size, mstate)) { 3426 *flags |= CPU_DTRACE_BADADDR; 3427 *illval = regs[rd]; 3428 break; 3429 } 3430 3431 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3432 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); 3433 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3434 break; 3435 } 3436 3437 case DIF_SUBR_COPYINSTR: { 3438 uintptr_t dest = mstate->dtms_scratch_ptr; 3439 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3440 3441 if (nargs > 1 && tupregs[1].dttk_value < size) 3442 size = tupregs[1].dttk_value + 1; 3443 3444 /* 3445 * This action doesn't require any credential checks since 3446 * probes will not activate in user contexts to which the 3447 * enabling user does not have permissions. 
3448 */ 3449 if (!DTRACE_INSCRATCH(mstate, size)) { 3450 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3451 regs[rd] = NULL; 3452 break; 3453 } 3454 3455 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3456 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); 3457 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3458 3459 ((char *)dest)[size - 1] = '\0'; 3460 mstate->dtms_scratch_ptr += size; 3461 regs[rd] = dest; 3462 break; 3463 } 3464 3465 case DIF_SUBR_MSGSIZE: 3466 case DIF_SUBR_MSGDSIZE: { 3467 uintptr_t baddr = tupregs[0].dttk_value, daddr; 3468 uintptr_t wptr, rptr; 3469 size_t count = 0; 3470 int cont = 0; 3471 3472 while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 3473 3474 if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, 3475 vstate)) { 3476 regs[rd] = NULL; 3477 break; 3478 } 3479 3480 wptr = dtrace_loadptr(baddr + 3481 offsetof(mblk_t, b_wptr)); 3482 3483 rptr = dtrace_loadptr(baddr + 3484 offsetof(mblk_t, b_rptr)); 3485 3486 if (wptr < rptr) { 3487 *flags |= CPU_DTRACE_BADADDR; 3488 *illval = tupregs[0].dttk_value; 3489 break; 3490 } 3491 3492 daddr = dtrace_loadptr(baddr + 3493 offsetof(mblk_t, b_datap)); 3494 3495 baddr = dtrace_loadptr(baddr + 3496 offsetof(mblk_t, b_cont)); 3497 3498 /* 3499 * We want to prevent against denial-of-service here, 3500 * so we're only going to search the list for 3501 * dtrace_msgdsize_max mblks. 
3502 */ 3503 if (cont++ > dtrace_msgdsize_max) { 3504 *flags |= CPU_DTRACE_ILLOP; 3505 break; 3506 } 3507 3508 if (subr == DIF_SUBR_MSGDSIZE) { 3509 if (dtrace_load8(daddr + 3510 offsetof(dblk_t, db_type)) != M_DATA) 3511 continue; 3512 } 3513 3514 count += wptr - rptr; 3515 } 3516 3517 if (!(*flags & CPU_DTRACE_FAULT)) 3518 regs[rd] = count; 3519 3520 break; 3521 } 3522 3523 case DIF_SUBR_PROGENYOF: { 3524 pid_t pid = tupregs[0].dttk_value; 3525 proc_t *p; 3526 int rval = 0; 3527 3528 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3529 3530 for (p = curthread->t_procp; p != NULL; p = p->p_parent) { 3531 if (p->p_pidp->pid_id == pid) { 3532 rval = 1; 3533 break; 3534 } 3535 } 3536 3537 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3538 3539 regs[rd] = rval; 3540 break; 3541 } 3542 3543 case DIF_SUBR_SPECULATION: 3544 regs[rd] = dtrace_speculation(state); 3545 break; 3546 3547 case DIF_SUBR_COPYOUT: { 3548 uintptr_t kaddr = tupregs[0].dttk_value; 3549 uintptr_t uaddr = tupregs[1].dttk_value; 3550 uint64_t size = tupregs[2].dttk_value; 3551 3552 if (!dtrace_destructive_disallow && 3553 dtrace_priv_proc_control(state, mstate) && 3554 !dtrace_istoxic(kaddr, size)) { 3555 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3556 dtrace_copyout(kaddr, uaddr, size, flags); 3557 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3558 } 3559 break; 3560 } 3561 3562 case DIF_SUBR_COPYOUTSTR: { 3563 uintptr_t kaddr = tupregs[0].dttk_value; 3564 uintptr_t uaddr = tupregs[1].dttk_value; 3565 uint64_t size = tupregs[2].dttk_value; 3566 3567 if (!dtrace_destructive_disallow && 3568 dtrace_priv_proc_control(state, mstate) && 3569 !dtrace_istoxic(kaddr, size)) { 3570 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3571 dtrace_copyoutstr(kaddr, uaddr, size, flags); 3572 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3573 } 3574 break; 3575 } 3576 3577 case DIF_SUBR_STRLEN: { 3578 size_t sz; 3579 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; 3580 sz = dtrace_strlen((char *)addr, 3581 state->dts_options[DTRACEOPT_STRSIZE]); 
3582 3583 if (!dtrace_canload(addr, sz + 1, mstate, vstate)) { 3584 regs[rd] = NULL; 3585 break; 3586 } 3587 3588 regs[rd] = sz; 3589 3590 break; 3591 } 3592 3593 case DIF_SUBR_STRCHR: 3594 case DIF_SUBR_STRRCHR: { 3595 /* 3596 * We're going to iterate over the string looking for the 3597 * specified character. We will iterate until we have reached 3598 * the string length or we have found the character. If this 3599 * is DIF_SUBR_STRRCHR, we will look for the last occurrence 3600 * of the specified character instead of the first. 3601 */ 3602 uintptr_t saddr = tupregs[0].dttk_value; 3603 uintptr_t addr = tupregs[0].dttk_value; 3604 uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; 3605 char c, target = (char)tupregs[1].dttk_value; 3606 3607 for (regs[rd] = NULL; addr < limit; addr++) { 3608 if ((c = dtrace_load8(addr)) == target) { 3609 regs[rd] = addr; 3610 3611 if (subr == DIF_SUBR_STRCHR) 3612 break; 3613 } 3614 3615 if (c == '\0') 3616 break; 3617 } 3618 3619 if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) { 3620 regs[rd] = NULL; 3621 break; 3622 } 3623 3624 break; 3625 } 3626 3627 case DIF_SUBR_STRSTR: 3628 case DIF_SUBR_INDEX: 3629 case DIF_SUBR_RINDEX: { 3630 /* 3631 * We're going to iterate over the string looking for the 3632 * specified string. We will iterate until we have reached 3633 * the string length or we have found the string. (Yes, this 3634 * is done in the most naive way possible -- but considering 3635 * that the string we're searching for is likely to be 3636 * relatively short, the complexity of Rabin-Karp or similar 3637 * hardly seems merited.) 3638 */ 3639 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value; 3640 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value; 3641 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3642 size_t len = dtrace_strlen(addr, size); 3643 size_t sublen = dtrace_strlen(substr, size); 3644 char *limit = addr + len, *orig = addr; 3645 int notfound = subr == DIF_SUBR_STRSTR ? 
0 : -1; 3646 int inc = 1; 3647 3648 regs[rd] = notfound; 3649 3650 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { 3651 regs[rd] = NULL; 3652 break; 3653 } 3654 3655 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, 3656 vstate)) { 3657 regs[rd] = NULL; 3658 break; 3659 } 3660 3661 /* 3662 * strstr() and index()/rindex() have similar semantics if 3663 * both strings are the empty string: strstr() returns a 3664 * pointer to the (empty) string, and index() and rindex() 3665 * both return index 0 (regardless of any position argument). 3666 */ 3667 if (sublen == 0 && len == 0) { 3668 if (subr == DIF_SUBR_STRSTR) 3669 regs[rd] = (uintptr_t)addr; 3670 else 3671 regs[rd] = 0; 3672 break; 3673 } 3674 3675 if (subr != DIF_SUBR_STRSTR) { 3676 if (subr == DIF_SUBR_RINDEX) { 3677 limit = orig - 1; 3678 addr += len; 3679 inc = -1; 3680 } 3681 3682 /* 3683 * Both index() and rindex() take an optional position 3684 * argument that denotes the starting position. 3685 */ 3686 if (nargs == 3) { 3687 int64_t pos = (int64_t)tupregs[2].dttk_value; 3688 3689 /* 3690 * If the position argument to index() is 3691 * negative, Perl implicitly clamps it at 3692 * zero. This semantic is a little surprising 3693 * given the special meaning of negative 3694 * positions to similar Perl functions like 3695 * substr(), but it appears to reflect a 3696 * notion that index() can start from a 3697 * negative index and increment its way up to 3698 * the string. Given this notion, Perl's 3699 * rindex() is at least self-consistent in 3700 * that it implicitly clamps positions greater 3701 * than the string length to be the string 3702 * length. Where Perl completely loses 3703 * coherence, however, is when the specified 3704 * substring is the empty string (""). In 3705 * this case, even if the position is 3706 * negative, rindex() returns 0 -- and even if 3707 * the position is greater than the length, 3708 * index() returns the string length. 
These 3709 * semantics violate the notion that index() 3710 * should never return a value less than the 3711 * specified position and that rindex() should 3712 * never return a value greater than the 3713 * specified position. (One assumes that 3714 * these semantics are artifacts of Perl's 3715 * implementation and not the results of 3716 * deliberate design -- it beggars belief that 3717 * even Larry Wall could desire such oddness.) 3718 * While in the abstract one would wish for 3719 * consistent position semantics across 3720 * substr(), index() and rindex() -- or at the 3721 * very least self-consistent position 3722 * semantics for index() and rindex() -- we 3723 * instead opt to keep with the extant Perl 3724 * semantics, in all their broken glory. (Do 3725 * we have more desire to maintain Perl's 3726 * semantics than Perl does? Probably.) 3727 */ 3728 if (subr == DIF_SUBR_RINDEX) { 3729 if (pos < 0) { 3730 if (sublen == 0) 3731 regs[rd] = 0; 3732 break; 3733 } 3734 3735 if (pos > len) 3736 pos = len; 3737 } else { 3738 if (pos < 0) 3739 pos = 0; 3740 3741 if (pos >= len) { 3742 if (sublen == 0) 3743 regs[rd] = len; 3744 break; 3745 } 3746 } 3747 3748 addr = orig + pos; 3749 } 3750 } 3751 3752 for (regs[rd] = notfound; addr != limit; addr += inc) { 3753 if (dtrace_strncmp(addr, substr, sublen) == 0) { 3754 if (subr != DIF_SUBR_STRSTR) { 3755 /* 3756 * As D index() and rindex() are 3757 * modeled on Perl (and not on awk), 3758 * we return a zero-based (and not a 3759 * one-based) index. (For you Perl 3760 * weenies: no, we're not going to add 3761 * $[ -- and shouldn't you be at a con 3762 * or something?) 
3763 */ 3764 regs[rd] = (uintptr_t)(addr - orig); 3765 break; 3766 } 3767 3768 ASSERT(subr == DIF_SUBR_STRSTR); 3769 regs[rd] = (uintptr_t)addr; 3770 break; 3771 } 3772 } 3773 3774 break; 3775 } 3776 3777 case DIF_SUBR_STRTOK: { 3778 uintptr_t addr = tupregs[0].dttk_value; 3779 uintptr_t tokaddr = tupregs[1].dttk_value; 3780 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3781 uintptr_t limit, toklimit = tokaddr + size; 3782 uint8_t c, tokmap[32]; /* 256 / 8 */ 3783 char *dest = (char *)mstate->dtms_scratch_ptr; 3784 int i; 3785 3786 /* 3787 * Check both the token buffer and (later) the input buffer, 3788 * since both could be non-scratch addresses. 3789 */ 3790 if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) { 3791 regs[rd] = NULL; 3792 break; 3793 } 3794 3795 if (!DTRACE_INSCRATCH(mstate, size)) { 3796 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3797 regs[rd] = NULL; 3798 break; 3799 } 3800 3801 if (addr == NULL) { 3802 /* 3803 * If the address specified is NULL, we use our saved 3804 * strtok pointer from the mstate. Note that this 3805 * means that the saved strtok pointer is _only_ 3806 * valid within multiple enablings of the same probe -- 3807 * it behaves like an implicit clause-local variable. 3808 */ 3809 addr = mstate->dtms_strtok; 3810 } else { 3811 /* 3812 * If the user-specified address is non-NULL we must 3813 * access check it. This is the only time we have 3814 * a chance to do so, since this address may reside 3815 * in the string table of this clause-- future calls 3816 * (when we fetch addr from mstate->dtms_strtok) 3817 * would fail this access check. 3818 */ 3819 if (!dtrace_strcanload(addr, size, mstate, vstate)) { 3820 regs[rd] = NULL; 3821 break; 3822 } 3823 } 3824 3825 /* 3826 * First, zero the token map, and then process the token 3827 * string -- setting a bit in the map for every character 3828 * found in the token string. 
3829 */ 3830 for (i = 0; i < sizeof (tokmap); i++) 3831 tokmap[i] = 0; 3832 3833 for (; tokaddr < toklimit; tokaddr++) { 3834 if ((c = dtrace_load8(tokaddr)) == '\0') 3835 break; 3836 3837 ASSERT((c >> 3) < sizeof (tokmap)); 3838 tokmap[c >> 3] |= (1 << (c & 0x7)); 3839 } 3840 3841 for (limit = addr + size; addr < limit; addr++) { 3842 /* 3843 * We're looking for a character that is _not_ contained 3844 * in the token string. 3845 */ 3846 if ((c = dtrace_load8(addr)) == '\0') 3847 break; 3848 3849 if (!(tokmap[c >> 3] & (1 << (c & 0x7)))) 3850 break; 3851 } 3852 3853 if (c == '\0') { 3854 /* 3855 * We reached the end of the string without finding 3856 * any character that was not in the token string. 3857 * We return NULL in this case, and we set the saved 3858 * address to NULL as well. 3859 */ 3860 regs[rd] = NULL; 3861 mstate->dtms_strtok = NULL; 3862 break; 3863 } 3864 3865 /* 3866 * From here on, we're copying into the destination string. 3867 */ 3868 for (i = 0; addr < limit && i < size - 1; addr++) { 3869 if ((c = dtrace_load8(addr)) == '\0') 3870 break; 3871 3872 if (tokmap[c >> 3] & (1 << (c & 0x7))) 3873 break; 3874 3875 ASSERT(i < size); 3876 dest[i++] = c; 3877 } 3878 3879 ASSERT(i < size); 3880 dest[i] = '\0'; 3881 regs[rd] = (uintptr_t)dest; 3882 mstate->dtms_scratch_ptr += size; 3883 mstate->dtms_strtok = addr; 3884 break; 3885 } 3886 3887 case DIF_SUBR_SUBSTR: { 3888 uintptr_t s = tupregs[0].dttk_value; 3889 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3890 char *d = (char *)mstate->dtms_scratch_ptr; 3891 int64_t index = (int64_t)tupregs[1].dttk_value; 3892 int64_t remaining = (int64_t)tupregs[2].dttk_value; 3893 size_t len = dtrace_strlen((char *)s, size); 3894 int64_t i; 3895 3896 if (!dtrace_canload(s, len + 1, mstate, vstate)) { 3897 regs[rd] = NULL; 3898 break; 3899 } 3900 3901 if (!DTRACE_INSCRATCH(mstate, size)) { 3902 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3903 regs[rd] = NULL; 3904 break; 3905 } 3906 3907 if (nargs <= 2) 3908 
remaining = (int64_t)size; 3909 3910 if (index < 0) { 3911 index += len; 3912 3913 if (index < 0 && index + remaining > 0) { 3914 remaining += index; 3915 index = 0; 3916 } 3917 } 3918 3919 if (index >= len || index < 0) { 3920 remaining = 0; 3921 } else if (remaining < 0) { 3922 remaining += len - index; 3923 } else if (index + remaining > size) { 3924 remaining = size - index; 3925 } 3926 3927 for (i = 0; i < remaining; i++) { 3928 if ((d[i] = dtrace_load8(s + index + i)) == '\0') 3929 break; 3930 } 3931 3932 d[i] = '\0'; 3933 3934 mstate->dtms_scratch_ptr += size; 3935 regs[rd] = (uintptr_t)d; 3936 break; 3937 } 3938 3939 case DIF_SUBR_TOUPPER: 3940 case DIF_SUBR_TOLOWER: { 3941 uintptr_t s = tupregs[0].dttk_value; 3942 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3943 char *dest = (char *)mstate->dtms_scratch_ptr, c; 3944 size_t len = dtrace_strlen((char *)s, size); 3945 char lower, upper, convert; 3946 int64_t i; 3947 3948 if (subr == DIF_SUBR_TOUPPER) { 3949 lower = 'a'; 3950 upper = 'z'; 3951 convert = 'A'; 3952 } else { 3953 lower = 'A'; 3954 upper = 'Z'; 3955 convert = 'a'; 3956 } 3957 3958 if (!dtrace_canload(s, len + 1, mstate, vstate)) { 3959 regs[rd] = NULL; 3960 break; 3961 } 3962 3963 if (!DTRACE_INSCRATCH(mstate, size)) { 3964 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3965 regs[rd] = NULL; 3966 break; 3967 } 3968 3969 for (i = 0; i < size - 1; i++) { 3970 if ((c = dtrace_load8(s + i)) == '\0') 3971 break; 3972 3973 if (c >= lower && c <= upper) 3974 c = convert + (c - lower); 3975 3976 dest[i] = c; 3977 } 3978 3979 ASSERT(i < size); 3980 dest[i] = '\0'; 3981 regs[rd] = (uintptr_t)dest; 3982 mstate->dtms_scratch_ptr += size; 3983 break; 3984 } 3985 3986 case DIF_SUBR_GETMAJOR: 3987 #ifdef _LP64 3988 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; 3989 #else 3990 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; 3991 #endif 3992 break; 3993 3994 case DIF_SUBR_GETMINOR: 3995 #ifdef _LP64 3996 regs[rd] = 
tupregs[0].dttk_value & MAXMIN64; 3997 #else 3998 regs[rd] = tupregs[0].dttk_value & MAXMIN; 3999 #endif 4000 break; 4001 4002 case DIF_SUBR_DDI_PATHNAME: { 4003 /* 4004 * This one is a galactic mess. We are going to roughly 4005 * emulate ddi_pathname(), but it's made more complicated 4006 * by the fact that we (a) want to include the minor name and 4007 * (b) must proceed iteratively instead of recursively. 4008 */ 4009 uintptr_t dest = mstate->dtms_scratch_ptr; 4010 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4011 char *start = (char *)dest, *end = start + size - 1; 4012 uintptr_t daddr = tupregs[0].dttk_value; 4013 int64_t minor = (int64_t)tupregs[1].dttk_value; 4014 char *s; 4015 int i, len, depth = 0; 4016 4017 /* 4018 * Due to all the pointer jumping we do and context we must 4019 * rely upon, we just mandate that the user must have kernel 4020 * read privileges to use this routine. 4021 */ 4022 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { 4023 *flags |= CPU_DTRACE_KPRIV; 4024 *illval = daddr; 4025 regs[rd] = NULL; 4026 } 4027 4028 if (!DTRACE_INSCRATCH(mstate, size)) { 4029 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4030 regs[rd] = NULL; 4031 break; 4032 } 4033 4034 *end = '\0'; 4035 4036 /* 4037 * We want to have a name for the minor. In order to do this, 4038 * we need to walk the minor list from the devinfo. We want 4039 * to be sure that we don't infinitely walk a circular list, 4040 * so we check for circularity by sending a scout pointer 4041 * ahead two elements for every element that we iterate over; 4042 * if the list is circular, these will ultimately point to the 4043 * same element. You may recognize this little trick as the 4044 * answer to a stupid interview question -- one that always 4045 * seems to be asked by those who had to have it laboriously 4046 * explained to them, and who can't even concisely describe 4047 * the conditions under which one would be forced to resort to 4048 * this technique. 
Needless to say, those conditions are 4049 * found here -- and probably only here. Is this the only use 4050 * of this infamous trick in shipping, production code? If it 4051 * isn't, it probably should be... 4052 */ 4053 if (minor != -1) { 4054 uintptr_t maddr = dtrace_loadptr(daddr + 4055 offsetof(struct dev_info, devi_minor)); 4056 4057 uintptr_t next = offsetof(struct ddi_minor_data, next); 4058 uintptr_t name = offsetof(struct ddi_minor_data, 4059 d_minor) + offsetof(struct ddi_minor, name); 4060 uintptr_t dev = offsetof(struct ddi_minor_data, 4061 d_minor) + offsetof(struct ddi_minor, dev); 4062 uintptr_t scout; 4063 4064 if (maddr != NULL) 4065 scout = dtrace_loadptr(maddr + next); 4066 4067 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 4068 uint64_t m; 4069 #ifdef _LP64 4070 m = dtrace_load64(maddr + dev) & MAXMIN64; 4071 #else 4072 m = dtrace_load32(maddr + dev) & MAXMIN; 4073 #endif 4074 if (m != minor) { 4075 maddr = dtrace_loadptr(maddr + next); 4076 4077 if (scout == NULL) 4078 continue; 4079 4080 scout = dtrace_loadptr(scout + next); 4081 4082 if (scout == NULL) 4083 continue; 4084 4085 scout = dtrace_loadptr(scout + next); 4086 4087 if (scout == NULL) 4088 continue; 4089 4090 if (scout == maddr) { 4091 *flags |= CPU_DTRACE_ILLOP; 4092 break; 4093 } 4094 4095 continue; 4096 } 4097 4098 /* 4099 * We have the minor data. Now we need to 4100 * copy the minor's name into the end of the 4101 * pathname. 
4102 */ 4103 s = (char *)dtrace_loadptr(maddr + name); 4104 len = dtrace_strlen(s, size); 4105 4106 if (*flags & CPU_DTRACE_FAULT) 4107 break; 4108 4109 if (len != 0) { 4110 if ((end -= (len + 1)) < start) 4111 break; 4112 4113 *end = ':'; 4114 } 4115 4116 for (i = 1; i <= len; i++) 4117 end[i] = dtrace_load8((uintptr_t)s++); 4118 break; 4119 } 4120 } 4121 4122 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 4123 ddi_node_state_t devi_state; 4124 4125 devi_state = dtrace_load32(daddr + 4126 offsetof(struct dev_info, devi_node_state)); 4127 4128 if (*flags & CPU_DTRACE_FAULT) 4129 break; 4130 4131 if (devi_state >= DS_INITIALIZED) { 4132 s = (char *)dtrace_loadptr(daddr + 4133 offsetof(struct dev_info, devi_addr)); 4134 len = dtrace_strlen(s, size); 4135 4136 if (*flags & CPU_DTRACE_FAULT) 4137 break; 4138 4139 if (len != 0) { 4140 if ((end -= (len + 1)) < start) 4141 break; 4142 4143 *end = '@'; 4144 } 4145 4146 for (i = 1; i <= len; i++) 4147 end[i] = dtrace_load8((uintptr_t)s++); 4148 } 4149 4150 /* 4151 * Now for the node name... 4152 */ 4153 s = (char *)dtrace_loadptr(daddr + 4154 offsetof(struct dev_info, devi_node_name)); 4155 4156 daddr = dtrace_loadptr(daddr + 4157 offsetof(struct dev_info, devi_parent)); 4158 4159 /* 4160 * If our parent is NULL (that is, if we're the root 4161 * node), we're going to use the special path 4162 * "devices". 
4163 */ 4164 if (daddr == NULL) 4165 s = "devices"; 4166 4167 len = dtrace_strlen(s, size); 4168 if (*flags & CPU_DTRACE_FAULT) 4169 break; 4170 4171 if ((end -= (len + 1)) < start) 4172 break; 4173 4174 for (i = 1; i <= len; i++) 4175 end[i] = dtrace_load8((uintptr_t)s++); 4176 *end = '/'; 4177 4178 if (depth++ > dtrace_devdepth_max) { 4179 *flags |= CPU_DTRACE_ILLOP; 4180 break; 4181 } 4182 } 4183 4184 if (end < start) 4185 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4186 4187 if (daddr == NULL) { 4188 regs[rd] = (uintptr_t)end; 4189 mstate->dtms_scratch_ptr += size; 4190 } 4191 4192 break; 4193 } 4194 4195 case DIF_SUBR_STRJOIN: { 4196 char *d = (char *)mstate->dtms_scratch_ptr; 4197 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4198 uintptr_t s1 = tupregs[0].dttk_value; 4199 uintptr_t s2 = tupregs[1].dttk_value; 4200 int i = 0; 4201 4202 if (!dtrace_strcanload(s1, size, mstate, vstate) || 4203 !dtrace_strcanload(s2, size, mstate, vstate)) { 4204 regs[rd] = NULL; 4205 break; 4206 } 4207 4208 if (!DTRACE_INSCRATCH(mstate, size)) { 4209 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4210 regs[rd] = NULL; 4211 break; 4212 } 4213 4214 for (;;) { 4215 if (i >= size) { 4216 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4217 regs[rd] = NULL; 4218 break; 4219 } 4220 4221 if ((d[i++] = dtrace_load8(s1++)) == '\0') { 4222 i--; 4223 break; 4224 } 4225 } 4226 4227 for (;;) { 4228 if (i >= size) { 4229 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4230 regs[rd] = NULL; 4231 break; 4232 } 4233 4234 if ((d[i++] = dtrace_load8(s2++)) == '\0') 4235 break; 4236 } 4237 4238 if (i < size) { 4239 mstate->dtms_scratch_ptr += i; 4240 regs[rd] = (uintptr_t)d; 4241 } 4242 4243 break; 4244 } 4245 4246 case DIF_SUBR_LLTOSTR: { 4247 int64_t i = (int64_t)tupregs[0].dttk_value; 4248 uint64_t val, digit; 4249 uint64_t size = 65; /* enough room for 2^64 in binary */ 4250 char *end = (char *)mstate->dtms_scratch_ptr + size - 1; 4251 int base = 10; 4252 4253 if (nargs > 1) { 4254 if ((base = 
tupregs[1].dttk_value) <= 1 || 4255 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { 4256 *flags |= CPU_DTRACE_ILLOP; 4257 break; 4258 } 4259 } 4260 4261 val = (base == 10 && i < 0) ? i * -1 : i; 4262 4263 if (!DTRACE_INSCRATCH(mstate, size)) { 4264 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4265 regs[rd] = NULL; 4266 break; 4267 } 4268 4269 for (*end-- = '\0'; val; val /= base) { 4270 if ((digit = val % base) <= '9' - '0') { 4271 *end-- = '0' + digit; 4272 } else { 4273 *end-- = 'a' + (digit - ('9' - '0') - 1); 4274 } 4275 } 4276 4277 if (i == 0 && base == 16) 4278 *end-- = '0'; 4279 4280 if (base == 16) 4281 *end-- = 'x'; 4282 4283 if (i == 0 || base == 8 || base == 16) 4284 *end-- = '0'; 4285 4286 if (i < 0 && base == 10) 4287 *end-- = '-'; 4288 4289 regs[rd] = (uintptr_t)end + 1; 4290 mstate->dtms_scratch_ptr += size; 4291 break; 4292 } 4293 4294 case DIF_SUBR_HTONS: 4295 case DIF_SUBR_NTOHS: 4296 #ifdef _BIG_ENDIAN 4297 regs[rd] = (uint16_t)tupregs[0].dttk_value; 4298 #else 4299 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); 4300 #endif 4301 break; 4302 4303 4304 case DIF_SUBR_HTONL: 4305 case DIF_SUBR_NTOHL: 4306 #ifdef _BIG_ENDIAN 4307 regs[rd] = (uint32_t)tupregs[0].dttk_value; 4308 #else 4309 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); 4310 #endif 4311 break; 4312 4313 4314 case DIF_SUBR_HTONLL: 4315 case DIF_SUBR_NTOHLL: 4316 #ifdef _BIG_ENDIAN 4317 regs[rd] = (uint64_t)tupregs[0].dttk_value; 4318 #else 4319 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); 4320 #endif 4321 break; 4322 4323 4324 case DIF_SUBR_DIRNAME: 4325 case DIF_SUBR_BASENAME: { 4326 char *dest = (char *)mstate->dtms_scratch_ptr; 4327 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4328 uintptr_t src = tupregs[0].dttk_value; 4329 int i, j, len = dtrace_strlen((char *)src, size); 4330 int lastbase = -1, firstbase = -1, lastdir = -1; 4331 int start, end; 4332 4333 if (!dtrace_canload(src, len + 1, mstate, vstate)) { 4334 regs[rd] = NULL; 4335 break; 4336 } 
4337 4338 if (!DTRACE_INSCRATCH(mstate, size)) { 4339 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4340 regs[rd] = NULL; 4341 break; 4342 } 4343 4344 /* 4345 * The basename and dirname for a zero-length string is 4346 * defined to be "." 4347 */ 4348 if (len == 0) { 4349 len = 1; 4350 src = (uintptr_t)"."; 4351 } 4352 4353 /* 4354 * Start from the back of the string, moving back toward the 4355 * front until we see a character that isn't a slash. That 4356 * character is the last character in the basename. 4357 */ 4358 for (i = len - 1; i >= 0; i--) { 4359 if (dtrace_load8(src + i) != '/') 4360 break; 4361 } 4362 4363 if (i >= 0) 4364 lastbase = i; 4365 4366 /* 4367 * Starting from the last character in the basename, move 4368 * towards the front until we find a slash. The character 4369 * that we processed immediately before that is the first 4370 * character in the basename. 4371 */ 4372 for (; i >= 0; i--) { 4373 if (dtrace_load8(src + i) == '/') 4374 break; 4375 } 4376 4377 if (i >= 0) 4378 firstbase = i + 1; 4379 4380 /* 4381 * Now keep going until we find a non-slash character. That 4382 * character is the last character in the dirname. 4383 */ 4384 for (; i >= 0; i--) { 4385 if (dtrace_load8(src + i) != '/') 4386 break; 4387 } 4388 4389 if (i >= 0) 4390 lastdir = i; 4391 4392 ASSERT(!(lastbase == -1 && firstbase != -1)); 4393 ASSERT(!(firstbase == -1 && lastdir != -1)); 4394 4395 if (lastbase == -1) { 4396 /* 4397 * We didn't find a non-slash character. We know that 4398 * the length is non-zero, so the whole string must be 4399 * slashes. In either the dirname or the basename 4400 * case, we return '/'. 4401 */ 4402 ASSERT(firstbase == -1); 4403 firstbase = lastbase = lastdir = 0; 4404 } 4405 4406 if (firstbase == -1) { 4407 /* 4408 * The entire string consists only of a basename 4409 * component. 
If we're looking for dirname, we need 4410 * to change our string to be just "."; if we're 4411 * looking for a basename, we'll just set the first 4412 * character of the basename to be 0. 4413 */ 4414 if (subr == DIF_SUBR_DIRNAME) { 4415 ASSERT(lastdir == -1); 4416 src = (uintptr_t)"."; 4417 lastdir = 0; 4418 } else { 4419 firstbase = 0; 4420 } 4421 } 4422 4423 if (subr == DIF_SUBR_DIRNAME) { 4424 if (lastdir == -1) { 4425 /* 4426 * We know that we have a slash in the name -- 4427 * or lastdir would be set to 0, above. And 4428 * because lastdir is -1, we know that this 4429 * slash must be the first character. (That 4430 * is, the full string must be of the form 4431 * "/basename".) In this case, the last 4432 * character of the directory name is 0. 4433 */ 4434 lastdir = 0; 4435 } 4436 4437 start = 0; 4438 end = lastdir; 4439 } else { 4440 ASSERT(subr == DIF_SUBR_BASENAME); 4441 ASSERT(firstbase != -1 && lastbase != -1); 4442 start = firstbase; 4443 end = lastbase; 4444 } 4445 4446 for (i = start, j = 0; i <= end && j < size - 1; i++, j++) 4447 dest[j] = dtrace_load8(src + i); 4448 4449 dest[j] = '\0'; 4450 regs[rd] = (uintptr_t)dest; 4451 mstate->dtms_scratch_ptr += size; 4452 break; 4453 } 4454 4455 case DIF_SUBR_CLEANPATH: { 4456 char *dest = (char *)mstate->dtms_scratch_ptr, c; 4457 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4458 uintptr_t src = tupregs[0].dttk_value; 4459 int i = 0, j = 0; 4460 4461 if (!dtrace_strcanload(src, size, mstate, vstate)) { 4462 regs[rd] = NULL; 4463 break; 4464 } 4465 4466 if (!DTRACE_INSCRATCH(mstate, size)) { 4467 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4468 regs[rd] = NULL; 4469 break; 4470 } 4471 4472 /* 4473 * Move forward, loading each character. 
4474 */ 4475 do { 4476 c = dtrace_load8(src + i++); 4477 next: 4478 if (j + 5 >= size) /* 5 = strlen("/..c\0") */ 4479 break; 4480 4481 if (c != '/') { 4482 dest[j++] = c; 4483 continue; 4484 } 4485 4486 c = dtrace_load8(src + i++); 4487 4488 if (c == '/') { 4489 /* 4490 * We have two slashes -- we can just advance 4491 * to the next character. 4492 */ 4493 goto next; 4494 } 4495 4496 if (c != '.') { 4497 /* 4498 * This is not "." and it's not ".." -- we can 4499 * just store the "/" and this character and 4500 * drive on. 4501 */ 4502 dest[j++] = '/'; 4503 dest[j++] = c; 4504 continue; 4505 } 4506 4507 c = dtrace_load8(src + i++); 4508 4509 if (c == '/') { 4510 /* 4511 * This is a "/./" component. We're not going 4512 * to store anything in the destination buffer; 4513 * we're just going to go to the next component. 4514 */ 4515 goto next; 4516 } 4517 4518 if (c != '.') { 4519 /* 4520 * This is not ".." -- we can just store the 4521 * "/." and this character and continue 4522 * processing. 4523 */ 4524 dest[j++] = '/'; 4525 dest[j++] = '.'; 4526 dest[j++] = c; 4527 continue; 4528 } 4529 4530 c = dtrace_load8(src + i++); 4531 4532 if (c != '/' && c != '\0') { 4533 /* 4534 * This is not ".." -- it's "..[mumble]". 4535 * We'll store the "/.." and this character 4536 * and continue processing. 4537 */ 4538 dest[j++] = '/'; 4539 dest[j++] = '.'; 4540 dest[j++] = '.'; 4541 dest[j++] = c; 4542 continue; 4543 } 4544 4545 /* 4546 * This is "/../" or "/..\0". We need to back up 4547 * our destination pointer until we find a "/". 
4548 */ 4549 i--; 4550 while (j != 0 && dest[--j] != '/') 4551 continue; 4552 4553 if (c == '\0') 4554 dest[++j] = '/'; 4555 } while (c != '\0'); 4556 4557 dest[j] = '\0'; 4558 regs[rd] = (uintptr_t)dest; 4559 mstate->dtms_scratch_ptr += size; 4560 break; 4561 } 4562 4563 case DIF_SUBR_INET_NTOA: 4564 case DIF_SUBR_INET_NTOA6: 4565 case DIF_SUBR_INET_NTOP: { 4566 size_t size; 4567 int af, argi, i; 4568 char *base, *end; 4569 4570 if (subr == DIF_SUBR_INET_NTOP) { 4571 af = (int)tupregs[0].dttk_value; 4572 argi = 1; 4573 } else { 4574 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6; 4575 argi = 0; 4576 } 4577 4578 if (af == AF_INET) { 4579 ipaddr_t ip4; 4580 uint8_t *ptr8, val; 4581 4582 /* 4583 * Safely load the IPv4 address. 4584 */ 4585 ip4 = dtrace_load32(tupregs[argi].dttk_value); 4586 4587 /* 4588 * Check an IPv4 string will fit in scratch. 4589 */ 4590 size = INET_ADDRSTRLEN; 4591 if (!DTRACE_INSCRATCH(mstate, size)) { 4592 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4593 regs[rd] = NULL; 4594 break; 4595 } 4596 base = (char *)mstate->dtms_scratch_ptr; 4597 end = (char *)mstate->dtms_scratch_ptr + size - 1; 4598 4599 /* 4600 * Stringify as a dotted decimal quad. 4601 */ 4602 *end-- = '\0'; 4603 ptr8 = (uint8_t *)&ip4; 4604 for (i = 3; i >= 0; i--) { 4605 val = ptr8[i]; 4606 4607 if (val == 0) { 4608 *end-- = '0'; 4609 } else { 4610 for (; val; val /= 10) { 4611 *end-- = '0' + (val % 10); 4612 } 4613 } 4614 4615 if (i > 0) 4616 *end-- = '.'; 4617 } 4618 ASSERT(end + 1 >= base); 4619 4620 } else if (af == AF_INET6) { 4621 struct in6_addr ip6; 4622 int firstzero, tryzero, numzero, v6end; 4623 uint16_t val; 4624 const char digits[] = "0123456789abcdef"; 4625 4626 /* 4627 * Stringify using RFC 1884 convention 2 - 16 bit 4628 * hexadecimal values with a zero-run compression. 4629 * Lower case hexadecimal digits are used. 4630 * eg, fe80::214:4fff:fe0b:76c8. 
4631 * The IPv4 embedded form is returned for inet_ntop, 4632 * just the IPv4 string is returned for inet_ntoa6. 4633 */ 4634 4635 /* 4636 * Safely load the IPv6 address. 4637 */ 4638 dtrace_bcopy( 4639 (void *)(uintptr_t)tupregs[argi].dttk_value, 4640 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr)); 4641 4642 /* 4643 * Check an IPv6 string will fit in scratch. 4644 */ 4645 size = INET6_ADDRSTRLEN; 4646 if (!DTRACE_INSCRATCH(mstate, size)) { 4647 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4648 regs[rd] = NULL; 4649 break; 4650 } 4651 base = (char *)mstate->dtms_scratch_ptr; 4652 end = (char *)mstate->dtms_scratch_ptr + size - 1; 4653 *end-- = '\0'; 4654 4655 /* 4656 * Find the longest run of 16 bit zero values 4657 * for the single allowed zero compression - "::". 4658 */ 4659 firstzero = -1; 4660 tryzero = -1; 4661 numzero = 1; 4662 for (i = 0; i < sizeof (struct in6_addr); i++) { 4663 if (ip6._S6_un._S6_u8[i] == 0 && 4664 tryzero == -1 && i % 2 == 0) { 4665 tryzero = i; 4666 continue; 4667 } 4668 4669 if (tryzero != -1 && 4670 (ip6._S6_un._S6_u8[i] != 0 || 4671 i == sizeof (struct in6_addr) - 1)) { 4672 4673 if (i - tryzero <= numzero) { 4674 tryzero = -1; 4675 continue; 4676 } 4677 4678 firstzero = tryzero; 4679 numzero = i - i % 2 - tryzero; 4680 tryzero = -1; 4681 4682 if (ip6._S6_un._S6_u8[i] == 0 && 4683 i == sizeof (struct in6_addr) - 1) 4684 numzero += 2; 4685 } 4686 } 4687 ASSERT(firstzero + numzero <= sizeof (struct in6_addr)); 4688 4689 /* 4690 * Check for an IPv4 embedded address. 
4691 */ 4692 v6end = sizeof (struct in6_addr) - 2; 4693 if (IN6_IS_ADDR_V4MAPPED(&ip6) || 4694 IN6_IS_ADDR_V4COMPAT(&ip6)) { 4695 for (i = sizeof (struct in6_addr) - 1; 4696 i >= DTRACE_V4MAPPED_OFFSET; i--) { 4697 ASSERT(end >= base); 4698 4699 val = ip6._S6_un._S6_u8[i]; 4700 4701 if (val == 0) { 4702 *end-- = '0'; 4703 } else { 4704 for (; val; val /= 10) { 4705 *end-- = '0' + val % 10; 4706 } 4707 } 4708 4709 if (i > DTRACE_V4MAPPED_OFFSET) 4710 *end-- = '.'; 4711 } 4712 4713 if (subr == DIF_SUBR_INET_NTOA6) 4714 goto inetout; 4715 4716 /* 4717 * Set v6end to skip the IPv4 address that 4718 * we have already stringified. 4719 */ 4720 v6end = 10; 4721 } 4722 4723 /* 4724 * Build the IPv6 string by working through the 4725 * address in reverse. 4726 */ 4727 for (i = v6end; i >= 0; i -= 2) { 4728 ASSERT(end >= base); 4729 4730 if (i == firstzero + numzero - 2) { 4731 *end-- = ':'; 4732 *end-- = ':'; 4733 i -= numzero - 2; 4734 continue; 4735 } 4736 4737 if (i < 14 && i != firstzero - 2) 4738 *end-- = ':'; 4739 4740 val = (ip6._S6_un._S6_u8[i] << 8) + 4741 ip6._S6_un._S6_u8[i + 1]; 4742 4743 if (val == 0) { 4744 *end-- = '0'; 4745 } else { 4746 for (; val; val /= 16) { 4747 *end-- = digits[val % 16]; 4748 } 4749 } 4750 } 4751 ASSERT(end + 1 >= base); 4752 4753 } else { 4754 /* 4755 * The user didn't use AH_INET or AH_INET6. 4756 */ 4757 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 4758 regs[rd] = NULL; 4759 break; 4760 } 4761 4762 inetout: regs[rd] = (uintptr_t)end + 1; 4763 mstate->dtms_scratch_ptr += size; 4764 break; 4765 } 4766 4767 } 4768 } 4769 4770 /* 4771 * Emulate the execution of DTrace IR instructions specified by the given 4772 * DIF object. This function is deliberately void of assertions as all of 4773 * the necessary checks are handled by a call to dtrace_difo_validate(). 
4774 */ 4775 static uint64_t 4776 dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, 4777 dtrace_vstate_t *vstate, dtrace_state_t *state) 4778 { 4779 const dif_instr_t *text = difo->dtdo_buf; 4780 const uint_t textlen = difo->dtdo_len; 4781 const char *strtab = difo->dtdo_strtab; 4782 const uint64_t *inttab = difo->dtdo_inttab; 4783 4784 uint64_t rval = 0; 4785 dtrace_statvar_t *svar; 4786 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; 4787 dtrace_difv_t *v; 4788 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 4789 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 4790 4791 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ 4792 uint64_t regs[DIF_DIR_NREGS]; 4793 uint64_t *tmp; 4794 4795 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; 4796 int64_t cc_r; 4797 uint_t pc = 0, id, opc; 4798 uint8_t ttop = 0; 4799 dif_instr_t instr; 4800 uint_t r1, r2, rd; 4801 4802 /* 4803 * We stash the current DIF object into the machine state: we need it 4804 * for subsequent access checking. 
4805 */ 4806 mstate->dtms_difo = difo; 4807 4808 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ 4809 4810 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { 4811 opc = pc; 4812 4813 instr = text[pc++]; 4814 r1 = DIF_INSTR_R1(instr); 4815 r2 = DIF_INSTR_R2(instr); 4816 rd = DIF_INSTR_RD(instr); 4817 4818 switch (DIF_INSTR_OP(instr)) { 4819 case DIF_OP_OR: 4820 regs[rd] = regs[r1] | regs[r2]; 4821 break; 4822 case DIF_OP_XOR: 4823 regs[rd] = regs[r1] ^ regs[r2]; 4824 break; 4825 case DIF_OP_AND: 4826 regs[rd] = regs[r1] & regs[r2]; 4827 break; 4828 case DIF_OP_SLL: 4829 regs[rd] = regs[r1] << regs[r2]; 4830 break; 4831 case DIF_OP_SRL: 4832 regs[rd] = regs[r1] >> regs[r2]; 4833 break; 4834 case DIF_OP_SUB: 4835 regs[rd] = regs[r1] - regs[r2]; 4836 break; 4837 case DIF_OP_ADD: 4838 regs[rd] = regs[r1] + regs[r2]; 4839 break; 4840 case DIF_OP_MUL: 4841 regs[rd] = regs[r1] * regs[r2]; 4842 break; 4843 case DIF_OP_SDIV: 4844 if (regs[r2] == 0) { 4845 regs[rd] = 0; 4846 *flags |= CPU_DTRACE_DIVZERO; 4847 } else { 4848 regs[rd] = (int64_t)regs[r1] / 4849 (int64_t)regs[r2]; 4850 } 4851 break; 4852 4853 case DIF_OP_UDIV: 4854 if (regs[r2] == 0) { 4855 regs[rd] = 0; 4856 *flags |= CPU_DTRACE_DIVZERO; 4857 } else { 4858 regs[rd] = regs[r1] / regs[r2]; 4859 } 4860 break; 4861 4862 case DIF_OP_SREM: 4863 if (regs[r2] == 0) { 4864 regs[rd] = 0; 4865 *flags |= CPU_DTRACE_DIVZERO; 4866 } else { 4867 regs[rd] = (int64_t)regs[r1] % 4868 (int64_t)regs[r2]; 4869 } 4870 break; 4871 4872 case DIF_OP_UREM: 4873 if (regs[r2] == 0) { 4874 regs[rd] = 0; 4875 *flags |= CPU_DTRACE_DIVZERO; 4876 } else { 4877 regs[rd] = regs[r1] % regs[r2]; 4878 } 4879 break; 4880 4881 case DIF_OP_NOT: 4882 regs[rd] = ~regs[r1]; 4883 break; 4884 case DIF_OP_MOV: 4885 regs[rd] = regs[r1]; 4886 break; 4887 case DIF_OP_CMP: 4888 cc_r = regs[r1] - regs[r2]; 4889 cc_n = cc_r < 0; 4890 cc_z = cc_r == 0; 4891 cc_v = 0; 4892 cc_c = regs[r1] < regs[r2]; 4893 break; 4894 case DIF_OP_TST: 4895 cc_n = cc_v = cc_c = 
0; 4896 cc_z = regs[r1] == 0; 4897 break; 4898 case DIF_OP_BA: 4899 pc = DIF_INSTR_LABEL(instr); 4900 break; 4901 case DIF_OP_BE: 4902 if (cc_z) 4903 pc = DIF_INSTR_LABEL(instr); 4904 break; 4905 case DIF_OP_BNE: 4906 if (cc_z == 0) 4907 pc = DIF_INSTR_LABEL(instr); 4908 break; 4909 case DIF_OP_BG: 4910 if ((cc_z | (cc_n ^ cc_v)) == 0) 4911 pc = DIF_INSTR_LABEL(instr); 4912 break; 4913 case DIF_OP_BGU: 4914 if ((cc_c | cc_z) == 0) 4915 pc = DIF_INSTR_LABEL(instr); 4916 break; 4917 case DIF_OP_BGE: 4918 if ((cc_n ^ cc_v) == 0) 4919 pc = DIF_INSTR_LABEL(instr); 4920 break; 4921 case DIF_OP_BGEU: 4922 if (cc_c == 0) 4923 pc = DIF_INSTR_LABEL(instr); 4924 break; 4925 case DIF_OP_BL: 4926 if (cc_n ^ cc_v) 4927 pc = DIF_INSTR_LABEL(instr); 4928 break; 4929 case DIF_OP_BLU: 4930 if (cc_c) 4931 pc = DIF_INSTR_LABEL(instr); 4932 break; 4933 case DIF_OP_BLE: 4934 if (cc_z | (cc_n ^ cc_v)) 4935 pc = DIF_INSTR_LABEL(instr); 4936 break; 4937 case DIF_OP_BLEU: 4938 if (cc_c | cc_z) 4939 pc = DIF_INSTR_LABEL(instr); 4940 break; 4941 case DIF_OP_RLDSB: 4942 if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) { 4943 *flags |= CPU_DTRACE_KPRIV; 4944 *illval = regs[r1]; 4945 break; 4946 } 4947 /*FALLTHROUGH*/ 4948 case DIF_OP_LDSB: 4949 regs[rd] = (int8_t)dtrace_load8(regs[r1]); 4950 break; 4951 case DIF_OP_RLDSH: 4952 if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) { 4953 *flags |= CPU_DTRACE_KPRIV; 4954 *illval = regs[r1]; 4955 break; 4956 } 4957 /*FALLTHROUGH*/ 4958 case DIF_OP_LDSH: 4959 regs[rd] = (int16_t)dtrace_load16(regs[r1]); 4960 break; 4961 case DIF_OP_RLDSW: 4962 if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) { 4963 *flags |= CPU_DTRACE_KPRIV; 4964 *illval = regs[r1]; 4965 break; 4966 } 4967 /*FALLTHROUGH*/ 4968 case DIF_OP_LDSW: 4969 regs[rd] = (int32_t)dtrace_load32(regs[r1]); 4970 break; 4971 case DIF_OP_RLDUB: 4972 if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) { 4973 *flags |= CPU_DTRACE_KPRIV; 4974 *illval = regs[r1]; 4975 break; 4976 } 4977 
/*FALLTHROUGH*/ 4978 case DIF_OP_LDUB: 4979 regs[rd] = dtrace_load8(regs[r1]); 4980 break; 4981 case DIF_OP_RLDUH: 4982 if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) { 4983 *flags |= CPU_DTRACE_KPRIV; 4984 *illval = regs[r1]; 4985 break; 4986 } 4987 /*FALLTHROUGH*/ 4988 case DIF_OP_LDUH: 4989 regs[rd] = dtrace_load16(regs[r1]); 4990 break; 4991 case DIF_OP_RLDUW: 4992 if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) { 4993 *flags |= CPU_DTRACE_KPRIV; 4994 *illval = regs[r1]; 4995 break; 4996 } 4997 /*FALLTHROUGH*/ 4998 case DIF_OP_LDUW: 4999 regs[rd] = dtrace_load32(regs[r1]); 5000 break; 5001 case DIF_OP_RLDX: 5002 if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) { 5003 *flags |= CPU_DTRACE_KPRIV; 5004 *illval = regs[r1]; 5005 break; 5006 } 5007 /*FALLTHROUGH*/ 5008 case DIF_OP_LDX: 5009 regs[rd] = dtrace_load64(regs[r1]); 5010 break; 5011 case DIF_OP_ULDSB: 5012 regs[rd] = (int8_t) 5013 dtrace_fuword8((void *)(uintptr_t)regs[r1]); 5014 break; 5015 case DIF_OP_ULDSH: 5016 regs[rd] = (int16_t) 5017 dtrace_fuword16((void *)(uintptr_t)regs[r1]); 5018 break; 5019 case DIF_OP_ULDSW: 5020 regs[rd] = (int32_t) 5021 dtrace_fuword32((void *)(uintptr_t)regs[r1]); 5022 break; 5023 case DIF_OP_ULDUB: 5024 regs[rd] = 5025 dtrace_fuword8((void *)(uintptr_t)regs[r1]); 5026 break; 5027 case DIF_OP_ULDUH: 5028 regs[rd] = 5029 dtrace_fuword16((void *)(uintptr_t)regs[r1]); 5030 break; 5031 case DIF_OP_ULDUW: 5032 regs[rd] = 5033 dtrace_fuword32((void *)(uintptr_t)regs[r1]); 5034 break; 5035 case DIF_OP_ULDX: 5036 regs[rd] = 5037 dtrace_fuword64((void *)(uintptr_t)regs[r1]); 5038 break; 5039 case DIF_OP_RET: 5040 rval = regs[rd]; 5041 pc = textlen; 5042 break; 5043 case DIF_OP_NOP: 5044 break; 5045 case DIF_OP_SETX: 5046 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)]; 5047 break; 5048 case DIF_OP_SETS: 5049 regs[rd] = (uint64_t)(uintptr_t) 5050 (strtab + DIF_INSTR_STRING(instr)); 5051 break; 5052 case DIF_OP_SCMP: { 5053 size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; 5054 
uintptr_t s1 = regs[r1]; 5055 uintptr_t s2 = regs[r2]; 5056 5057 if (s1 != NULL && 5058 !dtrace_strcanload(s1, sz, mstate, vstate)) 5059 break; 5060 if (s2 != NULL && 5061 !dtrace_strcanload(s2, sz, mstate, vstate)) 5062 break; 5063 5064 cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz); 5065 5066 cc_n = cc_r < 0; 5067 cc_z = cc_r == 0; 5068 cc_v = cc_c = 0; 5069 break; 5070 } 5071 case DIF_OP_LDGA: 5072 regs[rd] = dtrace_dif_variable(mstate, state, 5073 r1, regs[r2]); 5074 break; 5075 case DIF_OP_LDGS: 5076 id = DIF_INSTR_VAR(instr); 5077 5078 if (id >= DIF_VAR_OTHER_UBASE) { 5079 uintptr_t a; 5080 5081 id -= DIF_VAR_OTHER_UBASE; 5082 svar = vstate->dtvs_globals[id]; 5083 ASSERT(svar != NULL); 5084 v = &svar->dtsv_var; 5085 5086 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) { 5087 regs[rd] = svar->dtsv_data; 5088 break; 5089 } 5090 5091 a = (uintptr_t)svar->dtsv_data; 5092 5093 if (*(uint8_t *)a == UINT8_MAX) { 5094 /* 5095 * If the 0th byte is set to UINT8_MAX 5096 * then this is to be treated as a 5097 * reference to a NULL variable. 
5098 */ 5099 regs[rd] = NULL; 5100 } else { 5101 regs[rd] = a + sizeof (uint64_t); 5102 } 5103 5104 break; 5105 } 5106 5107 regs[rd] = dtrace_dif_variable(mstate, state, id, 0); 5108 break; 5109 5110 case DIF_OP_STGS: 5111 id = DIF_INSTR_VAR(instr); 5112 5113 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5114 id -= DIF_VAR_OTHER_UBASE; 5115 5116 svar = vstate->dtvs_globals[id]; 5117 ASSERT(svar != NULL); 5118 v = &svar->dtsv_var; 5119 5120 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5121 uintptr_t a = (uintptr_t)svar->dtsv_data; 5122 5123 ASSERT(a != NULL); 5124 ASSERT(svar->dtsv_size != 0); 5125 5126 if (regs[rd] == NULL) { 5127 *(uint8_t *)a = UINT8_MAX; 5128 break; 5129 } else { 5130 *(uint8_t *)a = 0; 5131 a += sizeof (uint64_t); 5132 } 5133 if (!dtrace_vcanload( 5134 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 5135 mstate, vstate)) 5136 break; 5137 5138 dtrace_vcopy((void *)(uintptr_t)regs[rd], 5139 (void *)a, &v->dtdv_type); 5140 break; 5141 } 5142 5143 svar->dtsv_data = regs[rd]; 5144 break; 5145 5146 case DIF_OP_LDTA: 5147 /* 5148 * There are no DTrace built-in thread-local arrays at 5149 * present. This opcode is saved for future work. 5150 */ 5151 *flags |= CPU_DTRACE_ILLOP; 5152 regs[rd] = 0; 5153 break; 5154 5155 case DIF_OP_LDLS: 5156 id = DIF_INSTR_VAR(instr); 5157 5158 if (id < DIF_VAR_OTHER_UBASE) { 5159 /* 5160 * For now, this has no meaning. 
5161 */ 5162 regs[rd] = 0; 5163 break; 5164 } 5165 5166 id -= DIF_VAR_OTHER_UBASE; 5167 5168 ASSERT(id < vstate->dtvs_nlocals); 5169 ASSERT(vstate->dtvs_locals != NULL); 5170 5171 svar = vstate->dtvs_locals[id]; 5172 ASSERT(svar != NULL); 5173 v = &svar->dtsv_var; 5174 5175 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5176 uintptr_t a = (uintptr_t)svar->dtsv_data; 5177 size_t sz = v->dtdv_type.dtdt_size; 5178 5179 sz += sizeof (uint64_t); 5180 ASSERT(svar->dtsv_size == NCPU * sz); 5181 a += CPU->cpu_id * sz; 5182 5183 if (*(uint8_t *)a == UINT8_MAX) { 5184 /* 5185 * If the 0th byte is set to UINT8_MAX 5186 * then this is to be treated as a 5187 * reference to a NULL variable. 5188 */ 5189 regs[rd] = NULL; 5190 } else { 5191 regs[rd] = a + sizeof (uint64_t); 5192 } 5193 5194 break; 5195 } 5196 5197 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); 5198 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; 5199 regs[rd] = tmp[CPU->cpu_id]; 5200 break; 5201 5202 case DIF_OP_STLS: 5203 id = DIF_INSTR_VAR(instr); 5204 5205 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5206 id -= DIF_VAR_OTHER_UBASE; 5207 ASSERT(id < vstate->dtvs_nlocals); 5208 5209 ASSERT(vstate->dtvs_locals != NULL); 5210 svar = vstate->dtvs_locals[id]; 5211 ASSERT(svar != NULL); 5212 v = &svar->dtsv_var; 5213 5214 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5215 uintptr_t a = (uintptr_t)svar->dtsv_data; 5216 size_t sz = v->dtdv_type.dtdt_size; 5217 5218 sz += sizeof (uint64_t); 5219 ASSERT(svar->dtsv_size == NCPU * sz); 5220 a += CPU->cpu_id * sz; 5221 5222 if (regs[rd] == NULL) { 5223 *(uint8_t *)a = UINT8_MAX; 5224 break; 5225 } else { 5226 *(uint8_t *)a = 0; 5227 a += sizeof (uint64_t); 5228 } 5229 5230 if (!dtrace_vcanload( 5231 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 5232 mstate, vstate)) 5233 break; 5234 5235 dtrace_vcopy((void *)(uintptr_t)regs[rd], 5236 (void *)a, &v->dtdv_type); 5237 break; 5238 } 5239 5240 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); 5241 tmp = (uint64_t 
*)(uintptr_t)svar->dtsv_data; 5242 tmp[CPU->cpu_id] = regs[rd]; 5243 break; 5244 5245 case DIF_OP_LDTS: { 5246 dtrace_dynvar_t *dvar; 5247 dtrace_key_t *key; 5248 5249 id = DIF_INSTR_VAR(instr); 5250 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5251 id -= DIF_VAR_OTHER_UBASE; 5252 v = &vstate->dtvs_tlocals[id]; 5253 5254 key = &tupregs[DIF_DTR_NREGS]; 5255 key[0].dttk_value = (uint64_t)id; 5256 key[0].dttk_size = 0; 5257 DTRACE_TLS_THRKEY(key[1].dttk_value); 5258 key[1].dttk_size = 0; 5259 5260 dvar = dtrace_dynvar(dstate, 2, key, 5261 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, 5262 mstate, vstate); 5263 5264 if (dvar == NULL) { 5265 regs[rd] = 0; 5266 break; 5267 } 5268 5269 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5270 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; 5271 } else { 5272 regs[rd] = *((uint64_t *)dvar->dtdv_data); 5273 } 5274 5275 break; 5276 } 5277 5278 case DIF_OP_STTS: { 5279 dtrace_dynvar_t *dvar; 5280 dtrace_key_t *key; 5281 5282 id = DIF_INSTR_VAR(instr); 5283 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5284 id -= DIF_VAR_OTHER_UBASE; 5285 5286 key = &tupregs[DIF_DTR_NREGS]; 5287 key[0].dttk_value = (uint64_t)id; 5288 key[0].dttk_size = 0; 5289 DTRACE_TLS_THRKEY(key[1].dttk_value); 5290 key[1].dttk_size = 0; 5291 v = &vstate->dtvs_tlocals[id]; 5292 5293 dvar = dtrace_dynvar(dstate, 2, key, 5294 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 5295 v->dtdv_type.dtdt_size : sizeof (uint64_t), 5296 regs[rd] ? DTRACE_DYNVAR_ALLOC : 5297 DTRACE_DYNVAR_DEALLOC, mstate, vstate); 5298 5299 /* 5300 * Given that we're storing to thread-local data, 5301 * we need to flush our predicate cache. 
5302 */ 5303 curthread->t_predcache = NULL; 5304 5305 if (dvar == NULL) 5306 break; 5307 5308 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5309 if (!dtrace_vcanload( 5310 (void *)(uintptr_t)regs[rd], 5311 &v->dtdv_type, mstate, vstate)) 5312 break; 5313 5314 dtrace_vcopy((void *)(uintptr_t)regs[rd], 5315 dvar->dtdv_data, &v->dtdv_type); 5316 } else { 5317 *((uint64_t *)dvar->dtdv_data) = regs[rd]; 5318 } 5319 5320 break; 5321 } 5322 5323 case DIF_OP_SRA: 5324 regs[rd] = (int64_t)regs[r1] >> regs[r2]; 5325 break; 5326 5327 case DIF_OP_CALL: 5328 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd, 5329 regs, tupregs, ttop, mstate, state); 5330 break; 5331 5332 case DIF_OP_PUSHTR: 5333 if (ttop == DIF_DTR_NREGS) { 5334 *flags |= CPU_DTRACE_TUPOFLOW; 5335 break; 5336 } 5337 5338 if (r1 == DIF_TYPE_STRING) { 5339 /* 5340 * If this is a string type and the size is 0, 5341 * we'll use the system-wide default string 5342 * size. Note that we are _not_ looking at 5343 * the value of the DTRACEOPT_STRSIZE option; 5344 * had this been set, we would expect to have 5345 * a non-zero size value in the "pushtr". 5346 */ 5347 tupregs[ttop].dttk_size = 5348 dtrace_strlen((char *)(uintptr_t)regs[rd], 5349 regs[r2] ? 
regs[r2] : 5350 dtrace_strsize_default) + 1; 5351 } else { 5352 tupregs[ttop].dttk_size = regs[r2]; 5353 } 5354 5355 tupregs[ttop++].dttk_value = regs[rd]; 5356 break; 5357 5358 case DIF_OP_PUSHTV: 5359 if (ttop == DIF_DTR_NREGS) { 5360 *flags |= CPU_DTRACE_TUPOFLOW; 5361 break; 5362 } 5363 5364 tupregs[ttop].dttk_value = regs[rd]; 5365 tupregs[ttop++].dttk_size = 0; 5366 break; 5367 5368 case DIF_OP_POPTS: 5369 if (ttop != 0) 5370 ttop--; 5371 break; 5372 5373 case DIF_OP_FLUSHTS: 5374 ttop = 0; 5375 break; 5376 5377 case DIF_OP_LDGAA: 5378 case DIF_OP_LDTAA: { 5379 dtrace_dynvar_t *dvar; 5380 dtrace_key_t *key = tupregs; 5381 uint_t nkeys = ttop; 5382 5383 id = DIF_INSTR_VAR(instr); 5384 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5385 id -= DIF_VAR_OTHER_UBASE; 5386 5387 key[nkeys].dttk_value = (uint64_t)id; 5388 key[nkeys++].dttk_size = 0; 5389 5390 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { 5391 DTRACE_TLS_THRKEY(key[nkeys].dttk_value); 5392 key[nkeys++].dttk_size = 0; 5393 v = &vstate->dtvs_tlocals[id]; 5394 } else { 5395 v = &vstate->dtvs_globals[id]->dtsv_var; 5396 } 5397 5398 dvar = dtrace_dynvar(dstate, nkeys, key, 5399 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 
5400 v->dtdv_type.dtdt_size : sizeof (uint64_t), 5401 DTRACE_DYNVAR_NOALLOC, mstate, vstate); 5402 5403 if (dvar == NULL) { 5404 regs[rd] = 0; 5405 break; 5406 } 5407 5408 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5409 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; 5410 } else { 5411 regs[rd] = *((uint64_t *)dvar->dtdv_data); 5412 } 5413 5414 break; 5415 } 5416 5417 case DIF_OP_STGAA: 5418 case DIF_OP_STTAA: { 5419 dtrace_dynvar_t *dvar; 5420 dtrace_key_t *key = tupregs; 5421 uint_t nkeys = ttop; 5422 5423 id = DIF_INSTR_VAR(instr); 5424 ASSERT(id >= DIF_VAR_OTHER_UBASE); 5425 id -= DIF_VAR_OTHER_UBASE; 5426 5427 key[nkeys].dttk_value = (uint64_t)id; 5428 key[nkeys++].dttk_size = 0; 5429 5430 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { 5431 DTRACE_TLS_THRKEY(key[nkeys].dttk_value); 5432 key[nkeys++].dttk_size = 0; 5433 v = &vstate->dtvs_tlocals[id]; 5434 } else { 5435 v = &vstate->dtvs_globals[id]->dtsv_var; 5436 } 5437 5438 dvar = dtrace_dynvar(dstate, nkeys, key, 5439 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 5440 v->dtdv_type.dtdt_size : sizeof (uint64_t), 5441 regs[rd] ? DTRACE_DYNVAR_ALLOC : 5442 DTRACE_DYNVAR_DEALLOC, mstate, vstate); 5443 5444 if (dvar == NULL) 5445 break; 5446 5447 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 5448 if (!dtrace_vcanload( 5449 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 5450 mstate, vstate)) 5451 break; 5452 5453 dtrace_vcopy((void *)(uintptr_t)regs[rd], 5454 dvar->dtdv_data, &v->dtdv_type); 5455 } else { 5456 *((uint64_t *)dvar->dtdv_data) = regs[rd]; 5457 } 5458 5459 break; 5460 } 5461 5462 case DIF_OP_ALLOCS: { 5463 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 5464 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; 5465 5466 /* 5467 * Rounding up the user allocation size could have 5468 * overflowed large, bogus allocations (like -1ULL) to 5469 * 0. 
5470 */ 5471 if (size < regs[r1] || 5472 !DTRACE_INSCRATCH(mstate, size)) { 5473 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5474 regs[rd] = NULL; 5475 break; 5476 } 5477 5478 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); 5479 mstate->dtms_scratch_ptr += size; 5480 regs[rd] = ptr; 5481 break; 5482 } 5483 5484 case DIF_OP_COPYS: 5485 if (!dtrace_canstore(regs[rd], regs[r2], 5486 mstate, vstate)) { 5487 *flags |= CPU_DTRACE_BADADDR; 5488 *illval = regs[rd]; 5489 break; 5490 } 5491 5492 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) 5493 break; 5494 5495 dtrace_bcopy((void *)(uintptr_t)regs[r1], 5496 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); 5497 break; 5498 5499 case DIF_OP_STB: 5500 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) { 5501 *flags |= CPU_DTRACE_BADADDR; 5502 *illval = regs[rd]; 5503 break; 5504 } 5505 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1]; 5506 break; 5507 5508 case DIF_OP_STH: 5509 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) { 5510 *flags |= CPU_DTRACE_BADADDR; 5511 *illval = regs[rd]; 5512 break; 5513 } 5514 if (regs[rd] & 1) { 5515 *flags |= CPU_DTRACE_BADALIGN; 5516 *illval = regs[rd]; 5517 break; 5518 } 5519 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1]; 5520 break; 5521 5522 case DIF_OP_STW: 5523 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) { 5524 *flags |= CPU_DTRACE_BADADDR; 5525 *illval = regs[rd]; 5526 break; 5527 } 5528 if (regs[rd] & 3) { 5529 *flags |= CPU_DTRACE_BADALIGN; 5530 *illval = regs[rd]; 5531 break; 5532 } 5533 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1]; 5534 break; 5535 5536 case DIF_OP_STX: 5537 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) { 5538 *flags |= CPU_DTRACE_BADADDR; 5539 *illval = regs[rd]; 5540 break; 5541 } 5542 if (regs[rd] & 7) { 5543 *flags |= CPU_DTRACE_BADALIGN; 5544 *illval = regs[rd]; 5545 break; 5546 } 5547 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; 5548 break; 5549 } 5550 } 5551 5552 if (!(*flags & CPU_DTRACE_FAULT)) 5553 
return (rval); 5554 5555 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t); 5556 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS; 5557 5558 return (0); 5559 } 5560 5561 static void 5562 dtrace_action_breakpoint(dtrace_ecb_t *ecb) 5563 { 5564 dtrace_probe_t *probe = ecb->dte_probe; 5565 dtrace_provider_t *prov = probe->dtpr_provider; 5566 char c[DTRACE_FULLNAMELEN + 80], *str; 5567 char *msg = "dtrace: breakpoint action at probe "; 5568 char *ecbmsg = " (ecb "; 5569 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); 5570 uintptr_t val = (uintptr_t)ecb; 5571 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; 5572 5573 if (dtrace_destructive_disallow) 5574 return; 5575 5576 /* 5577 * It's impossible to be taking action on the NULL probe. 5578 */ 5579 ASSERT(probe != NULL); 5580 5581 /* 5582 * This is a poor man's (destitute man's?) sprintf(): we want to 5583 * print the provider name, module name, function name and name of 5584 * the probe, along with the hex address of the ECB with the breakpoint 5585 * action -- all of which we must place in the character buffer by 5586 * hand. 
5587 */ 5588 while (*msg != '\0') 5589 c[i++] = *msg++; 5590 5591 for (str = prov->dtpv_name; *str != '\0'; str++) 5592 c[i++] = *str; 5593 c[i++] = ':'; 5594 5595 for (str = probe->dtpr_mod; *str != '\0'; str++) 5596 c[i++] = *str; 5597 c[i++] = ':'; 5598 5599 for (str = probe->dtpr_func; *str != '\0'; str++) 5600 c[i++] = *str; 5601 c[i++] = ':'; 5602 5603 for (str = probe->dtpr_name; *str != '\0'; str++) 5604 c[i++] = *str; 5605 5606 while (*ecbmsg != '\0') 5607 c[i++] = *ecbmsg++; 5608 5609 while (shift >= 0) { 5610 mask = (uintptr_t)0xf << shift; 5611 5612 if (val >= ((uintptr_t)1 << shift)) 5613 c[i++] = "0123456789abcdef"[(val & mask) >> shift]; 5614 shift -= 4; 5615 } 5616 5617 c[i++] = ')'; 5618 c[i] = '\0'; 5619 5620 debug_enter(c); 5621 } 5622 5623 static void 5624 dtrace_action_panic(dtrace_ecb_t *ecb) 5625 { 5626 dtrace_probe_t *probe = ecb->dte_probe; 5627 5628 /* 5629 * It's impossible to be taking action on the NULL probe. 5630 */ 5631 ASSERT(probe != NULL); 5632 5633 if (dtrace_destructive_disallow) 5634 return; 5635 5636 if (dtrace_panicked != NULL) 5637 return; 5638 5639 if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) 5640 return; 5641 5642 /* 5643 * We won the right to panic. (We want to be sure that only one 5644 * thread calls panic() from dtrace_probe(), and that panic() is 5645 * called exactly once.) 5646 */ 5647 dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", 5648 probe->dtpr_provider->dtpv_name, probe->dtpr_mod, 5649 probe->dtpr_func, probe->dtpr_name, (void *)ecb); 5650 } 5651 5652 static void 5653 dtrace_action_raise(uint64_t sig) 5654 { 5655 if (dtrace_destructive_disallow) 5656 return; 5657 5658 if (sig >= NSIG) { 5659 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 5660 return; 5661 } 5662 5663 /* 5664 * raise() has a queue depth of 1 -- we ignore all subsequent 5665 * invocations of the raise() action. 
5666 */ 5667 if (curthread->t_dtrace_sig == 0) 5668 curthread->t_dtrace_sig = (uint8_t)sig; 5669 5670 curthread->t_sig_check = 1; 5671 aston(curthread); 5672 } 5673 5674 static void 5675 dtrace_action_stop(void) 5676 { 5677 if (dtrace_destructive_disallow) 5678 return; 5679 5680 if (!curthread->t_dtrace_stop) { 5681 curthread->t_dtrace_stop = 1; 5682 curthread->t_sig_check = 1; 5683 aston(curthread); 5684 } 5685 } 5686 5687 static void 5688 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) 5689 { 5690 hrtime_t now; 5691 volatile uint16_t *flags; 5692 cpu_t *cpu = CPU; 5693 5694 if (dtrace_destructive_disallow) 5695 return; 5696 5697 flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags; 5698 5699 now = dtrace_gethrtime(); 5700 5701 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) { 5702 /* 5703 * We need to advance the mark to the current time. 5704 */ 5705 cpu->cpu_dtrace_chillmark = now; 5706 cpu->cpu_dtrace_chilled = 0; 5707 } 5708 5709 /* 5710 * Now check to see if the requested chill time would take us over 5711 * the maximum amount of time allowed in the chill interval. (Or 5712 * worse, if the calculation itself induces overflow.) 5713 */ 5714 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max || 5715 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) { 5716 *flags |= CPU_DTRACE_ILLOP; 5717 return; 5718 } 5719 5720 while (dtrace_gethrtime() - now < val) 5721 continue; 5722 5723 /* 5724 * Normally, we assure that the value of the variable "timestamp" does 5725 * not change within an ECB. The presence of chill() represents an 5726 * exception to this rule, however. 
5727 */ 5728 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP; 5729 cpu->cpu_dtrace_chilled += val; 5730 } 5731 5732 static void 5733 dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, 5734 uint64_t *buf, uint64_t arg) 5735 { 5736 int nframes = DTRACE_USTACK_NFRAMES(arg); 5737 int strsize = DTRACE_USTACK_STRSIZE(arg); 5738 uint64_t *pcs = &buf[1], *fps; 5739 char *str = (char *)&pcs[nframes]; 5740 int size, offs = 0, i, j; 5741 uintptr_t old = mstate->dtms_scratch_ptr, saved; 5742 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 5743 char *sym; 5744 5745 /* 5746 * Should be taking a faster path if string space has not been 5747 * allocated. 5748 */ 5749 ASSERT(strsize != 0); 5750 5751 /* 5752 * We will first allocate some temporary space for the frame pointers. 5753 */ 5754 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 5755 size = (uintptr_t)fps - mstate->dtms_scratch_ptr + 5756 (nframes * sizeof (uint64_t)); 5757 5758 if (!DTRACE_INSCRATCH(mstate, size)) { 5759 /* 5760 * Not enough room for our frame pointers -- need to indicate 5761 * that we ran out of scratch space. 5762 */ 5763 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5764 return; 5765 } 5766 5767 mstate->dtms_scratch_ptr += size; 5768 saved = mstate->dtms_scratch_ptr; 5769 5770 /* 5771 * Now get a stack with both program counters and frame pointers. 5772 */ 5773 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 5774 dtrace_getufpstack(buf, fps, nframes + 1); 5775 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 5776 5777 /* 5778 * If that faulted, we're cooked. 5779 */ 5780 if (*flags & CPU_DTRACE_FAULT) 5781 goto out; 5782 5783 /* 5784 * Now we want to walk up the stack, calling the USTACK helper. For 5785 * each iteration, we restore the scratch pointer. 
5786 */ 5787 for (i = 0; i < nframes; i++) { 5788 mstate->dtms_scratch_ptr = saved; 5789 5790 if (offs >= strsize) 5791 break; 5792 5793 sym = (char *)(uintptr_t)dtrace_helper( 5794 DTRACE_HELPER_ACTION_USTACK, 5795 mstate, state, pcs[i], fps[i]); 5796 5797 /* 5798 * If we faulted while running the helper, we're going to 5799 * clear the fault and null out the corresponding string. 5800 */ 5801 if (*flags & CPU_DTRACE_FAULT) { 5802 *flags &= ~CPU_DTRACE_FAULT; 5803 str[offs++] = '\0'; 5804 continue; 5805 } 5806 5807 if (sym == NULL) { 5808 str[offs++] = '\0'; 5809 continue; 5810 } 5811 5812 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 5813 5814 /* 5815 * Now copy in the string that the helper returned to us. 5816 */ 5817 for (j = 0; offs + j < strsize; j++) { 5818 if ((str[offs + j] = sym[j]) == '\0') 5819 break; 5820 } 5821 5822 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 5823 5824 offs += j + 1; 5825 } 5826 5827 if (offs >= strsize) { 5828 /* 5829 * If we didn't have room for all of the strings, we don't 5830 * abort processing -- this needn't be a fatal error -- but we 5831 * still want to increment a counter (dts_stkstroverflows) to 5832 * allow this condition to be warned about. (If this is from 5833 * a jstack() action, it is easily tuned via jstackstrsize.) 5834 */ 5835 dtrace_error(&state->dts_stkstroverflows); 5836 } 5837 5838 while (offs < strsize) 5839 str[offs++] = '\0'; 5840 5841 out: 5842 mstate->dtms_scratch_ptr = old; 5843 } 5844 5845 /* 5846 * If you're looking for the epicenter of DTrace, you just found it. This 5847 * is the function called by the provider to fire a probe -- from which all 5848 * subsequent probe-context DTrace activity emanates. 
 */
void
dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
{
	processorid_t cpuid;
	dtrace_icookie_t cookie;
	dtrace_probe_t *probe;
	dtrace_mstate_t mstate;
	dtrace_ecb_t *ecb;
	dtrace_action_t *act;
	intptr_t offs;
	size_t size;
	int vtime, onintr;
	volatile uint16_t *flags;
	hrtime_t now;

	/*
	 * Kick out immediately if this CPU is still being born (in which case
	 * curthread will be set to -1) or the current thread can't allow
	 * probes in its current context.
	 */
	if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
		return;

	/*
	 * Everything from here to the matching dtrace_interrupt_enable() runs
	 * with interrupts disabled.  Note that the probe ID is used as a
	 * one-based index into dtrace_probes[].
	 */
	cookie = dtrace_interrupt_disable();
	probe = dtrace_probes[id - 1];
	cpuid = CPU->cpu_id;
	onintr = CPU_ON_INTR(CPU);

	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == curthread->t_predcache) {
		/*
		 * We have hit in the predicate cache; we know that
		 * this predicate would evaluate to be false.
		 */
		dtrace_interrupt_enable(cookie);
		return;
	}

	if (panic_quiesce) {
		/*
		 * We don't trace anything if we're panicking.
		 */
		dtrace_interrupt_enable(cookie);
		return;
	}

	now = dtrace_gethrtime();
	vtime = dtrace_vtime_references != 0;

	/*
	 * If virtual time is being tracked, credit the thread with the time
	 * that has elapsed since t_dtrace_start.
	 */
	if (vtime && curthread->t_dtrace_start)
		curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;

	mstate.dtms_difo = NULL;
	mstate.dtms_probe = probe;
	mstate.dtms_strtok = NULL;
	mstate.dtms_arg[0] = arg0;
	mstate.dtms_arg[1] = arg1;
	mstate.dtms_arg[2] = arg2;
	mstate.dtms_arg[3] = arg3;
	mstate.dtms_arg[4] = arg4;

	flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;

	/*
	 * Process each ECB enabled on this probe in turn.
	 */
	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		dtrace_predicate_t *pred = ecb->dte_predicate;
		dtrace_state_t *state = ecb->dte_state;
		dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
		dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
		dtrace_vstate_t *vstate = &state->dts_vstate;
		dtrace_provider_t *prov = probe->dtpr_provider;
		uint64_t tracememsize = 0;
		int committed = 0;
		caddr_t tomax;

		/*
		 * A little subtlety with the following (seemingly innocuous)
		 * declaration of the automatic 'val':  by looking at the
		 * code, you might think that it could be declared in the
		 * action processing loop, below.  (That is, it's only used in
		 * the action processing loop.)  However, it must be declared
		 * out of that scope because in the case of DIF expression
		 * arguments to aggregating actions, one iteration of the
		 * action loop will use the last iteration's value.
		 */
#ifdef lint
		uint64_t val = 0;
#else
		uint64_t val;
#endif

		/*
		 * Reset the per-ECB machine state and clear any error flags
		 * left over from the previous ECB.
		 */
		mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
		mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
		*flags &= ~CPU_DTRACE_ERROR;

		if (prov == dtrace_provider) {
			/*
			 * If dtrace itself is the provider of this probe,
			 * we're only going to continue processing the ECB if
			 * arg0 (the dtrace_state_t) is equal to the ECB's
			 * creating state.  (This prevents disjoint consumers
			 * from seeing one another's metaprobes.)
			 */
			if (arg0 != (uint64_t)(uintptr_t)state)
				continue;
		}

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
			/*
			 * We're not currently active.  If our provider isn't
			 * the dtrace pseudo provider, we're not interested.
			 */
			if (prov != dtrace_provider)
				continue;

			/*
			 * Now we must further check if we are in the BEGIN
			 * probe.  If we are, we will only continue processing
			 * if we're still in WARMUP -- if one BEGIN enabling
			 * has invoked the exit() action, we don't want to
			 * evaluate subsequent BEGIN enablings.
			 */
			if (probe->dtpr_id == dtrace_probeid_begin &&
			    state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
				ASSERT(state->dts_activity ==
				    DTRACE_ACTIVITY_DRAINING);
				continue;
			}
		}

		if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
			continue;

		if (now - state->dts_alive > dtrace_deadman_timeout) {
			/*
			 * We seem to be dead.  Unless we (a) have kernel
			 * destructive permissions (b) have explicitly enabled
			 * destructive actions and (c) destructive actions have
			 * not been disabled, we're going to transition into
			 * the KILLED state, from which no further processing
			 * on this state will be performed.
			 */
			if (!dtrace_priv_kernel_destructive(state) ||
			    !state->dts_cred.dcr_destructive ||
			    dtrace_destructive_disallow) {
				void *activity = &state->dts_activity;
				dtrace_activity_t current;

				/*
				 * Retry the compare-and-swap until the
				 * activity has been replaced with KILLED.
				 */
				do {
					current = state->dts_activity;
				} while (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_KILLED) != current);

				continue;
			}
		}

		/*
		 * Reserve space for this ECB's record in the principal
		 * buffer; a negative offset means no space was reserved.
		 */
		if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
		    ecb->dte_alignment, state, &mstate)) < 0)
			continue;

		tomax = buf->dtb_tomax;
		ASSERT(tomax != NULL);

		/*
		 * Write the record header (EPID and timestamp) at the start
		 * of the reserved space.
		 */
		if (ecb->dte_size != 0) {
			dtrace_rechdr_t dtrh;
			if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
				mstate.dtms_timestamp = dtrace_gethrtime();
				mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
			}
			ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t));
			dtrh.dtrh_epid = ecb->dte_epid;
			DTRACE_RECORD_STORE_TIMESTAMP(&dtrh,
			    mstate.dtms_timestamp);
			*((dtrace_rechdr_t *)(tomax + offs)) = dtrh;
		}

		mstate.dtms_epid = ecb->dte_epid;
		mstate.dtms_present |= DTRACE_MSTATE_EPID;

		if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
			mstate.dtms_access |= DTRACE_ACCESS_KERNEL;

		/*
		 * Evaluate the predicate, if any.  A false predicate (with no
		 * error) skips this ECB.
		 */
		if (pred != NULL) {
			dtrace_difo_t *dp = pred->dtp_difo;
			int rval;

			rval = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
				dtrace_cacheid_t cid = probe->dtpr_predcache;

				if (cid != DTRACE_CACHEIDNONE && !onintr) {
					/*
					 * Update the predicate cache...
					 */
					ASSERT(cid == pred->dtp_cacheid);
					curthread->t_predcache = cid;
				}

				continue;
			}
		}

		/*
		 * Process the ECB's actions in order, stopping as soon as an
		 * error flag is raised.
		 */
		for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
		    act != NULL; act = act->dta_next) {
			size_t valoffs;
			dtrace_difo_t *dp;
			dtrace_recdesc_t *rec = &act->dta_rec;

			size = rec->dtrd_size;
			valoffs = offs + rec->dtrd_offset;

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				uint64_t v = 0xbad;
				dtrace_aggregation_t *agg;

				agg = (dtrace_aggregation_t *)act;

				if ((dp = act->dta_difo) != NULL)
					v = dtrace_dif_emulate(dp,
					    &mstate, vstate, state);

				if (*flags & CPU_DTRACE_ERROR)
					continue;

				/*
				 * Note that we always pass the expression
				 * value from the previous iteration of the
				 * action loop.  This value will only be used
				 * if there is an expression argument to the
				 * aggregating action, denoted by the
				 * dtag_hasarg field.
				 */
				dtrace_aggregate(agg, buf,
				    offs, aggbuf, v, val);
				continue;
			}

			/*
			 * The actions handled by this first switch do not
			 * evaluate a DIF expression.
			 */
			switch (act->dta_kind) {
			case DTRACEACT_STOP:
				if (dtrace_priv_proc_destructive(state,
				    &mstate))
					dtrace_action_stop();
				continue;

			case DTRACEACT_BREAKPOINT:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_breakpoint(ecb);
				continue;

			case DTRACEACT_PANIC:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_panic(ecb);
				continue;

			case DTRACEACT_STACK:
				if (!dtrace_priv_kernel(state))
					continue;

				dtrace_getpcstack((pc_t *)(tomax + valoffs),
				    size / sizeof (pc_t), probe->dtpr_aframes,
				    DTRACE_ANCHORED(probe) ? NULL :
				    (uint32_t *)arg0);

				continue;

			case DTRACEACT_JSTACK:
			case DTRACEACT_USTACK:
				if (!dtrace_priv_proc(state, &mstate))
					continue;

				/*
				 * See comment in DIF_VAR_PID.
				 */
				if (DTRACE_ANCHORED(mstate.dtms_probe) &&
				    CPU_ON_INTR(CPU)) {
					int depth = DTRACE_USTACK_NFRAMES(
					    rec->dtrd_arg) + 1;

					dtrace_bzero((void *)(tomax + valoffs),
					    DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
					    + depth * sizeof (uint64_t));

					continue;
				}

				if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
				    curproc->p_dtrace_helpers != NULL) {
					/*
					 * This is the slow path -- we have
					 * allocated string space, and we're
					 * getting the stack of a process that
					 * has helpers.  Call into a separate
					 * routine to perform this processing.
					 */
					dtrace_action_ustack(&mstate, state,
					    (uint64_t *)(tomax + valoffs),
					    rec->dtrd_arg);
					continue;
				}

				/*
				 * Clear the string space, since there's no
				 * helper to do it for us.
				 */
				if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) {
					int depth = DTRACE_USTACK_NFRAMES(
					    rec->dtrd_arg);
					size_t strsize = DTRACE_USTACK_STRSIZE(
					    rec->dtrd_arg);
					/*
					 * Note that this 'buf' shadows the
					 * dtrace_buffer_t 'buf' of the
					 * enclosing ECB loop.
					 */
					uint64_t *buf = (uint64_t *)(tomax +
					    valoffs);
					void *strspace = &buf[depth + 1];

					dtrace_bzero(strspace,
					    MIN(depth, strsize));
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				dtrace_getupcstack((uint64_t *)
				    (tomax + valoffs),
				    DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				continue;

			default:
				break;
			}

			/*
			 * All remaining action kinds evaluate their DIF
			 * object; the result is left in 'val'.
			 */
			dp = act->dta_difo;
			ASSERT(dp != NULL);

			val = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (*flags & CPU_DTRACE_ERROR)
				continue;

			switch (act->dta_kind) {
			case DTRACEACT_SPECULATE: {
				dtrace_rechdr_t *dtrh;

				/*
				 * Redirect the remainder of this ECB's record
				 * into the speculative buffer selected by
				 * 'val', reserving space and writing a fresh
				 * record header there.
				 */
				ASSERT(buf == &state->dts_buffer[cpuid]);
				buf = dtrace_speculation_buffer(state,
				    cpuid, val);

				if (buf == NULL) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				offs = dtrace_buffer_reserve(buf,
				    ecb->dte_needed, ecb->dte_alignment,
				    state, NULL);

				if (offs < 0) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				tomax = buf->dtb_tomax;
				ASSERT(tomax != NULL);

				if (ecb->dte_size == 0)
					continue;

				ASSERT3U(ecb->dte_size, >=,
				    sizeof (dtrace_rechdr_t));
				dtrh = ((void *)(tomax + offs));
				dtrh->dtrh_epid = ecb->dte_epid;
				/*
				 * When the speculation is committed, all of
				 * the records in the speculative buffer will
				 * have their timestamps set to the commit
				 * time.  Until then, it is set to a sentinel
				 * value, for debugability.
				 */
				DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
				continue;
			}

			case DTRACEACT_CHILL:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_chill(&mstate, val);
				continue;

			case DTRACEACT_RAISE:
				if (dtrace_priv_proc_destructive(state,
				    &mstate))
					dtrace_action_raise(val);
				continue;

			case DTRACEACT_COMMIT:
				ASSERT(!committed);

				/*
				 * We need to commit our buffer state.
				 */
				if (ecb->dte_size)
					buf->dtb_offset = offs + ecb->dte_size;
				buf = &state->dts_buffer[cpuid];
				dtrace_speculation_commit(state, cpuid, val);
				committed = 1;
				continue;

			case DTRACEACT_DISCARD:
				dtrace_speculation_discard(state, cpuid, val);
				continue;

			case DTRACEACT_DIFEXPR:
			case DTRACEACT_LIBACT:
			case DTRACEACT_PRINTF:
			case DTRACEACT_PRINTA:
			case DTRACEACT_SYSTEM:
			case DTRACEACT_FREOPEN:
			case DTRACEACT_TRACEMEM:
				break;

			case DTRACEACT_TRACEMEM_DYNSIZE:
				/*
				 * Remember the dynamic size; it bounds the
				 * next by-reference copy performed below.
				 */
				tracememsize = val;
				break;

			case DTRACEACT_SYM:
			case DTRACEACT_MOD:
				if (!dtrace_priv_kernel(state))
					continue;
				break;

			case DTRACEACT_USYM:
			case DTRACEACT_UMOD:
			case DTRACEACT_UADDR: {
				struct pid *pid = curthread->t_procp->p_pidp;

				if (!dtrace_priv_proc(state, &mstate))
					continue;

				/*
				 * These records are a pair of 64-bit values:
				 * the process ID followed by the address.
				 */
				DTRACE_STORE(uint64_t, tomax,
				    valoffs, (uint64_t)pid->pid_id);
				DTRACE_STORE(uint64_t, tomax,
				    valoffs + sizeof (uint64_t), val);

				continue;
			}

			case DTRACEACT_EXIT: {
				/*
				 * For the exit action, we are going to attempt
				 * to atomically set our activity to be
				 * draining.  If this fails (either because
				 * another CPU has beat us to the exit action,
				 * or because our current activity is something
				 * other than ACTIVE or WARMUP), we will
				 * continue.  This assures that the exit action
				 * can be successfully recorded at most once
				 * when we're in the ACTIVE state.  If we're
				 * encountering the exit() action while in
				 * COOLDOWN, however, we want to honor the new
				 * status code.  (We know that we're the only
				 * thread in COOLDOWN, so there is no race.)
				 */
				void *activity = &state->dts_activity;
				dtrace_activity_t current = state->dts_activity;

				if (current == DTRACE_ACTIVITY_COOLDOWN)
					break;

				if (current != DTRACE_ACTIVITY_WARMUP)
					current = DTRACE_ACTIVITY_ACTIVE;

				if (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_DRAINING) != current) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				break;
			}

			default:
				ASSERT(0);
			}

			/*
			 * By-reference results:  'val' is an address, and the
			 * data it refers to is copied into the record one
			 * byte at a time.
			 */
			if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t end = valoffs + size;

				if (tracememsize != 0 &&
				    valoffs + tracememsize < end) {
					end = valoffs + tracememsize;
					tracememsize = 0;
				}

				if (!dtrace_vcanload((void *)(uintptr_t)val,
				    &dp->dtdo_rtype, &mstate, vstate))
					continue;

				/*
				 * If this is a string, we're going to only
				 * load until we find the zero byte -- after
				 * which we'll store zero bytes.
				 */
				if (dp->dtdo_rtype.dtdt_kind ==
				    DIF_TYPE_STRING) {
					char c = '\0' + 1;
					int intuple = act->dta_intuple;
					size_t s;

					for (s = 0; s < size; s++) {
						if (c != '\0')
							c = dtrace_load8(val++);

						DTRACE_STORE(uint8_t, tomax,
						    valoffs++, c);

						if (c == '\0' && intuple)
							break;
					}

					continue;
				}

				while (valoffs < end) {
					DTRACE_STORE(uint8_t, tomax, valoffs++,
					    dtrace_load8(val++));
				}

				continue;
			}

			/*
			 * By-value results:  store 'val' at the width given
			 * by the record description.
			 */
			switch (size) {
			case 0:
				break;

			case sizeof (uint8_t):
				DTRACE_STORE(uint8_t, tomax, valoffs, val);
				break;
			case sizeof (uint16_t):
				DTRACE_STORE(uint16_t, tomax, valoffs, val);
				break;
			case sizeof (uint32_t):
				DTRACE_STORE(uint32_t, tomax, valoffs, val);
				break;
			case sizeof (uint64_t):
				DTRACE_STORE(uint64_t, tomax, valoffs, val);
				break;
			default:
				/*
				 * Any other size should have been returned by
				 * reference, not by value.
				 */
				ASSERT(0);
				break;
			}
		}

		/*
		 * A drop leaves the buffer offset untouched, so the space
		 * reserved for this record is not published.
		 */
		if (*flags & CPU_DTRACE_DROP)
			continue;

		if (*flags & CPU_DTRACE_FAULT) {
			int ndx;
			dtrace_action_t *err;

			buf->dtb_errors++;

			if (probe->dtpr_id == dtrace_probeid_error) {
				/*
				 * There's nothing we can do -- we had an
				 * error on the error probe.  We bump an
				 * error counter to at least indicate that
				 * this condition happened.
				 */
				dtrace_error(&state->dts_dblerrors);
				continue;
			}

			if (vtime) {
				/*
				 * Before recursing on dtrace_probe(), we
				 * need to explicitly clear out our start
				 * time to prevent it from being accumulated
				 * into t_dtrace_vtime.
				 */
				curthread->t_dtrace_start = 0;
			}

			/*
			 * Iterate over the actions to figure out which action
			 * we were processing when we experienced the error.
			 * Note that act points _past_ the faulting action; if
			 * act is ecb->dte_action, the fault was in the
			 * predicate, if it's ecb->dte_action->dta_next it's
			 * in action #1, and so on.
			 */
			for (err = ecb->dte_action, ndx = 0;
			    err != act; err = err->dta_next, ndx++)
				continue;

			dtrace_probe_error(state, ecb->dte_epid, ndx,
			    (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
			    mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
			    cpu_core[cpuid].cpuc_dtrace_illval);

			continue;
		}

		/*
		 * Publish the record by advancing the buffer offset, unless a
		 * commit() action has already done so.
		 */
		if (!committed)
			buf->dtb_offset = offs + ecb->dte_size;
	}

	if (vtime)
		curthread->t_dtrace_start = dtrace_gethrtime();

	dtrace_interrupt_enable(cookie);
}

/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)
Rather, they are called from elsewhere in the 6470 * DTrace framework to look-up probes in, add probes to and remove probes from 6471 * the DTrace probe hashes. (Each probe is hashed by each element of the 6472 * probe tuple -- allowing for fast lookups, regardless of what was 6473 * specified.) 6474 */ 6475 static uint_t 6476 dtrace_hash_str(char *p) 6477 { 6478 unsigned int g; 6479 uint_t hval = 0; 6480 6481 while (*p) { 6482 hval = (hval << 4) + *p++; 6483 if ((g = (hval & 0xf0000000)) != 0) 6484 hval ^= g >> 24; 6485 hval &= ~g; 6486 } 6487 return (hval); 6488 } 6489 6490 static dtrace_hash_t * 6491 dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) 6492 { 6493 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); 6494 6495 hash->dth_stroffs = stroffs; 6496 hash->dth_nextoffs = nextoffs; 6497 hash->dth_prevoffs = prevoffs; 6498 6499 hash->dth_size = 1; 6500 hash->dth_mask = hash->dth_size - 1; 6501 6502 hash->dth_tab = kmem_zalloc(hash->dth_size * 6503 sizeof (dtrace_hashbucket_t *), KM_SLEEP); 6504 6505 return (hash); 6506 } 6507 6508 static void 6509 dtrace_hash_destroy(dtrace_hash_t *hash) 6510 { 6511 #ifdef DEBUG 6512 int i; 6513 6514 for (i = 0; i < hash->dth_size; i++) 6515 ASSERT(hash->dth_tab[i] == NULL); 6516 #endif 6517 6518 kmem_free(hash->dth_tab, 6519 hash->dth_size * sizeof (dtrace_hashbucket_t *)); 6520 kmem_free(hash, sizeof (dtrace_hash_t)); 6521 } 6522 6523 static void 6524 dtrace_hash_resize(dtrace_hash_t *hash) 6525 { 6526 int size = hash->dth_size, i, ndx; 6527 int new_size = hash->dth_size << 1; 6528 int new_mask = new_size - 1; 6529 dtrace_hashbucket_t **new_tab, *bucket, *next; 6530 6531 ASSERT((new_size & new_mask) == 0); 6532 6533 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP); 6534 6535 for (i = 0; i < size; i++) { 6536 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { 6537 dtrace_probe_t *probe = bucket->dthb_chain; 6538 6539 ASSERT(probe != NULL); 6540 ndx = 
DTRACE_HASHSTR(hash, probe) & new_mask; 6541 6542 next = bucket->dthb_next; 6543 bucket->dthb_next = new_tab[ndx]; 6544 new_tab[ndx] = bucket; 6545 } 6546 } 6547 6548 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *)); 6549 hash->dth_tab = new_tab; 6550 hash->dth_size = new_size; 6551 hash->dth_mask = new_mask; 6552 } 6553 6554 static void 6555 dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) 6556 { 6557 int hashval = DTRACE_HASHSTR(hash, new); 6558 int ndx = hashval & hash->dth_mask; 6559 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 6560 dtrace_probe_t **nextp, **prevp; 6561 6562 for (; bucket != NULL; bucket = bucket->dthb_next) { 6563 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) 6564 goto add; 6565 } 6566 6567 if ((hash->dth_nbuckets >> 1) > hash->dth_size) { 6568 dtrace_hash_resize(hash); 6569 dtrace_hash_add(hash, new); 6570 return; 6571 } 6572 6573 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP); 6574 bucket->dthb_next = hash->dth_tab[ndx]; 6575 hash->dth_tab[ndx] = bucket; 6576 hash->dth_nbuckets++; 6577 6578 add: 6579 nextp = DTRACE_HASHNEXT(hash, new); 6580 ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL); 6581 *nextp = bucket->dthb_chain; 6582 6583 if (bucket->dthb_chain != NULL) { 6584 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain); 6585 ASSERT(*prevp == NULL); 6586 *prevp = new; 6587 } 6588 6589 bucket->dthb_chain = new; 6590 bucket->dthb_len++; 6591 } 6592 6593 static dtrace_probe_t * 6594 dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template) 6595 { 6596 int hashval = DTRACE_HASHSTR(hash, template); 6597 int ndx = hashval & hash->dth_mask; 6598 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 6599 6600 for (; bucket != NULL; bucket = bucket->dthb_next) { 6601 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) 6602 return (bucket->dthb_chain); 6603 } 6604 6605 return (NULL); 6606 } 6607 6608 static int 6609 dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) 
6610 { 6611 int hashval = DTRACE_HASHSTR(hash, template); 6612 int ndx = hashval & hash->dth_mask; 6613 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 6614 6615 for (; bucket != NULL; bucket = bucket->dthb_next) { 6616 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) 6617 return (bucket->dthb_len); 6618 } 6619 6620 return (NULL); 6621 } 6622 6623 static void 6624 dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) 6625 { 6626 int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; 6627 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 6628 6629 dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); 6630 dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); 6631 6632 /* 6633 * Find the bucket that we're removing this probe from. 6634 */ 6635 for (; bucket != NULL; bucket = bucket->dthb_next) { 6636 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) 6637 break; 6638 } 6639 6640 ASSERT(bucket != NULL); 6641 6642 if (*prevp == NULL) { 6643 if (*nextp == NULL) { 6644 /* 6645 * The removed probe was the only probe on this 6646 * bucket; we need to remove the bucket. 6647 */ 6648 dtrace_hashbucket_t *b = hash->dth_tab[ndx]; 6649 6650 ASSERT(bucket->dthb_chain == probe); 6651 ASSERT(b != NULL); 6652 6653 if (b == bucket) { 6654 hash->dth_tab[ndx] = bucket->dthb_next; 6655 } else { 6656 while (b->dthb_next != bucket) 6657 b = b->dthb_next; 6658 b->dthb_next = bucket->dthb_next; 6659 } 6660 6661 ASSERT(hash->dth_nbuckets > 0); 6662 hash->dth_nbuckets--; 6663 kmem_free(bucket, sizeof (dtrace_hashbucket_t)); 6664 return; 6665 } 6666 6667 bucket->dthb_chain = *nextp; 6668 } else { 6669 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp; 6670 } 6671 6672 if (*nextp != NULL) 6673 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp; 6674 } 6675 6676 /* 6677 * DTrace Utility Functions 6678 * 6679 * These are random utility functions that are _not_ called from probe context. 
6680 */ 6681 static int 6682 dtrace_badattr(const dtrace_attribute_t *a) 6683 { 6684 return (a->dtat_name > DTRACE_STABILITY_MAX || 6685 a->dtat_data > DTRACE_STABILITY_MAX || 6686 a->dtat_class > DTRACE_CLASS_MAX); 6687 } 6688 6689 /* 6690 * Return a duplicate copy of a string. If the specified string is NULL, 6691 * this function returns a zero-length string. 6692 */ 6693 static char * 6694 dtrace_strdup(const char *str) 6695 { 6696 char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP); 6697 6698 if (str != NULL) 6699 (void) strcpy(new, str); 6700 6701 return (new); 6702 } 6703 6704 #define DTRACE_ISALPHA(c) \ 6705 (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) 6706 6707 static int 6708 dtrace_badname(const char *s) 6709 { 6710 char c; 6711 6712 if (s == NULL || (c = *s++) == '\0') 6713 return (0); 6714 6715 if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.') 6716 return (1); 6717 6718 while ((c = *s++) != '\0') { 6719 if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') && 6720 c != '-' && c != '_' && c != '.' && c != '`') 6721 return (1); 6722 } 6723 6724 return (0); 6725 } 6726 6727 static void 6728 dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) 6729 { 6730 uint32_t priv; 6731 6732 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { 6733 /* 6734 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. 
6735 */ 6736 priv = DTRACE_PRIV_ALL; 6737 } else { 6738 *uidp = crgetuid(cr); 6739 *zoneidp = crgetzoneid(cr); 6740 6741 priv = 0; 6742 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) 6743 priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER; 6744 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) 6745 priv |= DTRACE_PRIV_USER; 6746 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) 6747 priv |= DTRACE_PRIV_PROC; 6748 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) 6749 priv |= DTRACE_PRIV_OWNER; 6750 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) 6751 priv |= DTRACE_PRIV_ZONEOWNER; 6752 } 6753 6754 *privp = priv; 6755 } 6756 6757 #ifdef DTRACE_ERRDEBUG 6758 static void 6759 dtrace_errdebug(const char *str) 6760 { 6761 int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ; 6762 int occupied = 0; 6763 6764 mutex_enter(&dtrace_errlock); 6765 dtrace_errlast = str; 6766 dtrace_errthread = curthread; 6767 6768 while (occupied++ < DTRACE_ERRHASHSZ) { 6769 if (dtrace_errhash[hval].dter_msg == str) { 6770 dtrace_errhash[hval].dter_count++; 6771 goto out; 6772 } 6773 6774 if (dtrace_errhash[hval].dter_msg != NULL) { 6775 hval = (hval + 1) % DTRACE_ERRHASHSZ; 6776 continue; 6777 } 6778 6779 dtrace_errhash[hval].dter_msg = str; 6780 dtrace_errhash[hval].dter_count = 1; 6781 goto out; 6782 } 6783 6784 panic("dtrace: undersized error hash"); 6785 out: 6786 mutex_exit(&dtrace_errlock); 6787 } 6788 #endif 6789 6790 /* 6791 * DTrace Matching Functions 6792 * 6793 * These functions are used to match groups of probes, given some elements of 6794 * a probe tuple, or some globbed expressions for elements of a probe tuple. 6795 */ 6796 static int 6797 dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid, 6798 zoneid_t zoneid) 6799 { 6800 if (priv != DTRACE_PRIV_ALL) { 6801 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags; 6802 uint32_t match = priv & ppriv; 6803 6804 /* 6805 * No PRIV_DTRACE_* privileges... 
6806 */ 6807 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER | 6808 DTRACE_PRIV_KERNEL)) == 0) 6809 return (0); 6810 6811 /* 6812 * No matching bits, but there were bits to match... 6813 */ 6814 if (match == 0 && ppriv != 0) 6815 return (0); 6816 6817 /* 6818 * Need to have permissions to the process, but don't... 6819 */ 6820 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 && 6821 uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) { 6822 return (0); 6823 } 6824 6825 /* 6826 * Need to be in the same zone unless we possess the 6827 * privilege to examine all zones. 6828 */ 6829 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 && 6830 zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) { 6831 return (0); 6832 } 6833 } 6834 6835 return (1); 6836 } 6837 6838 /* 6839 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which 6840 * consists of input pattern strings and an ops-vector to evaluate them. 6841 * This function returns >0 for match, 0 for no match, and <0 for error. 6842 */ 6843 static int 6844 dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp, 6845 uint32_t priv, uid_t uid, zoneid_t zoneid) 6846 { 6847 dtrace_provider_t *pvp = prp->dtpr_provider; 6848 int rv; 6849 6850 if (pvp->dtpv_defunct) 6851 return (0); 6852 6853 if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0) 6854 return (rv); 6855 6856 if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0) 6857 return (rv); 6858 6859 if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0) 6860 return (rv); 6861 6862 if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0) 6863 return (rv); 6864 6865 if (dtrace_match_priv(prp, priv, uid, zoneid) == 0) 6866 return (0); 6867 6868 return (rv); 6869 } 6870 6871 /* 6872 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN) 6873 * interface for matching a glob pattern 'p' to an input string 's'. 
Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 * A NULL 's' is treated as the empty string; a NULL 'p' never matches.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
	const char *olds;
	char s1, c;
	int gs;

	if (depth > DTRACE_PROBEKEY_MAXDEPTH)
		return (-1);

	if (s == NULL)
		s = ""; /* treat NULL as empty string */

top:
	/*
	 * Consume one input character per trip through 'top'.  'olds'
	 * remembers where that character was so that the '*' case can retry
	 * from it.
	 */
	olds = s;
	s1 = *s++;

	if (p == NULL)
		return (0);

	/* End of pattern: we match only if the input is exhausted too. */
	if ((c = *p++) == '\0')
		return (s1 == '\0');

	switch (c) {
	case '[': {
		/*
		 * Bracket expression: 'ok' counts the members that accept
		 * s1, 'notflag' is set for a negated ("[!...]") set, and
		 * 'lc' is the previous member, used as the low end of an
		 * "a-z" style range.
		 */
		int ok = 0, notflag = 0;
		char lc = '\0';

		if (s1 == '\0')
			return (0);

		if (*p == '!') {
			notflag = 1;
			p++;
		}

		/* An unterminated bracket expression never matches. */
		if ((c = *p++) == '\0')
			return (0);

		do {
			if (c == '-' && lc != '\0' && *p != ']') {
				/* Range: fetch (possibly escaped) high end. */
				if ((c = *p++) == '\0')
					return (0);
				if (c == '\\' && (c = *p++) == '\0')
					return (0);

				if (notflag) {
					if (s1 < lc || s1 > c)
						ok++;
					else
						return (0);
				} else if (lc <= s1 && s1 <= c)
					ok++;

			} else if (c == '\\' && (c = *p++) == '\0')
				return (0);

			lc = c; /* save left-hand 'c' for next iteration */

			if (notflag) {
				if (s1 != c)
					ok++;
				else
					return (0);
			} else if (s1 == c)
				ok++;

			if ((c = *p++) == '\0')
				return (0);

		} while (c != ']');

		if (ok)
			goto top;

		return (0);
	}

	case '\\':
		/* Escaped character: compare the next pattern byte literally. */
		if ((c = *p++) == '\0')
			return (0);
		/*FALLTHRU*/

	default:
		if (c != s1)
			return (0);
		/*FALLTHRU*/

	case '?':
		/* Any single character, but not the end of the input. */
		if (s1 != '\0')
			goto top;
		return (0);

	case '*':
		while (*p == '*')
			p++; /* consecutive *'s are identical to a single one */

		/* A trailing '*' matches whatever input remains. */
		if (*p == '\0')
			return (1);

		/*
		 * Otherwise try to match the rest of the pattern at each
		 * remaining input position; a negative (depth) error from
		 * the recursion is propagated to our caller.
		 */
		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
				return (gs);
		}

		return (0);
	}
}

/*
 * Exact-match comparator: non-zero iff 's' is non-NULL and equal to 'p'.
 */
/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

/*
 * Comparator used for empty patterns: matches any string.
 */
/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	return (1); /* always match the empty pattern */
}

/*
 * Comparator that matches any non-NULL, non-empty string; the pattern is
 * ignored.
 */
/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	return (s != NULL && s[0] != '\0');
}

/*
 * Invoke 'matched' (with 'arg') on every probe that matches the key 'pkp'
 * and is visible to the given priv/uid/zoneid.  Iteration continues while
 * the callback returns DTRACE_MATCH_NEXT.  Returns DTRACE_MATCH_FAIL if the
 * callback returned it; otherwise returns the number of probes for which
 * the callback was invoked.  The caller must hold dtrace_lock.
 */
static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, rc, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
			nmatched++;
		}
		return (nmatched);
	}

	/*
	 * Only the three name fields of the template are filled in; it is
	 * used solely as a lookup key for the hash tables below.
	 */
	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.  So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			if ((rc = (*matched)(probe, arg)) !=
			    DTRACE_MATCH_NEXT) {
				if (rc == DTRACE_MATCH_FAIL)
					return (DTRACE_MATCH_FAIL);
				break;
			}
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
			if (rc == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
			break;
		}
	}

	return (nmatched);
}

/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	/* Any glob metacharacter (or escape) selects the glob matcher. */
	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}

/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
*/
static void
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	/*
	 * The key borrows the description's strings (no copies are made) and
	 * pairs each with the comparator chosen by dtrace_probekey_func().
	 */
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	/* Completely empty key: require a non-empty function name. */
	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}

/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>.  The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */

/*
 * Register the calling provider with the DTrace framework.  This should
 * generally be called by DTrace providers in their attach(9E) entry point.
 *
 * Returns EINVAL (after logging a warning) if any argument fails
 * validation, and 0 on success, in which case *idp is set to the new
 * provider's identifier.  The name and the attribute/ops structures are
 * copied, so the caller's copies need not persist.  When DTrace registers
 * itself (pops == &dtrace_provider_ops) the caller must already hold
 * dtrace_provider_lock and dtrace_lock; otherwise both are acquired here.
 */
int
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
{
	dtrace_provider_t *provider;

	if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "arguments", name ? name : "<NULL>");
		return (EINVAL);
	}

	if (name[0] == '\0' || dtrace_badname(name)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider name", name);
		return (EINVAL);
	}

	/*
	 * At least one of the provide ops is required, as are enable,
	 * disable and destroy; suspend and resume must be supplied together
	 * or not at all.
	 */
	if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
	    pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
	    pops->dtps_destroy == NULL ||
	    ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider ops", name);
		return (EINVAL);
	}

	if (dtrace_badattr(&pap->dtpa_provider) ||
	    dtrace_badattr(&pap->dtpa_mod) ||
	    dtrace_badattr(&pap->dtpa_func) ||
	    dtrace_badattr(&pap->dtpa_name) ||
	    dtrace_badattr(&pap->dtpa_args)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider attributes", name);
		return (EINVAL);
	}

	if (priv & ~DTRACE_PRIV_ALL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "privilege attributes", name);
		return (EINVAL);
	}

	if ((priv & DTRACE_PRIV_KERNEL) &&
	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
	    pops->dtps_mode == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': need "
		    "dtps_mode() op for given privilege attributes", name);
		return (EINVAL);
	}

	provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
	provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(provider->dtpv_name, name);

	provider->dtpv_attr = *pap;
	provider->dtpv_priv.dtpp_flags = priv;
	if (cr != NULL) {
		provider->dtpv_priv.dtpp_uid = crgetuid(cr);
		provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
	}
	provider->dtpv_pops = *pops;

	/*
	 * Fill in any optional ops that were left NULL with dtrace_nullop so
	 * that the framework can call them unconditionally.
	 */
	if (pops->dtps_provide == NULL) {
		ASSERT(pops->dtps_provide_module != NULL);
		provider->dtpv_pops.dtps_provide =
		    (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
	}

	if (pops->dtps_provide_module == NULL) {
		ASSERT(pops->dtps_provide != NULL);
		provider->dtpv_pops.dtps_provide_module =
		    (void (*)(void *, struct modctl *))dtrace_nullop;
	}

	if (pops->dtps_suspend == NULL) {
		ASSERT(pops->dtps_resume == NULL);
		provider->dtpv_pops.dtps_suspend =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
		provider->dtpv_pops.dtps_resume =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
	}

	provider->dtpv_arg = arg;
	*idp = (dtrace_provider_id_t)provider;

	if (pops == &dtrace_provider_ops) {
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		ASSERT(dtrace_anon.dta_enabling == NULL);

		/*
		 * We make sure that the DTrace provider is at the head of
		 * the provider chain.
		 */
		provider->dtpv_next = dtrace_provider;
		dtrace_provider = provider;
		return (0);
	}

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * If there is at least one provider registered, we'll add this
	 * provider after the first provider.
	 */
	if (dtrace_provider != NULL) {
		provider->dtpv_next = dtrace_provider->dtpv_next;
		dtrace_provider->dtpv_next = provider;
	} else {
		dtrace_provider = provider;
	}

	if (dtrace_retained != NULL) {
		dtrace_enabling_provide(provider);

		/*
		 * Now we need to call dtrace_enabling_matchall() -- which
		 * will acquire cpu_lock and dtrace_lock.  We therefore need
		 * to drop all of our locks before calling into it...
		 */
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_provider_lock);
		dtrace_enabling_matchall();

		return (0);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}

/*
 * Unregister the specified provider from the DTrace framework.  This should
 * generally be called by DTrace providers in their detach(9E) entry point.
 *
 * Returns 0 once the provider and all of its probes have been freed, EBUSY
 * if the provider cannot be removed at present, or EAGAIN if an enabled
 * probe remains on a recently-defunct provider and an asynchronous reap of
 * its enablings has been dispatched (the caller should retry later).  For
 * ordinary providers, dtrace_provider_lock, mod_lock and dtrace_lock are
 * taken here (in that order) and dropped on every return path; when DTrace
 * unregisters itself, the caller already holds the locks and they are left
 * held.
 */
int
dtrace_unregister(dtrace_provider_id_t id)
{
	dtrace_provider_t *old = (dtrace_provider_t *)id;
	dtrace_provider_t *prev = NULL;
	int i, self = 0, noreap = 0;
	dtrace_probe_t *probe, *first = NULL;

	if (old->dtpv_pops.dtps_enable ==
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
		/*
		 * If DTrace itself is the provider, we're called with locks
		 * already held.
		 */
		ASSERT(old == dtrace_provider);
		ASSERT(dtrace_devi != NULL);
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		self = 1;

		if (dtrace_provider->dtpv_next != NULL) {
			/*
			 * There's another provider here; return failure.
			 */
			return (EBUSY);
		}
	} else {
		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&mod_lock);
		mutex_enter(&dtrace_lock);
	}

	/*
	 * If anyone has /dev/dtrace open, or if there are anonymous enabled
	 * probes, we refuse to let providers slither away, unless this
	 * provider has already been explicitly invalidated.
	 */
	if (!old->dtpv_defunct &&
	    (dtrace_opens || (dtrace_anon.dta_state != NULL &&
	    dtrace_anon.dta_state->dts_necbs > 0))) {
		if (!self) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
		}
		return (EBUSY);
	}

	/*
	 * Attempt to destroy the probes associated with this provider.
	 * This first pass only looks for a probe that is still enabled; if
	 * one is found we bail out without having modified anything.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		if (probe->dtpr_ecb == NULL)
			continue;

		/*
		 * If we are trying to unregister a defunct provider, and the
		 * provider was made defunct within the interval dictated by
		 * dtrace_unregister_defunct_reap, we'll (asynchronously)
		 * attempt to reap our enablings.  To denote that the provider
		 * should reattempt to unregister itself at some point in the
		 * future, we will return a differentiable error code (EAGAIN
		 * instead of EBUSY) in this case.
		 */
		if (dtrace_gethrtime() - old->dtpv_defunct >
		    dtrace_unregister_defunct_reap)
			noreap = 1;

		if (!self) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
		}

		if (noreap)
			return (EBUSY);

		(void) taskq_dispatch(dtrace_taskq,
		    (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);

		return (EAGAIN);
	}

	/*
	 * All of the probes for this provider are disabled; we can safely
	 * remove all of them from their hash chains and from the probe array.
	 * The removed probes are strung together through dtpr_nextmod
	 * (headed by 'first') so that they can be freed after the sync.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * The provider's probes have been removed from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		/* Grab the link before the probe is freed. */
		first = probe->dtpr_nextmod;

		old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
	}

	/* Unlink the provider from the global provider chain. */
	if ((prev = dtrace_provider) == old) {
		ASSERT(self || dtrace_devi == NULL);
		ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
		dtrace_provider = old->dtpv_next;
	} else {
		while (prev != NULL && prev->dtpv_next != old)
			prev = prev->dtpv_next;

		if (prev == NULL) {
			panic("attempt to unregister non-existent "
			    "dtrace provider %p\n", (void *)id);
		}

		prev->dtpv_next = old->dtpv_next;
	}

	if (!self) {
		mutex_exit(&dtrace_lock);
		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_provider_lock);
	}

	kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
	kmem_free(old, sizeof (dtrace_provider_t));

	return (0);
}

/*
 * Invalidate the specified provider.  All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 * The provider is marked by storing the current dtrace_gethrtime() value in
 * dtpv_defunct; dtrace_unregister() later compares against that timestamp.
 * Must not be called on the DTrace provider itself.
 */
void
dtrace_invalidate(dtrace_provider_id_t id)
{
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	pvp->dtpv_defunct = dtrace_gethrtime();

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);
}

/*
 * Indicate whether or not DTrace has attached.
7500 */ 7501 int 7502 dtrace_attached(void) 7503 { 7504 /* 7505 * dtrace_provider will be non-NULL iff the DTrace driver has 7506 * attached. (It's non-NULL because DTrace is always itself a 7507 * provider.) 7508 */ 7509 return (dtrace_provider != NULL); 7510 } 7511 7512 /* 7513 * Remove all the unenabled probes for the given provider. This function is 7514 * not unlike dtrace_unregister(), except that it doesn't remove the provider 7515 * -- just as many of its associated probes as it can. 7516 */ 7517 int 7518 dtrace_condense(dtrace_provider_id_t id) 7519 { 7520 dtrace_provider_t *prov = (dtrace_provider_t *)id; 7521 int i; 7522 dtrace_probe_t *probe; 7523 7524 /* 7525 * Make sure this isn't the dtrace provider itself. 7526 */ 7527 ASSERT(prov->dtpv_pops.dtps_enable != 7528 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); 7529 7530 mutex_enter(&dtrace_provider_lock); 7531 mutex_enter(&dtrace_lock); 7532 7533 /* 7534 * Attempt to destroy the probes associated with this provider. 
7535 */ 7536 for (i = 0; i < dtrace_nprobes; i++) { 7537 if ((probe = dtrace_probes[i]) == NULL) 7538 continue; 7539 7540 if (probe->dtpr_provider != prov) 7541 continue; 7542 7543 if (probe->dtpr_ecb != NULL) 7544 continue; 7545 7546 dtrace_probes[i] = NULL; 7547 7548 dtrace_hash_remove(dtrace_bymod, probe); 7549 dtrace_hash_remove(dtrace_byfunc, probe); 7550 dtrace_hash_remove(dtrace_byname, probe); 7551 7552 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, 7553 probe->dtpr_arg); 7554 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); 7555 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); 7556 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); 7557 kmem_free(probe, sizeof (dtrace_probe_t)); 7558 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); 7559 } 7560 7561 mutex_exit(&dtrace_lock); 7562 mutex_exit(&dtrace_provider_lock); 7563 7564 return (0); 7565 } 7566 7567 /* 7568 * DTrace Probe Management Functions 7569 * 7570 * The functions in this section perform the DTrace probe management, 7571 * including functions to create probes, look-up probes, and call into the 7572 * providers to request that probes be provided. Some of these functions are 7573 * in the Provider-to-Framework API; these functions can be identified by the 7574 * fact that they are not declared "static". 7575 */ 7576 7577 /* 7578 * Create a probe with the specified module name, function name, and name. 
*/
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	/*
	 * The DTrace provider itself calls in with dtrace_lock already held;
	 * every other provider has the lock taken (and dropped) on its
	 * behalf.
	 */
	if (provider == dtrace_provider) {
		ASSERT(MUTEX_HELD(&dtrace_lock));
	} else {
		mutex_enter(&dtrace_lock);
	}

	/* Probe IDs are allocated out of dtrace_arena. */
	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	/*
	 * The probe lives at index id - 1 of dtrace_probes.  If that index
	 * is beyond the current array, double the array (or allocate the
	 * first, single-entry array).
	 */
	if (id - 1 >= dtrace_nprobes) {
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			/*
			 * Copy the old contents, make them visible, and only
			 * then publish the new array pointer.
			 */
			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		mutex_exit(&dtrace_lock);

	return (id);
}

/*
 * Return the probe with the given ID, or NULL if the ID is zero, is beyond
 * the probe array, or names an empty slot.  The caller must hold
 * dtrace_lock.
 */
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}

/*
 * dtrace_match() callback used by dtrace_probe_lookup(): store the matched
 * probe's ID through 'arg' (a dtrace_id_t *) and stop the iteration.
 */
static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}

/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.  A NULL module, function or name acts as a wildcard;
 * non-NULL values must match exactly.  Returns the ID of the first matching
 * probe, or 0 if there is none.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	/* The lookup itself is done with full privilege. */
	mutex_enter(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	mutex_exit(&dtrace_lock);

	/* 'id' is only written by the callback, i.e. when match == 1. */
	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}

/*
 * Returns the probe argument associated with the specified probe.
7708 */ 7709 void * 7710 dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid) 7711 { 7712 dtrace_probe_t *probe; 7713 void *rval = NULL; 7714 7715 mutex_enter(&dtrace_lock); 7716 7717 if ((probe = dtrace_probe_lookup_id(pid)) != NULL && 7718 probe->dtpr_provider == (dtrace_provider_t *)id) 7719 rval = probe->dtpr_arg; 7720 7721 mutex_exit(&dtrace_lock); 7722 7723 return (rval); 7724 } 7725 7726 /* 7727 * Copy a probe into a probe description. 7728 */ 7729 static void 7730 dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) 7731 { 7732 bzero(pdp, sizeof (dtrace_probedesc_t)); 7733 pdp->dtpd_id = prp->dtpr_id; 7734 7735 (void) strncpy(pdp->dtpd_provider, 7736 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1); 7737 7738 (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1); 7739 (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1); 7740 (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1); 7741 } 7742 7743 /* 7744 * Called to indicate that a probe -- or probes -- should be provided by a 7745 * specfied provider. If the specified description is NULL, the provider will 7746 * be told to provide all of its probes. (This is done whenever a new 7747 * consumer comes along, or whenever a retained enabling is to be matched.) If 7748 * the specified description is non-NULL, the provider is given the 7749 * opportunity to dynamically provide the specified probe, allowing providers 7750 * to support the creation of probes on-the-fly. (So-called _autocreated_ 7751 * probes.) If the provider is NULL, the operations will be applied to all 7752 * providers; if the provider is non-NULL the operations will only be applied 7753 * to the specified provider. 
The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
	struct modctl *ctl;
	int all = 0;

	ASSERT(MUTEX_HELD(&dtrace_provider_lock));

	/* A NULL provider means "walk the whole provider chain". */
	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

		/*
		 * Now call the per-module provide operation.  We will grab
		 * mod_lock to prevent the list from being modified.  Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		mutex_enter(&mod_lock);

		/*
		 * The module list is circular, starting and ending at
		 * 'modules'; busy modules and modules with no mod_mp are
		 * skipped.
		 */
		ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		mutex_exit(&mod_lock);
	} while (all && (prv = prv->dtpv_next) != NULL);
}

/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * denoted by offs.  'offs' is the byte offset of the desired operation
 * within the provider's dtpv_pops structure; the operation must have the
 * (void *, dtrace_id_t, void *) signature.  Only enabled probes (those with
 * a non-NULL dtpr_ecb) are visited.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array.  This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		/* Fetch the op stored 'offs' bytes into this provider's ops. */
		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}

/*
 * Create ECBs in the enabling 'enab' for every probe that matches 'desc' and
 * is visible to the credentials of the enabling's consumer state.  A NULL
 * description requests an ECB with a NULL probe and returns 0; otherwise the
 * result of dtrace_match() is returned.  The caller must hold dtrace_lock.
 */
static int
dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}

/*
 * DTrace Helper Provider Functions
 */

/*
 * Unpack a DOF-encoded attribute into its name, data and class components.
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}

/*
 * Fill in a helper provider description from a DOF provider section.  The
 * provider name is a pointer into 'strtab' (it is not copied), and each of
 * the five attribute sets is unpacked with dtrace_dofattr2attr().
 */
static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}

/*
 * Hand one DOF provider section ('sec' within the DOF at dhp->dofhp_dof) to
 * the meta provider: create the provider for process 'pid' and then create
 * each probe it describes.  No bounds checking is done here; the comment
 * below indicates that the DOF is expected to have been checked by
 * dtrace_helper_provider_validate().
 */
static void
dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint32_t *off, *enoff;
	uint8_t *arg;
	char *strtab;
	uint_t i, nprobes;
	dtrace_helper_provdesc_t dhpv;
	dtrace_helper_probedesc_t dhpb;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;
	void *parg;

	/*
	 * Locate the section headers for the string table, probes, probe
	 * arguments and probe offsets that the provider section refers to
	 * by section index.
	 */
	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);
	prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_probes * dof->dofh_secsize);
	arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_prargs * dof->dofh_secsize);
	off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_proffs * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
	off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
	enoff = NULL;

	/*
	 * See dtrace_helper_provider_validate().
	 * The is-enabled offsets section is only consulted for DOF versions
	 * other than DOF_VERSION_1, and only if the provider names one.
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE) {
		enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
		    provider->dofpv_prenoffs * dof->dofh_secsize);
		enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
	}

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
		return;

	meta->dtm_count++;

	/*
	 * Create the probes.
	 */
	for (i = 0; i < nprobes; i++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + i * prb_sec->dofs_entsize);

		dhpb.dthpb_mod = dhp->dofhp_mod;
		dhpb.dthpb_func = strtab + probe->dofpr_func;
		dhpb.dthpb_name = strtab + probe->dofpr_name;
		dhpb.dthpb_base = probe->dofpr_addr;
		dhpb.dthpb_offs = off + probe->dofpr_offidx;
		dhpb.dthpb_noffs = probe->dofpr_noffs;
		if (enoff != NULL) {
			dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
			dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
		} else {
			dhpb.dthpb_enoffs = NULL;
			dhpb.dthpb_nenoffs = 0;
		}
		dhpb.dthpb_args = arg + probe->dofpr_argidx;
		dhpb.dthpb_nargc = probe->dofpr_nargc;
		dhpb.dthpb_xargc = probe->dofpr_xargc;
		dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
		dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;

		mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
	}
}

/*
 * Provide every DOF_SECT_PROVIDER section of the helper's DOF for process
 * 'pid', then rematch the retained enablings against any probes that were
 * created.  The caller must hold dtrace_meta_lock.
 */
static void
dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_meta_lock));

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provide_one(dhp, sec, pid);
	}

	/*
	 * We may have just created probes, so we must now rematch against
	 * any retained enablings.  Note that this call will acquire both
	 * cpu_lock and dtrace_lock; the fact that we are holding
	 * dtrace_meta_lock now is what defines the ordering with respect to
	 * these three locks.
	 */
	dtrace_enabling_matchall();
}

/*
 * Ask the meta provider to remove, for process 'pid', the provider described
 * by one DOF provider section, and drop the meta provider's count.
 */
static void
dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec;
	dof_provider_t *provider;
	char *strtab;
	dtrace_helper_provdesc_t dhpv;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	/*
	 * Build the description of the provider to be removed.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);

	meta->dtm_count--;
}

/*
 * Remove every provider described by a DOF_SECT_PROVIDER section of the
 * helper's DOF for process 'pid'.  The caller must hold dtrace_meta_lock.
 */
static void
dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_meta_lock));

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provider_remove_one(dhp, sec, pid);
	}
}

/*
 * DTrace Meta Provider-to-Framework API Functions
 *
 * These functions implement the Meta Provider-to-Framework API, as described
 * in <sys/dtrace.h>.
 */
int
dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
    dtrace_meta_provider_id_t *idp)
{
	dtrace_meta_t *meta;
	dtrace_helpers_t *help, *next;
	int i;

	*idp = DTRACE_METAPROVNONE;

	/*
	 * We strictly don't need the name, but we hold onto it for
	 * debuggability. All hail error queues!
8076 */ 8077 if (name == NULL) { 8078 cmn_err(CE_WARN, "failed to register meta-provider: " 8079 "invalid name"); 8080 return (EINVAL); 8081 } 8082 8083 if (mops == NULL || 8084 mops->dtms_create_probe == NULL || 8085 mops->dtms_provide_pid == NULL || 8086 mops->dtms_remove_pid == NULL) { 8087 cmn_err(CE_WARN, "failed to register meta-register %s: " 8088 "invalid ops", name); 8089 return (EINVAL); 8090 } 8091 8092 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); 8093 meta->dtm_mops = *mops; 8094 meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); 8095 (void) strcpy(meta->dtm_name, name); 8096 meta->dtm_arg = arg; 8097 8098 mutex_enter(&dtrace_meta_lock); 8099 mutex_enter(&dtrace_lock); 8100 8101 if (dtrace_meta_pid != NULL) { 8102 mutex_exit(&dtrace_lock); 8103 mutex_exit(&dtrace_meta_lock); 8104 cmn_err(CE_WARN, "failed to register meta-register %s: " 8105 "user-land meta-provider exists", name); 8106 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); 8107 kmem_free(meta, sizeof (dtrace_meta_t)); 8108 return (EINVAL); 8109 } 8110 8111 dtrace_meta_pid = meta; 8112 *idp = (dtrace_meta_provider_id_t)meta; 8113 8114 /* 8115 * If there are providers and probes ready to go, pass them 8116 * off to the new meta provider now. 
8117 */ 8118 8119 help = dtrace_deferred_pid; 8120 dtrace_deferred_pid = NULL; 8121 8122 mutex_exit(&dtrace_lock); 8123 8124 while (help != NULL) { 8125 for (i = 0; i < help->dthps_nprovs; i++) { 8126 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 8127 help->dthps_pid); 8128 } 8129 8130 next = help->dthps_next; 8131 help->dthps_next = NULL; 8132 help->dthps_prev = NULL; 8133 help->dthps_deferred = 0; 8134 help = next; 8135 } 8136 8137 mutex_exit(&dtrace_meta_lock); 8138 8139 return (0); 8140 } 8141 8142 int 8143 dtrace_meta_unregister(dtrace_meta_provider_id_t id) 8144 { 8145 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id; 8146 8147 mutex_enter(&dtrace_meta_lock); 8148 mutex_enter(&dtrace_lock); 8149 8150 if (old == dtrace_meta_pid) { 8151 pp = &dtrace_meta_pid; 8152 } else { 8153 panic("attempt to unregister non-existent " 8154 "dtrace meta-provider %p\n", (void *)old); 8155 } 8156 8157 if (old->dtm_count != 0) { 8158 mutex_exit(&dtrace_lock); 8159 mutex_exit(&dtrace_meta_lock); 8160 return (EBUSY); 8161 } 8162 8163 *pp = NULL; 8164 8165 mutex_exit(&dtrace_lock); 8166 mutex_exit(&dtrace_meta_lock); 8167 8168 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); 8169 kmem_free(old, sizeof (dtrace_meta_t)); 8170 8171 return (0); 8172 } 8173 8174 8175 /* 8176 * DTrace DIF Object Functions 8177 */ 8178 static int 8179 dtrace_difo_err(uint_t pc, const char *format, ...) 8180 { 8181 if (dtrace_err_verbose) { 8182 va_list alist; 8183 8184 (void) uprintf("dtrace DIF object error: [%u]: ", pc); 8185 va_start(alist, format); 8186 (void) vuprintf(format, alist); 8187 va_end(alist); 8188 } 8189 8190 #ifdef DTRACE_ERRDEBUG 8191 dtrace_errdebug(format); 8192 #endif 8193 return (1); 8194 } 8195 8196 /* 8197 * Validate a DTrace DIF object by checking the IR instructions. The following 8198 * rules are currently enforced by dtrace_difo_validate(): 8199 * 8200 * 1. Each instruction must have a valid opcode 8201 * 2. 
Each register, string, variable, or subroutine reference must be valid 8202 * 3. No instruction can modify register %r0 (must be zero) 8203 * 4. All instruction reserved bits must be set to zero 8204 * 5. The last instruction must be a "ret" instruction 8205 * 6. All branch targets must reference a valid instruction _after_ the branch 8206 */ 8207 static int 8208 dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, 8209 cred_t *cr) 8210 { 8211 int err = 0, i; 8212 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; 8213 int kcheckload; 8214 uint_t pc; 8215 8216 kcheckload = cr == NULL || 8217 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; 8218 8219 dp->dtdo_destructive = 0; 8220 8221 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { 8222 dif_instr_t instr = dp->dtdo_buf[pc]; 8223 8224 uint_t r1 = DIF_INSTR_R1(instr); 8225 uint_t r2 = DIF_INSTR_R2(instr); 8226 uint_t rd = DIF_INSTR_RD(instr); 8227 uint_t rs = DIF_INSTR_RS(instr); 8228 uint_t label = DIF_INSTR_LABEL(instr); 8229 uint_t v = DIF_INSTR_VAR(instr); 8230 uint_t subr = DIF_INSTR_SUBR(instr); 8231 uint_t type = DIF_INSTR_TYPE(instr); 8232 uint_t op = DIF_INSTR_OP(instr); 8233 8234 switch (op) { 8235 case DIF_OP_OR: 8236 case DIF_OP_XOR: 8237 case DIF_OP_AND: 8238 case DIF_OP_SLL: 8239 case DIF_OP_SRL: 8240 case DIF_OP_SRA: 8241 case DIF_OP_SUB: 8242 case DIF_OP_ADD: 8243 case DIF_OP_MUL: 8244 case DIF_OP_SDIV: 8245 case DIF_OP_UDIV: 8246 case DIF_OP_SREM: 8247 case DIF_OP_UREM: 8248 case DIF_OP_COPYS: 8249 if (r1 >= nregs) 8250 err += efunc(pc, "invalid register %u\n", r1); 8251 if (r2 >= nregs) 8252 err += efunc(pc, "invalid register %u\n", r2); 8253 if (rd >= nregs) 8254 err += efunc(pc, "invalid register %u\n", rd); 8255 if (rd == 0) 8256 err += efunc(pc, "cannot write to %r0\n"); 8257 break; 8258 case DIF_OP_NOT: 8259 case DIF_OP_MOV: 8260 case DIF_OP_ALLOCS: 8261 if (r1 >= nregs) 8262 err += efunc(pc, "invalid register %u\n", r1); 8263 if (r2 
!= 0) 8264 err += efunc(pc, "non-zero reserved bits\n"); 8265 if (rd >= nregs) 8266 err += efunc(pc, "invalid register %u\n", rd); 8267 if (rd == 0) 8268 err += efunc(pc, "cannot write to %r0\n"); 8269 break; 8270 case DIF_OP_LDSB: 8271 case DIF_OP_LDSH: 8272 case DIF_OP_LDSW: 8273 case DIF_OP_LDUB: 8274 case DIF_OP_LDUH: 8275 case DIF_OP_LDUW: 8276 case DIF_OP_LDX: 8277 if (r1 >= nregs) 8278 err += efunc(pc, "invalid register %u\n", r1); 8279 if (r2 != 0) 8280 err += efunc(pc, "non-zero reserved bits\n"); 8281 if (rd >= nregs) 8282 err += efunc(pc, "invalid register %u\n", rd); 8283 if (rd == 0) 8284 err += efunc(pc, "cannot write to %r0\n"); 8285 if (kcheckload) 8286 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + 8287 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); 8288 break; 8289 case DIF_OP_RLDSB: 8290 case DIF_OP_RLDSH: 8291 case DIF_OP_RLDSW: 8292 case DIF_OP_RLDUB: 8293 case DIF_OP_RLDUH: 8294 case DIF_OP_RLDUW: 8295 case DIF_OP_RLDX: 8296 if (r1 >= nregs) 8297 err += efunc(pc, "invalid register %u\n", r1); 8298 if (r2 != 0) 8299 err += efunc(pc, "non-zero reserved bits\n"); 8300 if (rd >= nregs) 8301 err += efunc(pc, "invalid register %u\n", rd); 8302 if (rd == 0) 8303 err += efunc(pc, "cannot write to %r0\n"); 8304 break; 8305 case DIF_OP_ULDSB: 8306 case DIF_OP_ULDSH: 8307 case DIF_OP_ULDSW: 8308 case DIF_OP_ULDUB: 8309 case DIF_OP_ULDUH: 8310 case DIF_OP_ULDUW: 8311 case DIF_OP_ULDX: 8312 if (r1 >= nregs) 8313 err += efunc(pc, "invalid register %u\n", r1); 8314 if (r2 != 0) 8315 err += efunc(pc, "non-zero reserved bits\n"); 8316 if (rd >= nregs) 8317 err += efunc(pc, "invalid register %u\n", rd); 8318 if (rd == 0) 8319 err += efunc(pc, "cannot write to %r0\n"); 8320 break; 8321 case DIF_OP_STB: 8322 case DIF_OP_STH: 8323 case DIF_OP_STW: 8324 case DIF_OP_STX: 8325 if (r1 >= nregs) 8326 err += efunc(pc, "invalid register %u\n", r1); 8327 if (r2 != 0) 8328 err += efunc(pc, "non-zero reserved bits\n"); 8329 if (rd >= nregs) 8330 err += efunc(pc, "invalid register %u\n", rd); 
8331 if (rd == 0) 8332 err += efunc(pc, "cannot write to 0 address\n"); 8333 break; 8334 case DIF_OP_CMP: 8335 case DIF_OP_SCMP: 8336 if (r1 >= nregs) 8337 err += efunc(pc, "invalid register %u\n", r1); 8338 if (r2 >= nregs) 8339 err += efunc(pc, "invalid register %u\n", r2); 8340 if (rd != 0) 8341 err += efunc(pc, "non-zero reserved bits\n"); 8342 break; 8343 case DIF_OP_TST: 8344 if (r1 >= nregs) 8345 err += efunc(pc, "invalid register %u\n", r1); 8346 if (r2 != 0 || rd != 0) 8347 err += efunc(pc, "non-zero reserved bits\n"); 8348 break; 8349 case DIF_OP_BA: 8350 case DIF_OP_BE: 8351 case DIF_OP_BNE: 8352 case DIF_OP_BG: 8353 case DIF_OP_BGU: 8354 case DIF_OP_BGE: 8355 case DIF_OP_BGEU: 8356 case DIF_OP_BL: 8357 case DIF_OP_BLU: 8358 case DIF_OP_BLE: 8359 case DIF_OP_BLEU: 8360 if (label >= dp->dtdo_len) { 8361 err += efunc(pc, "invalid branch target %u\n", 8362 label); 8363 } 8364 if (label <= pc) { 8365 err += efunc(pc, "backward branch to %u\n", 8366 label); 8367 } 8368 break; 8369 case DIF_OP_RET: 8370 if (r1 != 0 || r2 != 0) 8371 err += efunc(pc, "non-zero reserved bits\n"); 8372 if (rd >= nregs) 8373 err += efunc(pc, "invalid register %u\n", rd); 8374 break; 8375 case DIF_OP_NOP: 8376 case DIF_OP_POPTS: 8377 case DIF_OP_FLUSHTS: 8378 if (r1 != 0 || r2 != 0 || rd != 0) 8379 err += efunc(pc, "non-zero reserved bits\n"); 8380 break; 8381 case DIF_OP_SETX: 8382 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) { 8383 err += efunc(pc, "invalid integer ref %u\n", 8384 DIF_INSTR_INTEGER(instr)); 8385 } 8386 if (rd >= nregs) 8387 err += efunc(pc, "invalid register %u\n", rd); 8388 if (rd == 0) 8389 err += efunc(pc, "cannot write to %r0\n"); 8390 break; 8391 case DIF_OP_SETS: 8392 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { 8393 err += efunc(pc, "invalid string ref %u\n", 8394 DIF_INSTR_STRING(instr)); 8395 } 8396 if (rd >= nregs) 8397 err += efunc(pc, "invalid register %u\n", rd); 8398 if (rd == 0) 8399 err += efunc(pc, "cannot write to %r0\n"); 8400 break; 
8401 case DIF_OP_LDGA: 8402 case DIF_OP_LDTA: 8403 if (r1 > DIF_VAR_ARRAY_MAX) 8404 err += efunc(pc, "invalid array %u\n", r1); 8405 if (r2 >= nregs) 8406 err += efunc(pc, "invalid register %u\n", r2); 8407 if (rd >= nregs) 8408 err += efunc(pc, "invalid register %u\n", rd); 8409 if (rd == 0) 8410 err += efunc(pc, "cannot write to %r0\n"); 8411 break; 8412 case DIF_OP_LDGS: 8413 case DIF_OP_LDTS: 8414 case DIF_OP_LDLS: 8415 case DIF_OP_LDGAA: 8416 case DIF_OP_LDTAA: 8417 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) 8418 err += efunc(pc, "invalid variable %u\n", v); 8419 if (rd >= nregs) 8420 err += efunc(pc, "invalid register %u\n", rd); 8421 if (rd == 0) 8422 err += efunc(pc, "cannot write to %r0\n"); 8423 break; 8424 case DIF_OP_STGS: 8425 case DIF_OP_STTS: 8426 case DIF_OP_STLS: 8427 case DIF_OP_STGAA: 8428 case DIF_OP_STTAA: 8429 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) 8430 err += efunc(pc, "invalid variable %u\n", v); 8431 if (rs >= nregs) 8432 err += efunc(pc, "invalid register %u\n", rd); 8433 break; 8434 case DIF_OP_CALL: 8435 if (subr > DIF_SUBR_MAX) 8436 err += efunc(pc, "invalid subr %u\n", subr); 8437 if (rd >= nregs) 8438 err += efunc(pc, "invalid register %u\n", rd); 8439 if (rd == 0) 8440 err += efunc(pc, "cannot write to %r0\n"); 8441 8442 if (subr == DIF_SUBR_COPYOUT || 8443 subr == DIF_SUBR_COPYOUTSTR) { 8444 dp->dtdo_destructive = 1; 8445 } 8446 break; 8447 case DIF_OP_PUSHTR: 8448 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF) 8449 err += efunc(pc, "invalid ref type %u\n", type); 8450 if (r2 >= nregs) 8451 err += efunc(pc, "invalid register %u\n", r2); 8452 if (rs >= nregs) 8453 err += efunc(pc, "invalid register %u\n", rs); 8454 break; 8455 case DIF_OP_PUSHTV: 8456 if (type != DIF_TYPE_CTF) 8457 err += efunc(pc, "invalid val type %u\n", type); 8458 if (r2 >= nregs) 8459 err += efunc(pc, "invalid register %u\n", r2); 8460 if (rs >= nregs) 8461 err += efunc(pc, "invalid register %u\n", rs); 8462 break; 8463 default: 
8464 err += efunc(pc, "invalid opcode %u\n", 8465 DIF_INSTR_OP(instr)); 8466 } 8467 } 8468 8469 if (dp->dtdo_len != 0 && 8470 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { 8471 err += efunc(dp->dtdo_len - 1, 8472 "expected 'ret' as last DIF instruction\n"); 8473 } 8474 8475 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) { 8476 /* 8477 * If we're not returning by reference, the size must be either 8478 * 0 or the size of one of the base types. 8479 */ 8480 switch (dp->dtdo_rtype.dtdt_size) { 8481 case 0: 8482 case sizeof (uint8_t): 8483 case sizeof (uint16_t): 8484 case sizeof (uint32_t): 8485 case sizeof (uint64_t): 8486 break; 8487 8488 default: 8489 err += efunc(dp->dtdo_len - 1, "bad return size\n"); 8490 } 8491 } 8492 8493 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { 8494 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; 8495 dtrace_diftype_t *vt, *et; 8496 uint_t id, ndx; 8497 8498 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && 8499 v->dtdv_scope != DIFV_SCOPE_THREAD && 8500 v->dtdv_scope != DIFV_SCOPE_LOCAL) { 8501 err += efunc(i, "unrecognized variable scope %d\n", 8502 v->dtdv_scope); 8503 break; 8504 } 8505 8506 if (v->dtdv_kind != DIFV_KIND_ARRAY && 8507 v->dtdv_kind != DIFV_KIND_SCALAR) { 8508 err += efunc(i, "unrecognized variable type %d\n", 8509 v->dtdv_kind); 8510 break; 8511 } 8512 8513 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) { 8514 err += efunc(i, "%d exceeds variable id limit\n", id); 8515 break; 8516 } 8517 8518 if (id < DIF_VAR_OTHER_UBASE) 8519 continue; 8520 8521 /* 8522 * For user-defined variables, we need to check that this 8523 * definition is identical to any previous definition that we 8524 * encountered. 
8525 */ 8526 ndx = id - DIF_VAR_OTHER_UBASE; 8527 8528 switch (v->dtdv_scope) { 8529 case DIFV_SCOPE_GLOBAL: 8530 if (ndx < vstate->dtvs_nglobals) { 8531 dtrace_statvar_t *svar; 8532 8533 if ((svar = vstate->dtvs_globals[ndx]) != NULL) 8534 existing = &svar->dtsv_var; 8535 } 8536 8537 break; 8538 8539 case DIFV_SCOPE_THREAD: 8540 if (ndx < vstate->dtvs_ntlocals) 8541 existing = &vstate->dtvs_tlocals[ndx]; 8542 break; 8543 8544 case DIFV_SCOPE_LOCAL: 8545 if (ndx < vstate->dtvs_nlocals) { 8546 dtrace_statvar_t *svar; 8547 8548 if ((svar = vstate->dtvs_locals[ndx]) != NULL) 8549 existing = &svar->dtsv_var; 8550 } 8551 8552 break; 8553 } 8554 8555 vt = &v->dtdv_type; 8556 8557 if (vt->dtdt_flags & DIF_TF_BYREF) { 8558 if (vt->dtdt_size == 0) { 8559 err += efunc(i, "zero-sized variable\n"); 8560 break; 8561 } 8562 8563 if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && 8564 vt->dtdt_size > dtrace_global_maxsize) { 8565 err += efunc(i, "oversized by-ref global\n"); 8566 break; 8567 } 8568 } 8569 8570 if (existing == NULL || existing->dtdv_id == 0) 8571 continue; 8572 8573 ASSERT(existing->dtdv_id == v->dtdv_id); 8574 ASSERT(existing->dtdv_scope == v->dtdv_scope); 8575 8576 if (existing->dtdv_kind != v->dtdv_kind) 8577 err += efunc(i, "%d changed variable kind\n", id); 8578 8579 et = &existing->dtdv_type; 8580 8581 if (vt->dtdt_flags != et->dtdt_flags) { 8582 err += efunc(i, "%d changed variable type flags\n", id); 8583 break; 8584 } 8585 8586 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { 8587 err += efunc(i, "%d changed variable type size\n", id); 8588 break; 8589 } 8590 } 8591 8592 return (err); 8593 } 8594 8595 /* 8596 * Validate a DTrace DIF object that it is to be used as a helper. Helpers 8597 * are much more constrained than normal DIFOs. Specifically, they may 8598 * not: 8599 * 8600 * 1. Make calls to subroutines other than copyin(), copyinstr() or 8601 * miscellaneous string routines 8602 * 2. 
Access DTrace variables other than the args[] array, and the 8603 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables. 8604 * 3. Have thread-local variables. 8605 * 4. Have dynamic variables. 8606 */ 8607 static int 8608 dtrace_difo_validate_helper(dtrace_difo_t *dp) 8609 { 8610 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; 8611 int err = 0; 8612 uint_t pc; 8613 8614 for (pc = 0; pc < dp->dtdo_len; pc++) { 8615 dif_instr_t instr = dp->dtdo_buf[pc]; 8616 8617 uint_t v = DIF_INSTR_VAR(instr); 8618 uint_t subr = DIF_INSTR_SUBR(instr); 8619 uint_t op = DIF_INSTR_OP(instr); 8620 8621 switch (op) { 8622 case DIF_OP_OR: 8623 case DIF_OP_XOR: 8624 case DIF_OP_AND: 8625 case DIF_OP_SLL: 8626 case DIF_OP_SRL: 8627 case DIF_OP_SRA: 8628 case DIF_OP_SUB: 8629 case DIF_OP_ADD: 8630 case DIF_OP_MUL: 8631 case DIF_OP_SDIV: 8632 case DIF_OP_UDIV: 8633 case DIF_OP_SREM: 8634 case DIF_OP_UREM: 8635 case DIF_OP_COPYS: 8636 case DIF_OP_NOT: 8637 case DIF_OP_MOV: 8638 case DIF_OP_RLDSB: 8639 case DIF_OP_RLDSH: 8640 case DIF_OP_RLDSW: 8641 case DIF_OP_RLDUB: 8642 case DIF_OP_RLDUH: 8643 case DIF_OP_RLDUW: 8644 case DIF_OP_RLDX: 8645 case DIF_OP_ULDSB: 8646 case DIF_OP_ULDSH: 8647 case DIF_OP_ULDSW: 8648 case DIF_OP_ULDUB: 8649 case DIF_OP_ULDUH: 8650 case DIF_OP_ULDUW: 8651 case DIF_OP_ULDX: 8652 case DIF_OP_STB: 8653 case DIF_OP_STH: 8654 case DIF_OP_STW: 8655 case DIF_OP_STX: 8656 case DIF_OP_ALLOCS: 8657 case DIF_OP_CMP: 8658 case DIF_OP_SCMP: 8659 case DIF_OP_TST: 8660 case DIF_OP_BA: 8661 case DIF_OP_BE: 8662 case DIF_OP_BNE: 8663 case DIF_OP_BG: 8664 case DIF_OP_BGU: 8665 case DIF_OP_BGE: 8666 case DIF_OP_BGEU: 8667 case DIF_OP_BL: 8668 case DIF_OP_BLU: 8669 case DIF_OP_BLE: 8670 case DIF_OP_BLEU: 8671 case DIF_OP_RET: 8672 case DIF_OP_NOP: 8673 case DIF_OP_POPTS: 8674 case DIF_OP_FLUSHTS: 8675 case DIF_OP_SETX: 8676 case DIF_OP_SETS: 8677 case DIF_OP_LDGA: 8678 case DIF_OP_LDLS: 8679 case DIF_OP_STGS: 8680 case DIF_OP_STLS: 8681 case 
DIF_OP_PUSHTR: 8682 case DIF_OP_PUSHTV: 8683 break; 8684 8685 case DIF_OP_LDGS: 8686 if (v >= DIF_VAR_OTHER_UBASE) 8687 break; 8688 8689 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) 8690 break; 8691 8692 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID || 8693 v == DIF_VAR_PPID || v == DIF_VAR_TID || 8694 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME || 8695 v == DIF_VAR_UID || v == DIF_VAR_GID) 8696 break; 8697 8698 err += efunc(pc, "illegal variable %u\n", v); 8699 break; 8700 8701 case DIF_OP_LDTA: 8702 case DIF_OP_LDTS: 8703 case DIF_OP_LDGAA: 8704 case DIF_OP_LDTAA: 8705 err += efunc(pc, "illegal dynamic variable load\n"); 8706 break; 8707 8708 case DIF_OP_STTS: 8709 case DIF_OP_STGAA: 8710 case DIF_OP_STTAA: 8711 err += efunc(pc, "illegal dynamic variable store\n"); 8712 break; 8713 8714 case DIF_OP_CALL: 8715 if (subr == DIF_SUBR_ALLOCA || 8716 subr == DIF_SUBR_BCOPY || 8717 subr == DIF_SUBR_COPYIN || 8718 subr == DIF_SUBR_COPYINTO || 8719 subr == DIF_SUBR_COPYINSTR || 8720 subr == DIF_SUBR_INDEX || 8721 subr == DIF_SUBR_INET_NTOA || 8722 subr == DIF_SUBR_INET_NTOA6 || 8723 subr == DIF_SUBR_INET_NTOP || 8724 subr == DIF_SUBR_LLTOSTR || 8725 subr == DIF_SUBR_RINDEX || 8726 subr == DIF_SUBR_STRCHR || 8727 subr == DIF_SUBR_STRJOIN || 8728 subr == DIF_SUBR_STRRCHR || 8729 subr == DIF_SUBR_STRSTR || 8730 subr == DIF_SUBR_HTONS || 8731 subr == DIF_SUBR_HTONL || 8732 subr == DIF_SUBR_HTONLL || 8733 subr == DIF_SUBR_NTOHS || 8734 subr == DIF_SUBR_NTOHL || 8735 subr == DIF_SUBR_NTOHLL) 8736 break; 8737 8738 err += efunc(pc, "invalid subr %u\n", subr); 8739 break; 8740 8741 default: 8742 err += efunc(pc, "invalid opcode %u\n", 8743 DIF_INSTR_OP(instr)); 8744 } 8745 } 8746 8747 return (err); 8748 } 8749 8750 /* 8751 * Returns 1 if the expression in the DIF object can be cached on a per-thread 8752 * basis; 0 if not. 
8753 */ 8754 static int 8755 dtrace_difo_cacheable(dtrace_difo_t *dp) 8756 { 8757 int i; 8758 8759 if (dp == NULL) 8760 return (0); 8761 8762 for (i = 0; i < dp->dtdo_varlen; i++) { 8763 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8764 8765 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) 8766 continue; 8767 8768 switch (v->dtdv_id) { 8769 case DIF_VAR_CURTHREAD: 8770 case DIF_VAR_PID: 8771 case DIF_VAR_TID: 8772 case DIF_VAR_EXECNAME: 8773 case DIF_VAR_ZONENAME: 8774 break; 8775 8776 default: 8777 return (0); 8778 } 8779 } 8780 8781 /* 8782 * This DIF object may be cacheable. Now we need to look for any 8783 * array loading instructions, any memory loading instructions, or 8784 * any stores to thread-local variables. 8785 */ 8786 for (i = 0; i < dp->dtdo_len; i++) { 8787 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); 8788 8789 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || 8790 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || 8791 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || 8792 op == DIF_OP_LDGA || op == DIF_OP_STTS) 8793 return (0); 8794 } 8795 8796 return (1); 8797 } 8798 8799 static void 8800 dtrace_difo_hold(dtrace_difo_t *dp) 8801 { 8802 int i; 8803 8804 ASSERT(MUTEX_HELD(&dtrace_lock)); 8805 8806 dp->dtdo_refcnt++; 8807 ASSERT(dp->dtdo_refcnt != 0); 8808 8809 /* 8810 * We need to check this DIF object for references to the variable 8811 * DIF_VAR_VTIMESTAMP. 8812 */ 8813 for (i = 0; i < dp->dtdo_varlen; i++) { 8814 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8815 8816 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 8817 continue; 8818 8819 if (dtrace_vtime_references++ == 0) 8820 dtrace_vtime_enable(); 8821 } 8822 } 8823 8824 /* 8825 * This routine calculates the dynamic variable chunksize for a given DIF 8826 * object. The calculation is not fool-proof, and can probably be tricked by 8827 * malicious DIF -- but it works for all compiler-generated DIF. 
Because this 8828 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail 8829 * if a dynamic variable size exceeds the chunksize. 8830 */ 8831 static void 8832 dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 8833 { 8834 uint64_t sval; 8835 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ 8836 const dif_instr_t *text = dp->dtdo_buf; 8837 uint_t pc, srd = 0; 8838 uint_t ttop = 0; 8839 size_t size, ksize; 8840 uint_t id, i; 8841 8842 for (pc = 0; pc < dp->dtdo_len; pc++) { 8843 dif_instr_t instr = text[pc]; 8844 uint_t op = DIF_INSTR_OP(instr); 8845 uint_t rd = DIF_INSTR_RD(instr); 8846 uint_t r1 = DIF_INSTR_R1(instr); 8847 uint_t nkeys = 0; 8848 uchar_t scope; 8849 8850 dtrace_key_t *key = tupregs; 8851 8852 switch (op) { 8853 case DIF_OP_SETX: 8854 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)]; 8855 srd = rd; 8856 continue; 8857 8858 case DIF_OP_STTS: 8859 key = &tupregs[DIF_DTR_NREGS]; 8860 key[0].dttk_size = 0; 8861 key[1].dttk_size = 0; 8862 nkeys = 2; 8863 scope = DIFV_SCOPE_THREAD; 8864 break; 8865 8866 case DIF_OP_STGAA: 8867 case DIF_OP_STTAA: 8868 nkeys = ttop; 8869 8870 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) 8871 key[nkeys++].dttk_size = 0; 8872 8873 key[nkeys++].dttk_size = 0; 8874 8875 if (op == DIF_OP_STTAA) { 8876 scope = DIFV_SCOPE_THREAD; 8877 } else { 8878 scope = DIFV_SCOPE_GLOBAL; 8879 } 8880 8881 break; 8882 8883 case DIF_OP_PUSHTR: 8884 if (ttop == DIF_DTR_NREGS) 8885 return; 8886 8887 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) { 8888 /* 8889 * If the register for the size of the "pushtr" 8890 * is %r0 (or the value is 0) and the type is 8891 * a string, we'll use the system-wide default 8892 * string size. 
8893 */ 8894 tupregs[ttop++].dttk_size = 8895 dtrace_strsize_default; 8896 } else { 8897 if (srd == 0) 8898 return; 8899 8900 tupregs[ttop++].dttk_size = sval; 8901 } 8902 8903 break; 8904 8905 case DIF_OP_PUSHTV: 8906 if (ttop == DIF_DTR_NREGS) 8907 return; 8908 8909 tupregs[ttop++].dttk_size = 0; 8910 break; 8911 8912 case DIF_OP_FLUSHTS: 8913 ttop = 0; 8914 break; 8915 8916 case DIF_OP_POPTS: 8917 if (ttop != 0) 8918 ttop--; 8919 break; 8920 } 8921 8922 sval = 0; 8923 srd = 0; 8924 8925 if (nkeys == 0) 8926 continue; 8927 8928 /* 8929 * We have a dynamic variable allocation; calculate its size. 8930 */ 8931 for (ksize = 0, i = 0; i < nkeys; i++) 8932 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); 8933 8934 size = sizeof (dtrace_dynvar_t); 8935 size += sizeof (dtrace_key_t) * (nkeys - 1); 8936 size += ksize; 8937 8938 /* 8939 * Now we need to determine the size of the stored data. 8940 */ 8941 id = DIF_INSTR_VAR(instr); 8942 8943 for (i = 0; i < dp->dtdo_varlen; i++) { 8944 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8945 8946 if (v->dtdv_id == id && v->dtdv_scope == scope) { 8947 size += v->dtdv_type.dtdt_size; 8948 break; 8949 } 8950 } 8951 8952 if (i == dp->dtdo_varlen) 8953 return; 8954 8955 /* 8956 * We have the size. If this is larger than the chunk size 8957 * for our dynamic variable state, reset the chunk size. 
8958 */ 8959 size = P2ROUNDUP(size, sizeof (uint64_t)); 8960 8961 if (size > vstate->dtvs_dynvars.dtds_chunksize) 8962 vstate->dtvs_dynvars.dtds_chunksize = size; 8963 } 8964 } 8965 8966 static void 8967 dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 8968 { 8969 int i, oldsvars, osz, nsz, otlocals, ntlocals; 8970 uint_t id; 8971 8972 ASSERT(MUTEX_HELD(&dtrace_lock)); 8973 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); 8974 8975 for (i = 0; i < dp->dtdo_varlen; i++) { 8976 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8977 dtrace_statvar_t *svar, ***svarp; 8978 size_t dsize = 0; 8979 uint8_t scope = v->dtdv_scope; 8980 int *np; 8981 8982 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 8983 continue; 8984 8985 id -= DIF_VAR_OTHER_UBASE; 8986 8987 switch (scope) { 8988 case DIFV_SCOPE_THREAD: 8989 while (id >= (otlocals = vstate->dtvs_ntlocals)) { 8990 dtrace_difv_t *tlocals; 8991 8992 if ((ntlocals = (otlocals << 1)) == 0) 8993 ntlocals = 1; 8994 8995 osz = otlocals * sizeof (dtrace_difv_t); 8996 nsz = ntlocals * sizeof (dtrace_difv_t); 8997 8998 tlocals = kmem_zalloc(nsz, KM_SLEEP); 8999 9000 if (osz != 0) { 9001 bcopy(vstate->dtvs_tlocals, 9002 tlocals, osz); 9003 kmem_free(vstate->dtvs_tlocals, osz); 9004 } 9005 9006 vstate->dtvs_tlocals = tlocals; 9007 vstate->dtvs_ntlocals = ntlocals; 9008 } 9009 9010 vstate->dtvs_tlocals[id] = *v; 9011 continue; 9012 9013 case DIFV_SCOPE_LOCAL: 9014 np = &vstate->dtvs_nlocals; 9015 svarp = &vstate->dtvs_locals; 9016 9017 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) 9018 dsize = NCPU * (v->dtdv_type.dtdt_size + 9019 sizeof (uint64_t)); 9020 else 9021 dsize = NCPU * sizeof (uint64_t); 9022 9023 break; 9024 9025 case DIFV_SCOPE_GLOBAL: 9026 np = &vstate->dtvs_nglobals; 9027 svarp = &vstate->dtvs_globals; 9028 9029 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) 9030 dsize = v->dtdv_type.dtdt_size + 9031 sizeof (uint64_t); 9032 9033 break; 9034 9035 default: 9036 ASSERT(0); 9037 } 9038 9039 while (id >= (oldsvars = *np)) { 9040 
dtrace_statvar_t **statics; 9041 int newsvars, oldsize, newsize; 9042 9043 if ((newsvars = (oldsvars << 1)) == 0) 9044 newsvars = 1; 9045 9046 oldsize = oldsvars * sizeof (dtrace_statvar_t *); 9047 newsize = newsvars * sizeof (dtrace_statvar_t *); 9048 9049 statics = kmem_zalloc(newsize, KM_SLEEP); 9050 9051 if (oldsize != 0) { 9052 bcopy(*svarp, statics, oldsize); 9053 kmem_free(*svarp, oldsize); 9054 } 9055 9056 *svarp = statics; 9057 *np = newsvars; 9058 } 9059 9060 if ((svar = (*svarp)[id]) == NULL) { 9061 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP); 9062 svar->dtsv_var = *v; 9063 9064 if ((svar->dtsv_size = dsize) != 0) { 9065 svar->dtsv_data = (uint64_t)(uintptr_t) 9066 kmem_zalloc(dsize, KM_SLEEP); 9067 } 9068 9069 (*svarp)[id] = svar; 9070 } 9071 9072 svar->dtsv_refcnt++; 9073 } 9074 9075 dtrace_difo_chunksize(dp, vstate); 9076 dtrace_difo_hold(dp); 9077 } 9078 9079 static dtrace_difo_t * 9080 dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 9081 { 9082 dtrace_difo_t *new; 9083 size_t sz; 9084 9085 ASSERT(dp->dtdo_buf != NULL); 9086 ASSERT(dp->dtdo_refcnt != 0); 9087 9088 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 9089 9090 ASSERT(dp->dtdo_buf != NULL); 9091 sz = dp->dtdo_len * sizeof (dif_instr_t); 9092 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP); 9093 bcopy(dp->dtdo_buf, new->dtdo_buf, sz); 9094 new->dtdo_len = dp->dtdo_len; 9095 9096 if (dp->dtdo_strtab != NULL) { 9097 ASSERT(dp->dtdo_strlen != 0); 9098 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP); 9099 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen); 9100 new->dtdo_strlen = dp->dtdo_strlen; 9101 } 9102 9103 if (dp->dtdo_inttab != NULL) { 9104 ASSERT(dp->dtdo_intlen != 0); 9105 sz = dp->dtdo_intlen * sizeof (uint64_t); 9106 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP); 9107 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz); 9108 new->dtdo_intlen = dp->dtdo_intlen; 9109 } 9110 9111 if (dp->dtdo_vartab != NULL) { 9112 ASSERT(dp->dtdo_varlen != 0); 9113 sz 
= dp->dtdo_varlen * sizeof (dtrace_difv_t); 9114 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP); 9115 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz); 9116 new->dtdo_varlen = dp->dtdo_varlen; 9117 } 9118 9119 dtrace_difo_init(new, vstate); 9120 return (new); 9121 } 9122 9123 static void 9124 dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 9125 { 9126 int i; 9127 9128 ASSERT(dp->dtdo_refcnt == 0); 9129 9130 for (i = 0; i < dp->dtdo_varlen; i++) { 9131 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 9132 dtrace_statvar_t *svar, **svarp; 9133 uint_t id; 9134 uint8_t scope = v->dtdv_scope; 9135 int *np; 9136 9137 switch (scope) { 9138 case DIFV_SCOPE_THREAD: 9139 continue; 9140 9141 case DIFV_SCOPE_LOCAL: 9142 np = &vstate->dtvs_nlocals; 9143 svarp = vstate->dtvs_locals; 9144 break; 9145 9146 case DIFV_SCOPE_GLOBAL: 9147 np = &vstate->dtvs_nglobals; 9148 svarp = vstate->dtvs_globals; 9149 break; 9150 9151 default: 9152 ASSERT(0); 9153 } 9154 9155 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 9156 continue; 9157 9158 id -= DIF_VAR_OTHER_UBASE; 9159 ASSERT(id < *np); 9160 9161 svar = svarp[id]; 9162 ASSERT(svar != NULL); 9163 ASSERT(svar->dtsv_refcnt > 0); 9164 9165 if (--svar->dtsv_refcnt > 0) 9166 continue; 9167 9168 if (svar->dtsv_size != 0) { 9169 ASSERT(svar->dtsv_data != NULL); 9170 kmem_free((void *)(uintptr_t)svar->dtsv_data, 9171 svar->dtsv_size); 9172 } 9173 9174 kmem_free(svar, sizeof (dtrace_statvar_t)); 9175 svarp[id] = NULL; 9176 } 9177 9178 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 9179 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 9180 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); 9181 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 9182 9183 kmem_free(dp, sizeof (dtrace_difo_t)); 9184 } 9185 9186 static void 9187 dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 9188 { 9189 int i; 9190 9191 ASSERT(MUTEX_HELD(&dtrace_lock)); 9192 ASSERT(dp->dtdo_refcnt != 0); 9193 9194 for (i = 0; 
i < dp->dtdo_varlen; i++) { 9195 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 9196 9197 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 9198 continue; 9199 9200 ASSERT(dtrace_vtime_references > 0); 9201 if (--dtrace_vtime_references == 0) 9202 dtrace_vtime_disable(); 9203 } 9204 9205 if (--dp->dtdo_refcnt == 0) 9206 dtrace_difo_destroy(dp, vstate); 9207 } 9208 9209 /* 9210 * DTrace Format Functions 9211 */ 9212 static uint16_t 9213 dtrace_format_add(dtrace_state_t *state, char *str) 9214 { 9215 char *fmt, **new; 9216 uint16_t ndx, len = strlen(str) + 1; 9217 9218 fmt = kmem_zalloc(len, KM_SLEEP); 9219 bcopy(str, fmt, len); 9220 9221 for (ndx = 0; ndx < state->dts_nformats; ndx++) { 9222 if (state->dts_formats[ndx] == NULL) { 9223 state->dts_formats[ndx] = fmt; 9224 return (ndx + 1); 9225 } 9226 } 9227 9228 if (state->dts_nformats == USHRT_MAX) { 9229 /* 9230 * This is only likely if a denial-of-service attack is being 9231 * attempted. As such, it's okay to fail silently here. 9232 */ 9233 kmem_free(fmt, len); 9234 return (0); 9235 } 9236 9237 /* 9238 * For simplicity, we always resize the formats array to be exactly the 9239 * number of formats. 
9240 */ 9241 ndx = state->dts_nformats++; 9242 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); 9243 9244 if (state->dts_formats != NULL) { 9245 ASSERT(ndx != 0); 9246 bcopy(state->dts_formats, new, ndx * sizeof (char *)); 9247 kmem_free(state->dts_formats, ndx * sizeof (char *)); 9248 } 9249 9250 state->dts_formats = new; 9251 state->dts_formats[ndx] = fmt; 9252 9253 return (ndx + 1); 9254 } 9255 9256 static void 9257 dtrace_format_remove(dtrace_state_t *state, uint16_t format) 9258 { 9259 char *fmt; 9260 9261 ASSERT(state->dts_formats != NULL); 9262 ASSERT(format <= state->dts_nformats); 9263 ASSERT(state->dts_formats[format - 1] != NULL); 9264 9265 fmt = state->dts_formats[format - 1]; 9266 kmem_free(fmt, strlen(fmt) + 1); 9267 state->dts_formats[format - 1] = NULL; 9268 } 9269 9270 static void 9271 dtrace_format_destroy(dtrace_state_t *state) 9272 { 9273 int i; 9274 9275 if (state->dts_nformats == 0) { 9276 ASSERT(state->dts_formats == NULL); 9277 return; 9278 } 9279 9280 ASSERT(state->dts_formats != NULL); 9281 9282 for (i = 0; i < state->dts_nformats; i++) { 9283 char *fmt = state->dts_formats[i]; 9284 9285 if (fmt == NULL) 9286 continue; 9287 9288 kmem_free(fmt, strlen(fmt) + 1); 9289 } 9290 9291 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); 9292 state->dts_nformats = 0; 9293 state->dts_formats = NULL; 9294 } 9295 9296 /* 9297 * DTrace Predicate Functions 9298 */ 9299 static dtrace_predicate_t * 9300 dtrace_predicate_create(dtrace_difo_t *dp) 9301 { 9302 dtrace_predicate_t *pred; 9303 9304 ASSERT(MUTEX_HELD(&dtrace_lock)); 9305 ASSERT(dp->dtdo_refcnt != 0); 9306 9307 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); 9308 pred->dtp_difo = dp; 9309 pred->dtp_refcnt = 1; 9310 9311 if (!dtrace_difo_cacheable(dp)) 9312 return (pred); 9313 9314 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) { 9315 /* 9316 * This is only theoretically possible -- we have had 2^32 9317 * cacheable predicates on this machine. 
We cannot allow any 9318 * more predicates to become cacheable: as unlikely as it is, 9319 * there may be a thread caching a (now stale) predicate cache 9320 * ID. (N.B.: the temptation is being successfully resisted to 9321 * have this cmn_err() "Holy shit -- we executed this code!") 9322 */ 9323 return (pred); 9324 } 9325 9326 pred->dtp_cacheid = dtrace_predcache_id++; 9327 9328 return (pred); 9329 } 9330 9331 static void 9332 dtrace_predicate_hold(dtrace_predicate_t *pred) 9333 { 9334 ASSERT(MUTEX_HELD(&dtrace_lock)); 9335 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); 9336 ASSERT(pred->dtp_refcnt > 0); 9337 9338 pred->dtp_refcnt++; 9339 } 9340 9341 static void 9342 dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) 9343 { 9344 dtrace_difo_t *dp = pred->dtp_difo; 9345 9346 ASSERT(MUTEX_HELD(&dtrace_lock)); 9347 ASSERT(dp != NULL && dp->dtdo_refcnt != 0); 9348 ASSERT(pred->dtp_refcnt > 0); 9349 9350 if (--pred->dtp_refcnt == 0) { 9351 dtrace_difo_release(pred->dtp_difo, vstate); 9352 kmem_free(pred, sizeof (dtrace_predicate_t)); 9353 } 9354 } 9355 9356 /* 9357 * DTrace Action Description Functions 9358 */ 9359 static dtrace_actdesc_t * 9360 dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, 9361 uint64_t uarg, uint64_t arg) 9362 { 9363 dtrace_actdesc_t *act; 9364 9365 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && 9366 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); 9367 9368 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); 9369 act->dtad_kind = kind; 9370 act->dtad_ntuple = ntuple; 9371 act->dtad_uarg = uarg; 9372 act->dtad_arg = arg; 9373 act->dtad_refcnt = 1; 9374 9375 return (act); 9376 } 9377 9378 static void 9379 dtrace_actdesc_hold(dtrace_actdesc_t *act) 9380 { 9381 ASSERT(act->dtad_refcnt >= 1); 9382 act->dtad_refcnt++; 9383 } 9384 9385 static void 9386 dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) 9387 { 9388 dtrace_actkind_t kind = 
act->dtad_kind; 9389 dtrace_difo_t *dp; 9390 9391 ASSERT(act->dtad_refcnt >= 1); 9392 9393 if (--act->dtad_refcnt != 0) 9394 return; 9395 9396 if ((dp = act->dtad_difo) != NULL) 9397 dtrace_difo_release(dp, vstate); 9398 9399 if (DTRACEACT_ISPRINTFLIKE(kind)) { 9400 char *str = (char *)(uintptr_t)act->dtad_arg; 9401 9402 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || 9403 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); 9404 9405 if (str != NULL) 9406 kmem_free(str, strlen(str) + 1); 9407 } 9408 9409 kmem_free(act, sizeof (dtrace_actdesc_t)); 9410 } 9411 9412 /* 9413 * DTrace ECB Functions 9414 */ 9415 static dtrace_ecb_t * 9416 dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) 9417 { 9418 dtrace_ecb_t *ecb; 9419 dtrace_epid_t epid; 9420 9421 ASSERT(MUTEX_HELD(&dtrace_lock)); 9422 9423 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); 9424 ecb->dte_predicate = NULL; 9425 ecb->dte_probe = probe; 9426 9427 /* 9428 * The default size is the size of the default action: recording 9429 * the header. 9430 */ 9431 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t); 9432 ecb->dte_alignment = sizeof (dtrace_epid_t); 9433 9434 epid = state->dts_epid++; 9435 9436 if (epid - 1 >= state->dts_necbs) { 9437 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; 9438 int necbs = state->dts_necbs << 1; 9439 9440 ASSERT(epid == state->dts_necbs + 1); 9441 9442 if (necbs == 0) { 9443 ASSERT(oecbs == NULL); 9444 necbs = 1; 9445 } 9446 9447 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP); 9448 9449 if (oecbs != NULL) 9450 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs)); 9451 9452 dtrace_membar_producer(); 9453 state->dts_ecbs = ecbs; 9454 9455 if (oecbs != NULL) { 9456 /* 9457 * If this state is active, we must dtrace_sync() 9458 * before we can free the old dts_ecbs array: we're 9459 * coming in hot, and there may be active ring 9460 * buffer processing (which indexes into the dts_ecbs 9461 * array) on another CPU. 
9462 */ 9463 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 9464 dtrace_sync(); 9465 9466 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs)); 9467 } 9468 9469 dtrace_membar_producer(); 9470 state->dts_necbs = necbs; 9471 } 9472 9473 ecb->dte_state = state; 9474 9475 ASSERT(state->dts_ecbs[epid - 1] == NULL); 9476 dtrace_membar_producer(); 9477 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb; 9478 9479 return (ecb); 9480 } 9481 9482 static int 9483 dtrace_ecb_enable(dtrace_ecb_t *ecb) 9484 { 9485 dtrace_probe_t *probe = ecb->dte_probe; 9486 9487 ASSERT(MUTEX_HELD(&cpu_lock)); 9488 ASSERT(MUTEX_HELD(&dtrace_lock)); 9489 ASSERT(ecb->dte_next == NULL); 9490 9491 if (probe == NULL) { 9492 /* 9493 * This is the NULL probe -- there's nothing to do. 9494 */ 9495 return (0); 9496 } 9497 9498 if (probe->dtpr_ecb == NULL) { 9499 dtrace_provider_t *prov = probe->dtpr_provider; 9500 9501 /* 9502 * We're the first ECB on this probe. 9503 */ 9504 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb; 9505 9506 if (ecb->dte_predicate != NULL) 9507 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; 9508 9509 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg, 9510 probe->dtpr_id, probe->dtpr_arg)); 9511 } else { 9512 /* 9513 * This probe is already active. Swing the last pointer to 9514 * point to the new ECB, and issue a dtrace_sync() to assure 9515 * that all CPUs have seen the change. 9516 */ 9517 ASSERT(probe->dtpr_ecb_last != NULL); 9518 probe->dtpr_ecb_last->dte_next = ecb; 9519 probe->dtpr_ecb_last = ecb; 9520 probe->dtpr_predcache = 0; 9521 9522 dtrace_sync(); 9523 return (0); 9524 } 9525 } 9526 9527 static void 9528 dtrace_ecb_resize(dtrace_ecb_t *ecb) 9529 { 9530 dtrace_action_t *act; 9531 uint32_t curneeded = UINT32_MAX; 9532 uint32_t aggbase = UINT32_MAX; 9533 9534 /* 9535 * If we record anything, we always record the dtrace_rechdr_t. (And 9536 * we always record it first.) 
9537 */ 9538 ecb->dte_size = sizeof (dtrace_rechdr_t); 9539 ecb->dte_alignment = sizeof (dtrace_epid_t); 9540 9541 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 9542 dtrace_recdesc_t *rec = &act->dta_rec; 9543 ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1); 9544 9545 ecb->dte_alignment = MAX(ecb->dte_alignment, 9546 rec->dtrd_alignment); 9547 9548 if (DTRACEACT_ISAGG(act->dta_kind)) { 9549 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; 9550 9551 ASSERT(rec->dtrd_size != 0); 9552 ASSERT(agg->dtag_first != NULL); 9553 ASSERT(act->dta_prev->dta_intuple); 9554 ASSERT(aggbase != UINT32_MAX); 9555 ASSERT(curneeded != UINT32_MAX); 9556 9557 agg->dtag_base = aggbase; 9558 9559 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); 9560 rec->dtrd_offset = curneeded; 9561 curneeded += rec->dtrd_size; 9562 ecb->dte_needed = MAX(ecb->dte_needed, curneeded); 9563 9564 aggbase = UINT32_MAX; 9565 curneeded = UINT32_MAX; 9566 } else if (act->dta_intuple) { 9567 if (curneeded == UINT32_MAX) { 9568 /* 9569 * This is the first record in a tuple. Align 9570 * curneeded to be at offset 4 in an 8-byte 9571 * aligned block. 
9572 */ 9573 ASSERT(act->dta_prev == NULL || 9574 !act->dta_prev->dta_intuple); 9575 ASSERT3U(aggbase, ==, UINT32_MAX); 9576 curneeded = P2PHASEUP(ecb->dte_size, 9577 sizeof (uint64_t), sizeof (dtrace_aggid_t)); 9578 9579 aggbase = curneeded - sizeof (dtrace_aggid_t); 9580 ASSERT(IS_P2ALIGNED(aggbase, 9581 sizeof (uint64_t))); 9582 } 9583 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); 9584 rec->dtrd_offset = curneeded; 9585 curneeded += rec->dtrd_size; 9586 } else { 9587 /* tuples must be followed by an aggregation */ 9588 ASSERT(act->dta_prev == NULL || 9589 !act->dta_prev->dta_intuple); 9590 9591 ecb->dte_size = P2ROUNDUP(ecb->dte_size, 9592 rec->dtrd_alignment); 9593 rec->dtrd_offset = ecb->dte_size; 9594 ecb->dte_size += rec->dtrd_size; 9595 ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); 9596 } 9597 } 9598 9599 if ((act = ecb->dte_action) != NULL && 9600 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && 9601 ecb->dte_size == sizeof (dtrace_rechdr_t)) { 9602 /* 9603 * If the size is still sizeof (dtrace_rechdr_t), then all 9604 * actions store no data; set the size to 0. 
9605 */ 9606 ecb->dte_size = 0; 9607 } 9608 9609 ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); 9610 ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); 9611 ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, 9612 ecb->dte_needed); 9613 } 9614 9615 static dtrace_action_t * 9616 dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 9617 { 9618 dtrace_aggregation_t *agg; 9619 size_t size = sizeof (uint64_t); 9620 int ntuple = desc->dtad_ntuple; 9621 dtrace_action_t *act; 9622 dtrace_recdesc_t *frec; 9623 dtrace_aggid_t aggid; 9624 dtrace_state_t *state = ecb->dte_state; 9625 9626 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP); 9627 agg->dtag_ecb = ecb; 9628 9629 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); 9630 9631 switch (desc->dtad_kind) { 9632 case DTRACEAGG_MIN: 9633 agg->dtag_initial = INT64_MAX; 9634 agg->dtag_aggregate = dtrace_aggregate_min; 9635 break; 9636 9637 case DTRACEAGG_MAX: 9638 agg->dtag_initial = INT64_MIN; 9639 agg->dtag_aggregate = dtrace_aggregate_max; 9640 break; 9641 9642 case DTRACEAGG_COUNT: 9643 agg->dtag_aggregate = dtrace_aggregate_count; 9644 break; 9645 9646 case DTRACEAGG_QUANTIZE: 9647 agg->dtag_aggregate = dtrace_aggregate_quantize; 9648 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) * 9649 sizeof (uint64_t); 9650 break; 9651 9652 case DTRACEAGG_LQUANTIZE: { 9653 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); 9654 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); 9655 9656 agg->dtag_initial = desc->dtad_arg; 9657 agg->dtag_aggregate = dtrace_aggregate_lquantize; 9658 9659 if (step == 0 || levels == 0) 9660 goto err; 9661 9662 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t); 9663 break; 9664 } 9665 9666 case DTRACEAGG_LLQUANTIZE: { 9667 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); 9668 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); 9669 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); 9670 uint16_t 
nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); 9671 int64_t v; 9672 9673 agg->dtag_initial = desc->dtad_arg; 9674 agg->dtag_aggregate = dtrace_aggregate_llquantize; 9675 9676 if (factor < 2 || low >= high || nsteps < factor) 9677 goto err; 9678 9679 /* 9680 * Now check that the number of steps evenly divides a power 9681 * of the factor. (This assures both integer bucket size and 9682 * linearity within each magnitude.) 9683 */ 9684 for (v = factor; v < nsteps; v *= factor) 9685 continue; 9686 9687 if ((v % nsteps) || (nsteps % factor)) 9688 goto err; 9689 9690 size = (dtrace_aggregate_llquantize_bucket(factor, 9691 low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); 9692 break; 9693 } 9694 9695 case DTRACEAGG_AVG: 9696 agg->dtag_aggregate = dtrace_aggregate_avg; 9697 size = sizeof (uint64_t) * 2; 9698 break; 9699 9700 case DTRACEAGG_STDDEV: 9701 agg->dtag_aggregate = dtrace_aggregate_stddev; 9702 size = sizeof (uint64_t) * 4; 9703 break; 9704 9705 case DTRACEAGG_SUM: 9706 agg->dtag_aggregate = dtrace_aggregate_sum; 9707 break; 9708 9709 default: 9710 goto err; 9711 } 9712 9713 agg->dtag_action.dta_rec.dtrd_size = size; 9714 9715 if (ntuple == 0) 9716 goto err; 9717 9718 /* 9719 * We must make sure that we have enough actions for the n-tuple. 9720 */ 9721 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) { 9722 if (DTRACEACT_ISAGG(act->dta_kind)) 9723 break; 9724 9725 if (--ntuple == 0) { 9726 /* 9727 * This is the action with which our n-tuple begins. 9728 */ 9729 agg->dtag_first = act; 9730 goto success; 9731 } 9732 } 9733 9734 /* 9735 * This n-tuple is short by ntuple elements. Return failure. 9736 */ 9737 ASSERT(ntuple != 0); 9738 err: 9739 kmem_free(agg, sizeof (dtrace_aggregation_t)); 9740 return (NULL); 9741 9742 success: 9743 /* 9744 * If the last action in the tuple has a size of zero, it's actually 9745 * an expression argument for the aggregating action. 
9746 */ 9747 ASSERT(ecb->dte_action_last != NULL); 9748 act = ecb->dte_action_last; 9749 9750 if (act->dta_kind == DTRACEACT_DIFEXPR) { 9751 ASSERT(act->dta_difo != NULL); 9752 9753 if (act->dta_difo->dtdo_rtype.dtdt_size == 0) 9754 agg->dtag_hasarg = 1; 9755 } 9756 9757 /* 9758 * We need to allocate an id for this aggregation. 9759 */ 9760 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, 9761 VM_BESTFIT | VM_SLEEP); 9762 9763 if (aggid - 1 >= state->dts_naggregations) { 9764 dtrace_aggregation_t **oaggs = state->dts_aggregations; 9765 dtrace_aggregation_t **aggs; 9766 int naggs = state->dts_naggregations << 1; 9767 int onaggs = state->dts_naggregations; 9768 9769 ASSERT(aggid == state->dts_naggregations + 1); 9770 9771 if (naggs == 0) { 9772 ASSERT(oaggs == NULL); 9773 naggs = 1; 9774 } 9775 9776 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP); 9777 9778 if (oaggs != NULL) { 9779 bcopy(oaggs, aggs, onaggs * sizeof (*aggs)); 9780 kmem_free(oaggs, onaggs * sizeof (*aggs)); 9781 } 9782 9783 state->dts_aggregations = aggs; 9784 state->dts_naggregations = naggs; 9785 } 9786 9787 ASSERT(state->dts_aggregations[aggid - 1] == NULL); 9788 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg; 9789 9790 frec = &agg->dtag_first->dta_rec; 9791 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t)) 9792 frec->dtrd_alignment = sizeof (dtrace_aggid_t); 9793 9794 for (act = agg->dtag_first; act != NULL; act = act->dta_next) { 9795 ASSERT(!act->dta_intuple); 9796 act->dta_intuple = 1; 9797 } 9798 9799 return (&agg->dtag_action); 9800 } 9801 9802 static void 9803 dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act) 9804 { 9805 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; 9806 dtrace_state_t *state = ecb->dte_state; 9807 dtrace_aggid_t aggid = agg->dtag_id; 9808 9809 ASSERT(DTRACEACT_ISAGG(act->dta_kind)); 9810 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1); 9811 9812 ASSERT(state->dts_aggregations[aggid - 
1] == agg); 9813 state->dts_aggregations[aggid - 1] = NULL; 9814 9815 kmem_free(agg, sizeof (dtrace_aggregation_t)); 9816 } 9817 9818 static int 9819 dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 9820 { 9821 dtrace_action_t *action, *last; 9822 dtrace_difo_t *dp = desc->dtad_difo; 9823 uint32_t size = 0, align = sizeof (uint8_t), mask; 9824 uint16_t format = 0; 9825 dtrace_recdesc_t *rec; 9826 dtrace_state_t *state = ecb->dte_state; 9827 dtrace_optval_t *opt = state->dts_options, nframes, strsize; 9828 uint64_t arg = desc->dtad_arg; 9829 9830 ASSERT(MUTEX_HELD(&dtrace_lock)); 9831 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); 9832 9833 if (DTRACEACT_ISAGG(desc->dtad_kind)) { 9834 /* 9835 * If this is an aggregating action, there must be neither 9836 * a speculate nor a commit on the action chain. 9837 */ 9838 dtrace_action_t *act; 9839 9840 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 9841 if (act->dta_kind == DTRACEACT_COMMIT) 9842 return (EINVAL); 9843 9844 if (act->dta_kind == DTRACEACT_SPECULATE) 9845 return (EINVAL); 9846 } 9847 9848 action = dtrace_ecb_aggregation_create(ecb, desc); 9849 9850 if (action == NULL) 9851 return (EINVAL); 9852 } else { 9853 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) || 9854 (desc->dtad_kind == DTRACEACT_DIFEXPR && 9855 dp != NULL && dp->dtdo_destructive)) { 9856 state->dts_destructive = 1; 9857 } 9858 9859 switch (desc->dtad_kind) { 9860 case DTRACEACT_PRINTF: 9861 case DTRACEACT_PRINTA: 9862 case DTRACEACT_SYSTEM: 9863 case DTRACEACT_FREOPEN: 9864 case DTRACEACT_DIFEXPR: 9865 /* 9866 * We know that our arg is a string -- turn it into a 9867 * format. 
9868 */ 9869 if (arg == NULL) { 9870 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || 9871 desc->dtad_kind == DTRACEACT_DIFEXPR); 9872 format = 0; 9873 } else { 9874 ASSERT(arg != NULL); 9875 ASSERT(arg > KERNELBASE); 9876 format = dtrace_format_add(state, 9877 (char *)(uintptr_t)arg); 9878 } 9879 9880 /*FALLTHROUGH*/ 9881 case DTRACEACT_LIBACT: 9882 case DTRACEACT_TRACEMEM: 9883 case DTRACEACT_TRACEMEM_DYNSIZE: 9884 if (dp == NULL) 9885 return (EINVAL); 9886 9887 if ((size = dp->dtdo_rtype.dtdt_size) != 0) 9888 break; 9889 9890 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { 9891 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 9892 return (EINVAL); 9893 9894 size = opt[DTRACEOPT_STRSIZE]; 9895 } 9896 9897 break; 9898 9899 case DTRACEACT_STACK: 9900 if ((nframes = arg) == 0) { 9901 nframes = opt[DTRACEOPT_STACKFRAMES]; 9902 ASSERT(nframes > 0); 9903 arg = nframes; 9904 } 9905 9906 size = nframes * sizeof (pc_t); 9907 break; 9908 9909 case DTRACEACT_JSTACK: 9910 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0) 9911 strsize = opt[DTRACEOPT_JSTACKSTRSIZE]; 9912 9913 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) 9914 nframes = opt[DTRACEOPT_JSTACKFRAMES]; 9915 9916 arg = DTRACE_USTACK_ARG(nframes, strsize); 9917 9918 /*FALLTHROUGH*/ 9919 case DTRACEACT_USTACK: 9920 if (desc->dtad_kind != DTRACEACT_JSTACK && 9921 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { 9922 strsize = DTRACE_USTACK_STRSIZE(arg); 9923 nframes = opt[DTRACEOPT_USTACKFRAMES]; 9924 ASSERT(nframes > 0); 9925 arg = DTRACE_USTACK_ARG(nframes, strsize); 9926 } 9927 9928 /* 9929 * Save a slot for the pid. 
9930 */ 9931 size = (nframes + 1) * sizeof (uint64_t); 9932 size += DTRACE_USTACK_STRSIZE(arg); 9933 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t))); 9934 9935 break; 9936 9937 case DTRACEACT_SYM: 9938 case DTRACEACT_MOD: 9939 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) != 9940 sizeof (uint64_t)) || 9941 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 9942 return (EINVAL); 9943 break; 9944 9945 case DTRACEACT_USYM: 9946 case DTRACEACT_UMOD: 9947 case DTRACEACT_UADDR: 9948 if (dp == NULL || 9949 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) || 9950 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 9951 return (EINVAL); 9952 9953 /* 9954 * We have a slot for the pid, plus a slot for the 9955 * argument. To keep things simple (aligned with 9956 * bitness-neutral sizing), we store each as a 64-bit 9957 * quantity. 9958 */ 9959 size = 2 * sizeof (uint64_t); 9960 break; 9961 9962 case DTRACEACT_STOP: 9963 case DTRACEACT_BREAKPOINT: 9964 case DTRACEACT_PANIC: 9965 break; 9966 9967 case DTRACEACT_CHILL: 9968 case DTRACEACT_DISCARD: 9969 case DTRACEACT_RAISE: 9970 if (dp == NULL) 9971 return (EINVAL); 9972 break; 9973 9974 case DTRACEACT_EXIT: 9975 if (dp == NULL || 9976 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) || 9977 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 9978 return (EINVAL); 9979 break; 9980 9981 case DTRACEACT_SPECULATE: 9982 if (ecb->dte_size > sizeof (dtrace_rechdr_t)) 9983 return (EINVAL); 9984 9985 if (dp == NULL) 9986 return (EINVAL); 9987 9988 state->dts_speculates = 1; 9989 break; 9990 9991 case DTRACEACT_COMMIT: { 9992 dtrace_action_t *act = ecb->dte_action; 9993 9994 for (; act != NULL; act = act->dta_next) { 9995 if (act->dta_kind == DTRACEACT_COMMIT) 9996 return (EINVAL); 9997 } 9998 9999 if (dp == NULL) 10000 return (EINVAL); 10001 break; 10002 } 10003 10004 default: 10005 return (EINVAL); 10006 } 10007 10008 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) { 10009 /* 10010 * If this is a data-storing action or a speculate, 
10011 * we must be sure that there isn't a commit on the 10012 * action chain. 10013 */ 10014 dtrace_action_t *act = ecb->dte_action; 10015 10016 for (; act != NULL; act = act->dta_next) { 10017 if (act->dta_kind == DTRACEACT_COMMIT) 10018 return (EINVAL); 10019 } 10020 } 10021 10022 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP); 10023 action->dta_rec.dtrd_size = size; 10024 } 10025 10026 action->dta_refcnt = 1; 10027 rec = &action->dta_rec; 10028 size = rec->dtrd_size; 10029 10030 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) { 10031 if (!(size & mask)) { 10032 align = mask + 1; 10033 break; 10034 } 10035 } 10036 10037 action->dta_kind = desc->dtad_kind; 10038 10039 if ((action->dta_difo = dp) != NULL) 10040 dtrace_difo_hold(dp); 10041 10042 rec->dtrd_action = action->dta_kind; 10043 rec->dtrd_arg = arg; 10044 rec->dtrd_uarg = desc->dtad_uarg; 10045 rec->dtrd_alignment = (uint16_t)align; 10046 rec->dtrd_format = format; 10047 10048 if ((last = ecb->dte_action_last) != NULL) { 10049 ASSERT(ecb->dte_action != NULL); 10050 action->dta_prev = last; 10051 last->dta_next = action; 10052 } else { 10053 ASSERT(ecb->dte_action == NULL); 10054 ecb->dte_action = action; 10055 } 10056 10057 ecb->dte_action_last = action; 10058 10059 return (0); 10060 } 10061 10062 static void 10063 dtrace_ecb_action_remove(dtrace_ecb_t *ecb) 10064 { 10065 dtrace_action_t *act = ecb->dte_action, *next; 10066 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate; 10067 dtrace_difo_t *dp; 10068 uint16_t format; 10069 10070 if (act != NULL && act->dta_refcnt > 1) { 10071 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1); 10072 act->dta_refcnt--; 10073 } else { 10074 for (; act != NULL; act = next) { 10075 next = act->dta_next; 10076 ASSERT(next != NULL || act == ecb->dte_action_last); 10077 ASSERT(act->dta_refcnt == 1); 10078 10079 if ((format = act->dta_rec.dtrd_format) != 0) 10080 dtrace_format_remove(ecb->dte_state, format); 10081 10082 if ((dp 
= act->dta_difo) != NULL) 10083 dtrace_difo_release(dp, vstate); 10084 10085 if (DTRACEACT_ISAGG(act->dta_kind)) { 10086 dtrace_ecb_aggregation_destroy(ecb, act); 10087 } else { 10088 kmem_free(act, sizeof (dtrace_action_t)); 10089 } 10090 } 10091 } 10092 10093 ecb->dte_action = NULL; 10094 ecb->dte_action_last = NULL; 10095 ecb->dte_size = 0; 10096 } 10097 10098 static void 10099 dtrace_ecb_disable(dtrace_ecb_t *ecb) 10100 { 10101 /* 10102 * We disable the ECB by removing it from its probe. 10103 */ 10104 dtrace_ecb_t *pecb, *prev = NULL; 10105 dtrace_probe_t *probe = ecb->dte_probe; 10106 10107 ASSERT(MUTEX_HELD(&dtrace_lock)); 10108 10109 if (probe == NULL) { 10110 /* 10111 * This is the NULL probe; there is nothing to disable. 10112 */ 10113 return; 10114 } 10115 10116 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) { 10117 if (pecb == ecb) 10118 break; 10119 prev = pecb; 10120 } 10121 10122 ASSERT(pecb != NULL); 10123 10124 if (prev == NULL) { 10125 probe->dtpr_ecb = ecb->dte_next; 10126 } else { 10127 prev->dte_next = ecb->dte_next; 10128 } 10129 10130 if (ecb == probe->dtpr_ecb_last) { 10131 ASSERT(ecb->dte_next == NULL); 10132 probe->dtpr_ecb_last = prev; 10133 } 10134 10135 /* 10136 * The ECB has been disconnected from the probe; now sync to assure 10137 * that all CPUs have seen the change before returning. 10138 */ 10139 dtrace_sync(); 10140 10141 if (probe->dtpr_ecb == NULL) { 10142 /* 10143 * That was the last ECB on the probe; clear the predicate 10144 * cache ID for the probe, disable it and sync one more time 10145 * to assure that we'll never hit it again. 
10146 */ 10147 dtrace_provider_t *prov = probe->dtpr_provider; 10148 10149 ASSERT(ecb->dte_next == NULL); 10150 ASSERT(probe->dtpr_ecb_last == NULL); 10151 probe->dtpr_predcache = DTRACE_CACHEIDNONE; 10152 prov->dtpv_pops.dtps_disable(prov->dtpv_arg, 10153 probe->dtpr_id, probe->dtpr_arg); 10154 dtrace_sync(); 10155 } else { 10156 /* 10157 * There is at least one ECB remaining on the probe. If there 10158 * is _exactly_ one, set the probe's predicate cache ID to be 10159 * the predicate cache ID of the remaining ECB. 10160 */ 10161 ASSERT(probe->dtpr_ecb_last != NULL); 10162 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE); 10163 10164 if (probe->dtpr_ecb == probe->dtpr_ecb_last) { 10165 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate; 10166 10167 ASSERT(probe->dtpr_ecb->dte_next == NULL); 10168 10169 if (p != NULL) 10170 probe->dtpr_predcache = p->dtp_cacheid; 10171 } 10172 10173 ecb->dte_next = NULL; 10174 } 10175 } 10176 10177 static void 10178 dtrace_ecb_destroy(dtrace_ecb_t *ecb) 10179 { 10180 dtrace_state_t *state = ecb->dte_state; 10181 dtrace_vstate_t *vstate = &state->dts_vstate; 10182 dtrace_predicate_t *pred; 10183 dtrace_epid_t epid = ecb->dte_epid; 10184 10185 ASSERT(MUTEX_HELD(&dtrace_lock)); 10186 ASSERT(ecb->dte_next == NULL); 10187 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); 10188 10189 if ((pred = ecb->dte_predicate) != NULL) 10190 dtrace_predicate_release(pred, vstate); 10191 10192 dtrace_ecb_action_remove(ecb); 10193 10194 ASSERT(state->dts_ecbs[epid - 1] == ecb); 10195 state->dts_ecbs[epid - 1] = NULL; 10196 10197 kmem_free(ecb, sizeof (dtrace_ecb_t)); 10198 } 10199 10200 static dtrace_ecb_t * 10201 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, 10202 dtrace_enabling_t *enab) 10203 { 10204 dtrace_ecb_t *ecb; 10205 dtrace_predicate_t *pred; 10206 dtrace_actdesc_t *act; 10207 dtrace_provider_t *prov; 10208 dtrace_ecbdesc_t *desc = enab->dten_current; 10209 10210 ASSERT(MUTEX_HELD(&dtrace_lock)); 
10211 ASSERT(state != NULL); 10212 10213 ecb = dtrace_ecb_add(state, probe); 10214 ecb->dte_uarg = desc->dted_uarg; 10215 10216 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) { 10217 dtrace_predicate_hold(pred); 10218 ecb->dte_predicate = pred; 10219 } 10220 10221 if (probe != NULL) { 10222 /* 10223 * If the provider shows more leg than the consumer is old 10224 * enough to see, we need to enable the appropriate implicit 10225 * predicate bits to prevent the ecb from activating at 10226 * revealing times. 10227 * 10228 * Providers specifying DTRACE_PRIV_USER at register time 10229 * are stating that they need the /proc-style privilege 10230 * model to be enforced, and this is what DTRACE_COND_OWNER 10231 * and DTRACE_COND_ZONEOWNER will then do at probe time. 10232 */ 10233 prov = probe->dtpr_provider; 10234 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) && 10235 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) 10236 ecb->dte_cond |= DTRACE_COND_OWNER; 10237 10238 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) && 10239 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) 10240 ecb->dte_cond |= DTRACE_COND_ZONEOWNER; 10241 10242 /* 10243 * If the provider shows us kernel innards and the user 10244 * is lacking sufficient privilege, enable the 10245 * DTRACE_COND_USERMODE implicit predicate. 10246 */ 10247 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) && 10248 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL)) 10249 ecb->dte_cond |= DTRACE_COND_USERMODE; 10250 } 10251 10252 if (dtrace_ecb_create_cache != NULL) { 10253 /* 10254 * If we have a cached ecb, we'll use its action list instead 10255 * of creating our own (saving both time and space). 
10256 */ 10257 dtrace_ecb_t *cached = dtrace_ecb_create_cache; 10258 dtrace_action_t *act = cached->dte_action; 10259 10260 if (act != NULL) { 10261 ASSERT(act->dta_refcnt > 0); 10262 act->dta_refcnt++; 10263 ecb->dte_action = act; 10264 ecb->dte_action_last = cached->dte_action_last; 10265 ecb->dte_needed = cached->dte_needed; 10266 ecb->dte_size = cached->dte_size; 10267 ecb->dte_alignment = cached->dte_alignment; 10268 } 10269 10270 return (ecb); 10271 } 10272 10273 for (act = desc->dted_action; act != NULL; act = act->dtad_next) { 10274 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) { 10275 dtrace_ecb_destroy(ecb); 10276 return (NULL); 10277 } 10278 } 10279 10280 dtrace_ecb_resize(ecb); 10281 10282 return (dtrace_ecb_create_cache = ecb); 10283 } 10284 10285 static int 10286 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) 10287 { 10288 dtrace_ecb_t *ecb; 10289 dtrace_enabling_t *enab = arg; 10290 dtrace_state_t *state = enab->dten_vstate->dtvs_state; 10291 10292 ASSERT(state != NULL); 10293 10294 if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { 10295 /* 10296 * This probe was created in a generation for which this 10297 * enabling has previously created ECBs; we don't want to 10298 * enable it again, so just kick out. 
10299 */ 10300 return (DTRACE_MATCH_NEXT); 10301 } 10302 10303 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) 10304 return (DTRACE_MATCH_DONE); 10305 10306 if (dtrace_ecb_enable(ecb) < 0) 10307 return (DTRACE_MATCH_FAIL); 10308 10309 return (DTRACE_MATCH_NEXT); 10310 } 10311 10312 static dtrace_ecb_t * 10313 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) 10314 { 10315 dtrace_ecb_t *ecb; 10316 10317 ASSERT(MUTEX_HELD(&dtrace_lock)); 10318 10319 if (id == 0 || id > state->dts_necbs) 10320 return (NULL); 10321 10322 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); 10323 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id); 10324 10325 return (state->dts_ecbs[id - 1]); 10326 } 10327 10328 static dtrace_aggregation_t * 10329 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) 10330 { 10331 dtrace_aggregation_t *agg; 10332 10333 ASSERT(MUTEX_HELD(&dtrace_lock)); 10334 10335 if (id == 0 || id > state->dts_naggregations) 10336 return (NULL); 10337 10338 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); 10339 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL || 10340 agg->dtag_id == id); 10341 10342 return (state->dts_aggregations[id - 1]); 10343 } 10344 10345 /* 10346 * DTrace Buffer Functions 10347 * 10348 * The following functions manipulate DTrace buffers. Most of these functions 10349 * are called in the context of establishing or processing consumer state; 10350 * exceptions are explicitly noted. 10351 */ 10352 10353 /* 10354 * Note: called from cross call context. This function switches the two 10355 * buffers on a given CPU. The atomicity of this operation is assured by 10356 * disabling interrupts while the actual switch takes place; the disabling of 10357 * interrupts serializes the execution with any execution of dtrace_probe() on 10358 * the same CPU. 
10359 */ 10360 static void 10361 dtrace_buffer_switch(dtrace_buffer_t *buf) 10362 { 10363 caddr_t tomax = buf->dtb_tomax; 10364 caddr_t xamot = buf->dtb_xamot; 10365 dtrace_icookie_t cookie; 10366 hrtime_t now; 10367 10368 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 10369 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); 10370 10371 cookie = dtrace_interrupt_disable(); 10372 now = dtrace_gethrtime(); 10373 buf->dtb_tomax = xamot; 10374 buf->dtb_xamot = tomax; 10375 buf->dtb_xamot_drops = buf->dtb_drops; 10376 buf->dtb_xamot_offset = buf->dtb_offset; 10377 buf->dtb_xamot_errors = buf->dtb_errors; 10378 buf->dtb_xamot_flags = buf->dtb_flags; 10379 buf->dtb_offset = 0; 10380 buf->dtb_drops = 0; 10381 buf->dtb_errors = 0; 10382 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); 10383 buf->dtb_interval = now - buf->dtb_switched; 10384 buf->dtb_switched = now; 10385 dtrace_interrupt_enable(cookie); 10386 } 10387 10388 /* 10389 * Note: called from cross call context. This function activates a buffer 10390 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation 10391 * is guaranteed by the disabling of interrupts. 10392 */ 10393 static void 10394 dtrace_buffer_activate(dtrace_state_t *state) 10395 { 10396 dtrace_buffer_t *buf; 10397 dtrace_icookie_t cookie = dtrace_interrupt_disable(); 10398 10399 buf = &state->dts_buffer[CPU->cpu_id]; 10400 10401 if (buf->dtb_tomax != NULL) { 10402 /* 10403 * We might like to assert that the buffer is marked inactive, 10404 * but this isn't necessarily true: the buffer for the CPU 10405 * that processes the BEGIN probe has its buffer activated 10406 * manually. In this case, we take the (harmless) action 10407 * re-clearing the bit INACTIVE bit. 
10408 */ 10409 buf->dtb_flags &= ~DTRACEBUF_INACTIVE; 10410 } 10411 10412 dtrace_interrupt_enable(cookie); 10413 } 10414 10415 static int 10416 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, 10417 processorid_t cpu, int *factor) 10418 { 10419 cpu_t *cp; 10420 dtrace_buffer_t *buf; 10421 int allocated = 0, desired = 0; 10422 10423 ASSERT(MUTEX_HELD(&cpu_lock)); 10424 ASSERT(MUTEX_HELD(&dtrace_lock)); 10425 10426 *factor = 1; 10427 10428 if (size > dtrace_nonroot_maxsize && 10429 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) 10430 return (EFBIG); 10431 10432 cp = cpu_list; 10433 10434 do { 10435 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) 10436 continue; 10437 10438 buf = &bufs[cp->cpu_id]; 10439 10440 /* 10441 * If there is already a buffer allocated for this CPU, it 10442 * is only possible that this is a DR event. In this case, 10443 * the buffer size must match our specified size. 10444 */ 10445 if (buf->dtb_tomax != NULL) { 10446 ASSERT(buf->dtb_size == size); 10447 continue; 10448 } 10449 10450 ASSERT(buf->dtb_xamot == NULL); 10451 10452 if ((buf->dtb_tomax = kmem_zalloc(size, 10453 KM_NOSLEEP | KM_NORMALPRI)) == NULL) 10454 goto err; 10455 10456 buf->dtb_size = size; 10457 buf->dtb_flags = flags; 10458 buf->dtb_offset = 0; 10459 buf->dtb_drops = 0; 10460 10461 if (flags & DTRACEBUF_NOSWITCH) 10462 continue; 10463 10464 if ((buf->dtb_xamot = kmem_zalloc(size, 10465 KM_NOSLEEP | KM_NORMALPRI)) == NULL) 10466 goto err; 10467 } while ((cp = cp->cpu_next) != cpu_list); 10468 10469 return (0); 10470 10471 err: 10472 cp = cpu_list; 10473 10474 do { 10475 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) 10476 continue; 10477 10478 buf = &bufs[cp->cpu_id]; 10479 desired += 2; 10480 10481 if (buf->dtb_xamot != NULL) { 10482 ASSERT(buf->dtb_tomax != NULL); 10483 ASSERT(buf->dtb_size == size); 10484 kmem_free(buf->dtb_xamot, size); 10485 allocated++; 10486 } 10487 10488 if (buf->dtb_tomax != NULL) { 10489 ASSERT(buf->dtb_size == size); 10490 
kmem_free(buf->dtb_tomax, size); 10491 allocated++; 10492 } 10493 10494 buf->dtb_tomax = NULL; 10495 buf->dtb_xamot = NULL; 10496 buf->dtb_size = 0; 10497 } while ((cp = cp->cpu_next) != cpu_list); 10498 10499 *factor = desired / (allocated > 0 ? allocated : 1); 10500 10501 return (ENOMEM); 10502 } 10503 10504 /* 10505 * Note: called from probe context. This function just increments the drop 10506 * count on a buffer. It has been made a function to allow for the 10507 * possibility of understanding the source of mysterious drop counts. (A 10508 * problem for which one may be particularly disappointed that DTrace cannot 10509 * be used to understand DTrace.) 10510 */ 10511 static void 10512 dtrace_buffer_drop(dtrace_buffer_t *buf) 10513 { 10514 buf->dtb_drops++; 10515 } 10516 10517 /* 10518 * Note: called from probe context. This function is called to reserve space 10519 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the 10520 * mstate. Returns the new offset in the buffer, or a negative value if an 10521 * error has occurred. 10522 */ 10523 static intptr_t 10524 dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, 10525 dtrace_state_t *state, dtrace_mstate_t *mstate) 10526 { 10527 intptr_t offs = buf->dtb_offset, soffs; 10528 intptr_t woffs; 10529 caddr_t tomax; 10530 size_t total; 10531 10532 if (buf->dtb_flags & DTRACEBUF_INACTIVE) 10533 return (-1); 10534 10535 if ((tomax = buf->dtb_tomax) == NULL) { 10536 dtrace_buffer_drop(buf); 10537 return (-1); 10538 } 10539 10540 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) { 10541 while (offs & (align - 1)) { 10542 /* 10543 * Assert that our alignment is off by a number which 10544 * is itself sizeof (uint32_t) aligned. 
10545 */ 10546 ASSERT(!((align - (offs & (align - 1))) & 10547 (sizeof (uint32_t) - 1))); 10548 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); 10549 offs += sizeof (uint32_t); 10550 } 10551 10552 if ((soffs = offs + needed) > buf->dtb_size) { 10553 dtrace_buffer_drop(buf); 10554 return (-1); 10555 } 10556 10557 if (mstate == NULL) 10558 return (offs); 10559 10560 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs; 10561 mstate->dtms_scratch_size = buf->dtb_size - soffs; 10562 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; 10563 10564 return (offs); 10565 } 10566 10567 if (buf->dtb_flags & DTRACEBUF_FILL) { 10568 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN && 10569 (buf->dtb_flags & DTRACEBUF_FULL)) 10570 return (-1); 10571 goto out; 10572 } 10573 10574 total = needed + (offs & (align - 1)); 10575 10576 /* 10577 * For a ring buffer, life is quite a bit more complicated. Before 10578 * we can store any padding, we need to adjust our wrapping offset. 10579 * (If we've never before wrapped or we're not about to, no adjustment 10580 * is required.) 10581 */ 10582 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) || 10583 offs + total > buf->dtb_size) { 10584 woffs = buf->dtb_xamot_offset; 10585 10586 if (offs + total > buf->dtb_size) { 10587 /* 10588 * We can't fit in the end of the buffer. First, a 10589 * sanity check that we can fit in the buffer at all. 10590 */ 10591 if (total > buf->dtb_size) { 10592 dtrace_buffer_drop(buf); 10593 return (-1); 10594 } 10595 10596 /* 10597 * We're going to be storing at the top of the buffer, 10598 * so now we need to deal with the wrapped offset. We 10599 * only reset our wrapped offset to 0 if it is 10600 * currently greater than the current offset. If it 10601 * is less than the current offset, it is because a 10602 * previous allocation induced a wrap -- but the 10603 * allocation didn't subsequently take the space due 10604 * to an error or false predicate evaluation. 
In this 10605 * case, we'll just leave the wrapped offset alone: if 10606 * the wrapped offset hasn't been advanced far enough 10607 * for this allocation, it will be adjusted in the 10608 * lower loop. 10609 */ 10610 if (buf->dtb_flags & DTRACEBUF_WRAPPED) { 10611 if (woffs >= offs) 10612 woffs = 0; 10613 } else { 10614 woffs = 0; 10615 } 10616 10617 /* 10618 * Now we know that we're going to be storing to the 10619 * top of the buffer and that there is room for us 10620 * there. We need to clear the buffer from the current 10621 * offset to the end (there may be old gunk there). 10622 */ 10623 while (offs < buf->dtb_size) 10624 tomax[offs++] = 0; 10625 10626 /* 10627 * We need to set our offset to zero. And because we 10628 * are wrapping, we need to set the bit indicating as 10629 * much. We can also adjust our needed space back 10630 * down to the space required by the ECB -- we know 10631 * that the top of the buffer is aligned. 10632 */ 10633 offs = 0; 10634 total = needed; 10635 buf->dtb_flags |= DTRACEBUF_WRAPPED; 10636 } else { 10637 /* 10638 * There is room for us in the buffer, so we simply 10639 * need to check the wrapped offset. 10640 */ 10641 if (woffs < offs) { 10642 /* 10643 * The wrapped offset is less than the offset. 10644 * This can happen if we allocated buffer space 10645 * that induced a wrap, but then we didn't 10646 * subsequently take the space due to an error 10647 * or false predicate evaluation. This is 10648 * okay; we know that _this_ allocation isn't 10649 * going to induce a wrap. We still can't 10650 * reset the wrapped offset to be zero, 10651 * however: the space may have been trashed in 10652 * the previous failed probe attempt. But at 10653 * least the wrapped offset doesn't need to 10654 * be adjusted at all... 
10655 */ 10656 goto out; 10657 } 10658 } 10659 10660 while (offs + total > woffs) { 10661 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); 10662 size_t size; 10663 10664 if (epid == DTRACE_EPIDNONE) { 10665 size = sizeof (uint32_t); 10666 } else { 10667 ASSERT3U(epid, <=, state->dts_necbs); 10668 ASSERT(state->dts_ecbs[epid - 1] != NULL); 10669 10670 size = state->dts_ecbs[epid - 1]->dte_size; 10671 } 10672 10673 ASSERT(woffs + size <= buf->dtb_size); 10674 ASSERT(size != 0); 10675 10676 if (woffs + size == buf->dtb_size) { 10677 /* 10678 * We've reached the end of the buffer; we want 10679 * to set the wrapped offset to 0 and break 10680 * out. However, if the offs is 0, then we're 10681 * in a strange edge-condition: the amount of 10682 * space that we want to reserve plus the size 10683 * of the record that we're overwriting is 10684 * greater than the size of the buffer. This 10685 * is problematic because if we reserve the 10686 * space but subsequently don't consume it (due 10687 * to a failed predicate or error) the wrapped 10688 * offset will be 0 -- yet the EPID at offset 0 10689 * will not be committed. This situation is 10690 * relatively easy to deal with: if we're in 10691 * this case, the buffer is indistinguishable 10692 * from one that hasn't wrapped; we need only 10693 * finish the job by clearing the wrapped bit, 10694 * explicitly setting the offset to be 0, and 10695 * zero'ing out the old data in the buffer. 10696 */ 10697 if (offs == 0) { 10698 buf->dtb_flags &= ~DTRACEBUF_WRAPPED; 10699 buf->dtb_offset = 0; 10700 woffs = total; 10701 10702 while (woffs < buf->dtb_size) 10703 tomax[woffs++] = 0; 10704 } 10705 10706 woffs = 0; 10707 break; 10708 } 10709 10710 woffs += size; 10711 } 10712 10713 /* 10714 * We have a wrapped offset. It may be that the wrapped offset 10715 * has become zero -- that's okay. 10716 */ 10717 buf->dtb_xamot_offset = woffs; 10718 } 10719 10720 out: 10721 /* 10722 * Now we can plow the buffer with any necessary padding. 
10723 */ 10724 while (offs & (align - 1)) { 10725 /* 10726 * Assert that our alignment is off by a number which 10727 * is itself sizeof (uint32_t) aligned. 10728 */ 10729 ASSERT(!((align - (offs & (align - 1))) & 10730 (sizeof (uint32_t) - 1))); 10731 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); 10732 offs += sizeof (uint32_t); 10733 } 10734 10735 if (buf->dtb_flags & DTRACEBUF_FILL) { 10736 if (offs + needed > buf->dtb_size - state->dts_reserve) { 10737 buf->dtb_flags |= DTRACEBUF_FULL; 10738 return (-1); 10739 } 10740 } 10741 10742 if (mstate == NULL) 10743 return (offs); 10744 10745 /* 10746 * For ring buffers and fill buffers, the scratch space is always 10747 * the inactive buffer. 10748 */ 10749 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot; 10750 mstate->dtms_scratch_size = buf->dtb_size; 10751 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; 10752 10753 return (offs); 10754 } 10755 10756 static void 10757 dtrace_buffer_polish(dtrace_buffer_t *buf) 10758 { 10759 ASSERT(buf->dtb_flags & DTRACEBUF_RING); 10760 ASSERT(MUTEX_HELD(&dtrace_lock)); 10761 10762 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) 10763 return; 10764 10765 /* 10766 * We need to polish the ring buffer. There are three cases: 10767 * 10768 * - The first (and presumably most common) is that there is no gap 10769 * between the buffer offset and the wrapped offset. In this case, 10770 * there is nothing in the buffer that isn't valid data; we can 10771 * mark the buffer as polished and return. 10772 * 10773 * - The second (less common than the first but still more common 10774 * than the third) is that there is a gap between the buffer offset 10775 * and the wrapped offset, and the wrapped offset is larger than the 10776 * buffer offset. This can happen because of an alignment issue, or 10777 * can happen because of a call to dtrace_buffer_reserve() that 10778 * didn't subsequently consume the buffer space. 
In this case, 10779 * we need to zero the data from the buffer offset to the wrapped 10780 * offset. 10781 * 10782 * - The third (and least common) is that there is a gap between the 10783 * buffer offset and the wrapped offset, but the wrapped offset is 10784 * _less_ than the buffer offset. This can only happen because a 10785 * call to dtrace_buffer_reserve() induced a wrap, but the space 10786 * was not subsequently consumed. In this case, we need to zero the 10787 * space from the offset to the end of the buffer _and_ from the 10788 * top of the buffer to the wrapped offset. 10789 */ 10790 if (buf->dtb_offset < buf->dtb_xamot_offset) { 10791 bzero(buf->dtb_tomax + buf->dtb_offset, 10792 buf->dtb_xamot_offset - buf->dtb_offset); 10793 } 10794 10795 if (buf->dtb_offset > buf->dtb_xamot_offset) { 10796 bzero(buf->dtb_tomax + buf->dtb_offset, 10797 buf->dtb_size - buf->dtb_offset); 10798 bzero(buf->dtb_tomax, buf->dtb_xamot_offset); 10799 } 10800 } 10801 10802 /* 10803 * This routine determines if data generated at the specified time has likely 10804 * been entirely consumed at user-level. This routine is called to determine 10805 * if an ECB on a defunct probe (but for an active enabling) can be safely 10806 * disabled and destroyed. 
10807 */ 10808 static int 10809 dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) 10810 { 10811 int i; 10812 10813 for (i = 0; i < NCPU; i++) { 10814 dtrace_buffer_t *buf = &bufs[i]; 10815 10816 if (buf->dtb_size == 0) 10817 continue; 10818 10819 if (buf->dtb_flags & DTRACEBUF_RING) 10820 return (0); 10821 10822 if (!buf->dtb_switched && buf->dtb_offset != 0) 10823 return (0); 10824 10825 if (buf->dtb_switched - buf->dtb_interval < when) 10826 return (0); 10827 } 10828 10829 return (1); 10830 } 10831 10832 static void 10833 dtrace_buffer_free(dtrace_buffer_t *bufs) 10834 { 10835 int i; 10836 10837 for (i = 0; i < NCPU; i++) { 10838 dtrace_buffer_t *buf = &bufs[i]; 10839 10840 if (buf->dtb_tomax == NULL) { 10841 ASSERT(buf->dtb_xamot == NULL); 10842 ASSERT(buf->dtb_size == 0); 10843 continue; 10844 } 10845 10846 if (buf->dtb_xamot != NULL) { 10847 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 10848 kmem_free(buf->dtb_xamot, buf->dtb_size); 10849 } 10850 10851 kmem_free(buf->dtb_tomax, buf->dtb_size); 10852 buf->dtb_size = 0; 10853 buf->dtb_tomax = NULL; 10854 buf->dtb_xamot = NULL; 10855 } 10856 } 10857 10858 /* 10859 * DTrace Enabling Functions 10860 */ 10861 static dtrace_enabling_t * 10862 dtrace_enabling_create(dtrace_vstate_t *vstate) 10863 { 10864 dtrace_enabling_t *enab; 10865 10866 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP); 10867 enab->dten_vstate = vstate; 10868 10869 return (enab); 10870 } 10871 10872 static void 10873 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) 10874 { 10875 dtrace_ecbdesc_t **ndesc; 10876 size_t osize, nsize; 10877 10878 /* 10879 * We can't add to enablings after we've enabled them, or after we've 10880 * retained them. 
10881 */ 10882 ASSERT(enab->dten_probegen == 0); 10883 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); 10884 10885 if (enab->dten_ndesc < enab->dten_maxdesc) { 10886 enab->dten_desc[enab->dten_ndesc++] = ecb; 10887 return; 10888 } 10889 10890 osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); 10891 10892 if (enab->dten_maxdesc == 0) { 10893 enab->dten_maxdesc = 1; 10894 } else { 10895 enab->dten_maxdesc <<= 1; 10896 } 10897 10898 ASSERT(enab->dten_ndesc < enab->dten_maxdesc); 10899 10900 nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); 10901 ndesc = kmem_zalloc(nsize, KM_SLEEP); 10902 bcopy(enab->dten_desc, ndesc, osize); 10903 kmem_free(enab->dten_desc, osize); 10904 10905 enab->dten_desc = ndesc; 10906 enab->dten_desc[enab->dten_ndesc++] = ecb; 10907 } 10908 10909 static void 10910 dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb, 10911 dtrace_probedesc_t *pd) 10912 { 10913 dtrace_ecbdesc_t *new; 10914 dtrace_predicate_t *pred; 10915 dtrace_actdesc_t *act; 10916 10917 /* 10918 * We're going to create a new ECB description that matches the 10919 * specified ECB in every way, but has the specified probe description. 
10920 */ 10921 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); 10922 10923 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL) 10924 dtrace_predicate_hold(pred); 10925 10926 for (act = ecb->dted_action; act != NULL; act = act->dtad_next) 10927 dtrace_actdesc_hold(act); 10928 10929 new->dted_action = ecb->dted_action; 10930 new->dted_pred = ecb->dted_pred; 10931 new->dted_probe = *pd; 10932 new->dted_uarg = ecb->dted_uarg; 10933 10934 dtrace_enabling_add(enab, new); 10935 } 10936 10937 static void 10938 dtrace_enabling_dump(dtrace_enabling_t *enab) 10939 { 10940 int i; 10941 10942 for (i = 0; i < enab->dten_ndesc; i++) { 10943 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe; 10944 10945 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i, 10946 desc->dtpd_provider, desc->dtpd_mod, 10947 desc->dtpd_func, desc->dtpd_name); 10948 } 10949 } 10950 10951 static void 10952 dtrace_enabling_destroy(dtrace_enabling_t *enab) 10953 { 10954 int i; 10955 dtrace_ecbdesc_t *ep; 10956 dtrace_vstate_t *vstate = enab->dten_vstate; 10957 10958 ASSERT(MUTEX_HELD(&dtrace_lock)); 10959 10960 for (i = 0; i < enab->dten_ndesc; i++) { 10961 dtrace_actdesc_t *act, *next; 10962 dtrace_predicate_t *pred; 10963 10964 ep = enab->dten_desc[i]; 10965 10966 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) 10967 dtrace_predicate_release(pred, vstate); 10968 10969 for (act = ep->dted_action; act != NULL; act = next) { 10970 next = act->dtad_next; 10971 dtrace_actdesc_release(act, vstate); 10972 } 10973 10974 kmem_free(ep, sizeof (dtrace_ecbdesc_t)); 10975 } 10976 10977 kmem_free(enab->dten_desc, 10978 enab->dten_maxdesc * sizeof (dtrace_enabling_t *)); 10979 10980 /* 10981 * If this was a retained enabling, decrement the dts_nretained count 10982 * and take it off of the dtrace_retained list. 
10983 */ 10984 if (enab->dten_prev != NULL || enab->dten_next != NULL || 10985 dtrace_retained == enab) { 10986 ASSERT(enab->dten_vstate->dtvs_state != NULL); 10987 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); 10988 enab->dten_vstate->dtvs_state->dts_nretained--; 10989 dtrace_retained_gen++; 10990 } 10991 10992 if (enab->dten_prev == NULL) { 10993 if (dtrace_retained == enab) { 10994 dtrace_retained = enab->dten_next; 10995 10996 if (dtrace_retained != NULL) 10997 dtrace_retained->dten_prev = NULL; 10998 } 10999 } else { 11000 ASSERT(enab != dtrace_retained); 11001 ASSERT(dtrace_retained != NULL); 11002 enab->dten_prev->dten_next = enab->dten_next; 11003 } 11004 11005 if (enab->dten_next != NULL) { 11006 ASSERT(dtrace_retained != NULL); 11007 enab->dten_next->dten_prev = enab->dten_prev; 11008 } 11009 11010 kmem_free(enab, sizeof (dtrace_enabling_t)); 11011 } 11012 11013 static int 11014 dtrace_enabling_retain(dtrace_enabling_t *enab) 11015 { 11016 dtrace_state_t *state; 11017 11018 ASSERT(MUTEX_HELD(&dtrace_lock)); 11019 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); 11020 ASSERT(enab->dten_vstate != NULL); 11021 11022 state = enab->dten_vstate->dtvs_state; 11023 ASSERT(state != NULL); 11024 11025 /* 11026 * We only allow each state to retain dtrace_retain_max enablings. 
11027 */ 11028 if (state->dts_nretained >= dtrace_retain_max) 11029 return (ENOSPC); 11030 11031 state->dts_nretained++; 11032 dtrace_retained_gen++; 11033 11034 if (dtrace_retained == NULL) { 11035 dtrace_retained = enab; 11036 return (0); 11037 } 11038 11039 enab->dten_next = dtrace_retained; 11040 dtrace_retained->dten_prev = enab; 11041 dtrace_retained = enab; 11042 11043 return (0); 11044 } 11045 11046 static int 11047 dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, 11048 dtrace_probedesc_t *create) 11049 { 11050 dtrace_enabling_t *new, *enab; 11051 int found = 0, err = ENOENT; 11052 11053 ASSERT(MUTEX_HELD(&dtrace_lock)); 11054 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); 11055 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); 11056 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); 11057 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN); 11058 11059 new = dtrace_enabling_create(&state->dts_vstate); 11060 11061 /* 11062 * Iterate over all retained enablings, looking for enablings that 11063 * match the specified state. 11064 */ 11065 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 11066 int i; 11067 11068 /* 11069 * dtvs_state can only be NULL for helper enablings -- and 11070 * helper enablings can't be retained. 11071 */ 11072 ASSERT(enab->dten_vstate->dtvs_state != NULL); 11073 11074 if (enab->dten_vstate->dtvs_state != state) 11075 continue; 11076 11077 /* 11078 * Now iterate over each probe description; we're looking for 11079 * an exact match to the specified probe description. 
11080 */ 11081 for (i = 0; i < enab->dten_ndesc; i++) { 11082 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 11083 dtrace_probedesc_t *pd = &ep->dted_probe; 11084 11085 if (strcmp(pd->dtpd_provider, match->dtpd_provider)) 11086 continue; 11087 11088 if (strcmp(pd->dtpd_mod, match->dtpd_mod)) 11089 continue; 11090 11091 if (strcmp(pd->dtpd_func, match->dtpd_func)) 11092 continue; 11093 11094 if (strcmp(pd->dtpd_name, match->dtpd_name)) 11095 continue; 11096 11097 /* 11098 * We have a winning probe! Add it to our growing 11099 * enabling. 11100 */ 11101 found = 1; 11102 dtrace_enabling_addlike(new, ep, create); 11103 } 11104 } 11105 11106 if (!found || (err = dtrace_enabling_retain(new)) != 0) { 11107 dtrace_enabling_destroy(new); 11108 return (err); 11109 } 11110 11111 return (0); 11112 } 11113 11114 static void 11115 dtrace_enabling_retract(dtrace_state_t *state) 11116 { 11117 dtrace_enabling_t *enab, *next; 11118 11119 ASSERT(MUTEX_HELD(&dtrace_lock)); 11120 11121 /* 11122 * Iterate over all retained enablings, destroy the enablings retained 11123 * for the specified state. 11124 */ 11125 for (enab = dtrace_retained; enab != NULL; enab = next) { 11126 next = enab->dten_next; 11127 11128 /* 11129 * dtvs_state can only be NULL for helper enablings -- and 11130 * helper enablings can't be retained. 
11131 */ 11132 ASSERT(enab->dten_vstate->dtvs_state != NULL); 11133 11134 if (enab->dten_vstate->dtvs_state == state) { 11135 ASSERT(state->dts_nretained > 0); 11136 dtrace_enabling_destroy(enab); 11137 } 11138 } 11139 11140 ASSERT(state->dts_nretained == 0); 11141 } 11142 11143 static int 11144 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) 11145 { 11146 int i = 0; 11147 int total_matched = 0, matched = 0; 11148 11149 ASSERT(MUTEX_HELD(&cpu_lock)); 11150 ASSERT(MUTEX_HELD(&dtrace_lock)); 11151 11152 for (i = 0; i < enab->dten_ndesc; i++) { 11153 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 11154 11155 enab->dten_current = ep; 11156 enab->dten_error = 0; 11157 11158 /* 11159 * If a provider failed to enable a probe then get out and 11160 * let the consumer know we failed. 11161 */ 11162 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0) 11163 return (EBUSY); 11164 11165 total_matched += matched; 11166 11167 if (enab->dten_error != 0) { 11168 /* 11169 * If we get an error half-way through enabling the 11170 * probes, we kick out -- perhaps with some number of 11171 * them enabled. Leaving enabled probes enabled may 11172 * be slightly confusing for user-level, but we expect 11173 * that no one will attempt to actually drive on in 11174 * the face of such errors. If this is an anonymous 11175 * enabling (indicated with a NULL nmatched pointer), 11176 * we cmn_err() a message. We aren't expecting to 11177 * get such an error -- such as it can exist at all, 11178 * it would be a result of corrupted DOF in the driver 11179 * properties. 
11180 */ 11181 if (nmatched == NULL) { 11182 cmn_err(CE_WARN, "dtrace_enabling_match() " 11183 "error on %p: %d", (void *)ep, 11184 enab->dten_error); 11185 } 11186 11187 return (enab->dten_error); 11188 } 11189 } 11190 11191 enab->dten_probegen = dtrace_probegen; 11192 if (nmatched != NULL) 11193 *nmatched = total_matched; 11194 11195 return (0); 11196 } 11197 11198 static void 11199 dtrace_enabling_matchall(void) 11200 { 11201 dtrace_enabling_t *enab; 11202 11203 mutex_enter(&cpu_lock); 11204 mutex_enter(&dtrace_lock); 11205 11206 /* 11207 * Iterate over all retained enablings to see if any probes match 11208 * against them. We only perform this operation on enablings for which 11209 * we have sufficient permissions by virtue of being in the global zone 11210 * or in the same zone as the DTrace client. Because we can be called 11211 * after dtrace_detach() has been called, we cannot assert that there 11212 * are retained enablings. We can safely load from dtrace_retained, 11213 * however: the taskq_destroy() at the end of dtrace_detach() will 11214 * block pending our completion. 11215 */ 11216 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 11217 dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred; 11218 cred_t *cr = dcr->dcr_cred; 11219 zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0; 11220 11221 if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL && 11222 (zone == GLOBAL_ZONEID || getzoneid() == zone))) 11223 (void) dtrace_enabling_match(enab, NULL); 11224 } 11225 11226 mutex_exit(&dtrace_lock); 11227 mutex_exit(&cpu_lock); 11228 } 11229 11230 /* 11231 * If an enabling is to be enabled without having matched probes (that is, if 11232 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the 11233 * enabling must be _primed_ by creating an ECB for every ECB description. 
11234 * This must be done to assure that we know the number of speculations, the 11235 * number of aggregations, the minimum buffer size needed, etc. before we 11236 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually 11237 * enabling any probes, we create ECBs for every ECB decription, but with a 11238 * NULL probe -- which is exactly what this function does. 11239 */ 11240 static void 11241 dtrace_enabling_prime(dtrace_state_t *state) 11242 { 11243 dtrace_enabling_t *enab; 11244 int i; 11245 11246 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 11247 ASSERT(enab->dten_vstate->dtvs_state != NULL); 11248 11249 if (enab->dten_vstate->dtvs_state != state) 11250 continue; 11251 11252 /* 11253 * We don't want to prime an enabling more than once, lest 11254 * we allow a malicious user to induce resource exhaustion. 11255 * (The ECBs that result from priming an enabling aren't 11256 * leaked -- but they also aren't deallocated until the 11257 * consumer state is destroyed.) 11258 */ 11259 if (enab->dten_primed) 11260 continue; 11261 11262 for (i = 0; i < enab->dten_ndesc; i++) { 11263 enab->dten_current = enab->dten_desc[i]; 11264 (void) dtrace_probe_enable(NULL, enab); 11265 } 11266 11267 enab->dten_primed = 1; 11268 } 11269 } 11270 11271 /* 11272 * Called to indicate that probes should be provided due to retained 11273 * enablings. This is implemented in terms of dtrace_probe_provide(), but it 11274 * must take an initial lap through the enabling calling the dtps_provide() 11275 * entry point explicitly to allow for autocreated probes. 
11276 */ 11277 static void 11278 dtrace_enabling_provide(dtrace_provider_t *prv) 11279 { 11280 int i, all = 0; 11281 dtrace_probedesc_t desc; 11282 dtrace_genid_t gen; 11283 11284 ASSERT(MUTEX_HELD(&dtrace_lock)); 11285 ASSERT(MUTEX_HELD(&dtrace_provider_lock)); 11286 11287 if (prv == NULL) { 11288 all = 1; 11289 prv = dtrace_provider; 11290 } 11291 11292 do { 11293 dtrace_enabling_t *enab; 11294 void *parg = prv->dtpv_arg; 11295 11296 retry: 11297 gen = dtrace_retained_gen; 11298 for (enab = dtrace_retained; enab != NULL; 11299 enab = enab->dten_next) { 11300 for (i = 0; i < enab->dten_ndesc; i++) { 11301 desc = enab->dten_desc[i]->dted_probe; 11302 mutex_exit(&dtrace_lock); 11303 prv->dtpv_pops.dtps_provide(parg, &desc); 11304 mutex_enter(&dtrace_lock); 11305 /* 11306 * Process the retained enablings again if 11307 * they have changed while we weren't holding 11308 * dtrace_lock. 11309 */ 11310 if (gen != dtrace_retained_gen) 11311 goto retry; 11312 } 11313 } 11314 } while (all && (prv = prv->dtpv_next) != NULL); 11315 11316 mutex_exit(&dtrace_lock); 11317 dtrace_probe_provide(NULL, all ? NULL : prv); 11318 mutex_enter(&dtrace_lock); 11319 } 11320 11321 /* 11322 * Called to reap ECBs that are attached to probes from defunct providers. 11323 */ 11324 static void 11325 dtrace_enabling_reap(void) 11326 { 11327 dtrace_provider_t *prov; 11328 dtrace_probe_t *probe; 11329 dtrace_ecb_t *ecb; 11330 hrtime_t when; 11331 int i; 11332 11333 mutex_enter(&cpu_lock); 11334 mutex_enter(&dtrace_lock); 11335 11336 for (i = 0; i < dtrace_nprobes; i++) { 11337 if ((probe = dtrace_probes[i]) == NULL) 11338 continue; 11339 11340 if (probe->dtpr_ecb == NULL) 11341 continue; 11342 11343 prov = probe->dtpr_provider; 11344 11345 if ((when = prov->dtpv_defunct) == 0) 11346 continue; 11347 11348 /* 11349 * We have ECBs on a defunct provider: we want to reap these 11350 * ECBs to allow the provider to unregister. 
The destruction 11351 * of these ECBs must be done carefully: if we destroy the ECB 11352 * and the consumer later wishes to consume an EPID that 11353 * corresponds to the destroyed ECB (and if the EPID metadata 11354 * has not been previously consumed), the consumer will abort 11355 * processing on the unknown EPID. To reduce (but not, sadly, 11356 * eliminate) the possibility of this, we will only destroy an 11357 * ECB for a defunct provider if, for the state that 11358 * corresponds to the ECB: 11359 * 11360 * (a) There is no speculative tracing (which can effectively 11361 * cache an EPID for an arbitrary amount of time). 11362 * 11363 * (b) The principal buffers have been switched twice since the 11364 * provider became defunct. 11365 * 11366 * (c) The aggregation buffers are of zero size or have been 11367 * switched twice since the provider became defunct. 11368 * 11369 * We use dts_speculates to determine (a) and call a function 11370 * (dtrace_buffer_consumed()) to determine (b) and (c). Note 11371 * that as soon as we've been unable to destroy one of the ECBs 11372 * associated with the probe, we quit trying -- reaping is only 11373 * fruitful in as much as we can destroy all ECBs associated 11374 * with the defunct provider's probes. 
11375 */ 11376 while ((ecb = probe->dtpr_ecb) != NULL) { 11377 dtrace_state_t *state = ecb->dte_state; 11378 dtrace_buffer_t *buf = state->dts_buffer; 11379 dtrace_buffer_t *aggbuf = state->dts_aggbuffer; 11380 11381 if (state->dts_speculates) 11382 break; 11383 11384 if (!dtrace_buffer_consumed(buf, when)) 11385 break; 11386 11387 if (!dtrace_buffer_consumed(aggbuf, when)) 11388 break; 11389 11390 dtrace_ecb_disable(ecb); 11391 ASSERT(probe->dtpr_ecb != ecb); 11392 dtrace_ecb_destroy(ecb); 11393 } 11394 } 11395 11396 mutex_exit(&dtrace_lock); 11397 mutex_exit(&cpu_lock); 11398 } 11399 11400 /* 11401 * DTrace DOF Functions 11402 */ 11403 /*ARGSUSED*/ 11404 static void 11405 dtrace_dof_error(dof_hdr_t *dof, const char *str) 11406 { 11407 if (dtrace_err_verbose) 11408 cmn_err(CE_WARN, "failed to process DOF: %s", str); 11409 11410 #ifdef DTRACE_ERRDEBUG 11411 dtrace_errdebug(str); 11412 #endif 11413 } 11414 11415 /* 11416 * Create DOF out of a currently enabled state. Right now, we only create 11417 * DOF containing the run-time options -- but this could be expanded to create 11418 * complete DOF representing the enabled state. 
11419 */ 11420 static dof_hdr_t * 11421 dtrace_dof_create(dtrace_state_t *state) 11422 { 11423 dof_hdr_t *dof; 11424 dof_sec_t *sec; 11425 dof_optdesc_t *opt; 11426 int i, len = sizeof (dof_hdr_t) + 11427 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + 11428 sizeof (dof_optdesc_t) * DTRACEOPT_MAX; 11429 11430 ASSERT(MUTEX_HELD(&dtrace_lock)); 11431 11432 dof = kmem_zalloc(len, KM_SLEEP); 11433 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; 11434 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; 11435 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; 11436 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; 11437 11438 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE; 11439 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; 11440 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION; 11441 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION; 11442 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS; 11443 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS; 11444 11445 dof->dofh_flags = 0; 11446 dof->dofh_hdrsize = sizeof (dof_hdr_t); 11447 dof->dofh_secsize = sizeof (dof_sec_t); 11448 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ 11449 dof->dofh_secoff = sizeof (dof_hdr_t); 11450 dof->dofh_loadsz = len; 11451 dof->dofh_filesz = len; 11452 dof->dofh_pad = 0; 11453 11454 /* 11455 * Fill in the option section header... 
11456 */ 11457 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); 11458 sec->dofs_type = DOF_SECT_OPTDESC; 11459 sec->dofs_align = sizeof (uint64_t); 11460 sec->dofs_flags = DOF_SECF_LOAD; 11461 sec->dofs_entsize = sizeof (dof_optdesc_t); 11462 11463 opt = (dof_optdesc_t *)((uintptr_t)sec + 11464 roundup(sizeof (dof_sec_t), sizeof (uint64_t))); 11465 11466 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof; 11467 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX; 11468 11469 for (i = 0; i < DTRACEOPT_MAX; i++) { 11470 opt[i].dofo_option = i; 11471 opt[i].dofo_strtab = DOF_SECIDX_NONE; 11472 opt[i].dofo_value = state->dts_options[i]; 11473 } 11474 11475 return (dof); 11476 } 11477 11478 static dof_hdr_t * 11479 dtrace_dof_copyin(uintptr_t uarg, int *errp) 11480 { 11481 dof_hdr_t hdr, *dof; 11482 11483 ASSERT(!MUTEX_HELD(&dtrace_lock)); 11484 11485 /* 11486 * First, we're going to copyin() the sizeof (dof_hdr_t). 11487 */ 11488 if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { 11489 dtrace_dof_error(NULL, "failed to copyin DOF header"); 11490 *errp = EFAULT; 11491 return (NULL); 11492 } 11493 11494 /* 11495 * Now we'll allocate the entire DOF and copy it in -- provided 11496 * that the length isn't outrageous. 
11497 */ 11498 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { 11499 dtrace_dof_error(&hdr, "load size exceeds maximum"); 11500 *errp = E2BIG; 11501 return (NULL); 11502 } 11503 11504 if (hdr.dofh_loadsz < sizeof (hdr)) { 11505 dtrace_dof_error(&hdr, "invalid load size"); 11506 *errp = EINVAL; 11507 return (NULL); 11508 } 11509 11510 dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); 11511 11512 if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || 11513 dof->dofh_loadsz != hdr.dofh_loadsz) { 11514 kmem_free(dof, hdr.dofh_loadsz); 11515 *errp = EFAULT; 11516 return (NULL); 11517 } 11518 11519 return (dof); 11520 } 11521 11522 static dof_hdr_t * 11523 dtrace_dof_property(const char *name) 11524 { 11525 uchar_t *buf; 11526 uint64_t loadsz; 11527 unsigned int len, i; 11528 dof_hdr_t *dof; 11529 11530 /* 11531 * Unfortunately, array of values in .conf files are always (and 11532 * only) interpreted to be integer arrays. We must read our DOF 11533 * as an integer array, and then squeeze it into a byte array. 
11534 */ 11535 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, 11536 (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) 11537 return (NULL); 11538 11539 for (i = 0; i < len; i++) 11540 buf[i] = (uchar_t)(((int *)buf)[i]); 11541 11542 if (len < sizeof (dof_hdr_t)) { 11543 ddi_prop_free(buf); 11544 dtrace_dof_error(NULL, "truncated header"); 11545 return (NULL); 11546 } 11547 11548 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { 11549 ddi_prop_free(buf); 11550 dtrace_dof_error(NULL, "truncated DOF"); 11551 return (NULL); 11552 } 11553 11554 if (loadsz >= dtrace_dof_maxsize) { 11555 ddi_prop_free(buf); 11556 dtrace_dof_error(NULL, "oversized DOF"); 11557 return (NULL); 11558 } 11559 11560 dof = kmem_alloc(loadsz, KM_SLEEP); 11561 bcopy(buf, dof, loadsz); 11562 ddi_prop_free(buf); 11563 11564 return (dof); 11565 } 11566 11567 static void 11568 dtrace_dof_destroy(dof_hdr_t *dof) 11569 { 11570 kmem_free(dof, dof->dofh_loadsz); 11571 } 11572 11573 /* 11574 * Return the dof_sec_t pointer corresponding to a given section index. If the 11575 * index is not valid, dtrace_dof_error() is called and NULL is returned. If 11576 * a type other than DOF_SECT_NONE is specified, the header is checked against 11577 * this type and NULL is returned if the types do not match. 
11578 */ 11579 static dof_sec_t * 11580 dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i) 11581 { 11582 dof_sec_t *sec = (dof_sec_t *)(uintptr_t) 11583 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize); 11584 11585 if (i >= dof->dofh_secnum) { 11586 dtrace_dof_error(dof, "referenced section index is invalid"); 11587 return (NULL); 11588 } 11589 11590 if (!(sec->dofs_flags & DOF_SECF_LOAD)) { 11591 dtrace_dof_error(dof, "referenced section is not loadable"); 11592 return (NULL); 11593 } 11594 11595 if (type != DOF_SECT_NONE && type != sec->dofs_type) { 11596 dtrace_dof_error(dof, "referenced section is the wrong type"); 11597 return (NULL); 11598 } 11599 11600 return (sec); 11601 } 11602 11603 static dtrace_probedesc_t * 11604 dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) 11605 { 11606 dof_probedesc_t *probe; 11607 dof_sec_t *strtab; 11608 uintptr_t daddr = (uintptr_t)dof; 11609 uintptr_t str; 11610 size_t size; 11611 11612 if (sec->dofs_type != DOF_SECT_PROBEDESC) { 11613 dtrace_dof_error(dof, "invalid probe section"); 11614 return (NULL); 11615 } 11616 11617 if (sec->dofs_align != sizeof (dof_secidx_t)) { 11618 dtrace_dof_error(dof, "bad alignment in probe description"); 11619 return (NULL); 11620 } 11621 11622 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) { 11623 dtrace_dof_error(dof, "truncated probe description"); 11624 return (NULL); 11625 } 11626 11627 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset); 11628 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab); 11629 11630 if (strtab == NULL) 11631 return (NULL); 11632 11633 str = daddr + strtab->dofs_offset; 11634 size = strtab->dofs_size; 11635 11636 if (probe->dofp_provider >= strtab->dofs_size) { 11637 dtrace_dof_error(dof, "corrupt probe provider"); 11638 return (NULL); 11639 } 11640 11641 (void) strncpy(desc->dtpd_provider, 11642 (char *)(str + probe->dofp_provider), 11643 MIN(DTRACE_PROVNAMELEN - 
1, size - probe->dofp_provider)); 11644 11645 if (probe->dofp_mod >= strtab->dofs_size) { 11646 dtrace_dof_error(dof, "corrupt probe module"); 11647 return (NULL); 11648 } 11649 11650 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), 11651 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); 11652 11653 if (probe->dofp_func >= strtab->dofs_size) { 11654 dtrace_dof_error(dof, "corrupt probe function"); 11655 return (NULL); 11656 } 11657 11658 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), 11659 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); 11660 11661 if (probe->dofp_name >= strtab->dofs_size) { 11662 dtrace_dof_error(dof, "corrupt probe name"); 11663 return (NULL); 11664 } 11665 11666 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), 11667 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); 11668 11669 return (desc); 11670 } 11671 11672 static dtrace_difo_t * 11673 dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11674 cred_t *cr) 11675 { 11676 dtrace_difo_t *dp; 11677 size_t ttl = 0; 11678 dof_difohdr_t *dofd; 11679 uintptr_t daddr = (uintptr_t)dof; 11680 size_t max = dtrace_difo_maxsize; 11681 int i, l, n; 11682 11683 static const struct { 11684 int section; 11685 int bufoffs; 11686 int lenoffs; 11687 int entsize; 11688 int align; 11689 const char *msg; 11690 } difo[] = { 11691 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf), 11692 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t), 11693 sizeof (dif_instr_t), "multiple DIF sections" }, 11694 11695 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab), 11696 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t), 11697 sizeof (uint64_t), "multiple integer tables" }, 11698 11699 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab), 11700 offsetof(dtrace_difo_t, dtdo_strlen), 0, 11701 sizeof (char), "multiple string tables" }, 11702 11703 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab), 11704 
offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), 11705 sizeof (uint_t), "multiple variable tables" }, 11706 11707 { DOF_SECT_NONE, 0, 0, 0, NULL } 11708 }; 11709 11710 if (sec->dofs_type != DOF_SECT_DIFOHDR) { 11711 dtrace_dof_error(dof, "invalid DIFO header section"); 11712 return (NULL); 11713 } 11714 11715 if (sec->dofs_align != sizeof (dof_secidx_t)) { 11716 dtrace_dof_error(dof, "bad alignment in DIFO header"); 11717 return (NULL); 11718 } 11719 11720 if (sec->dofs_size < sizeof (dof_difohdr_t) || 11721 sec->dofs_size % sizeof (dof_secidx_t)) { 11722 dtrace_dof_error(dof, "bad size in DIFO header"); 11723 return (NULL); 11724 } 11725 11726 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); 11727 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1; 11728 11729 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 11730 dp->dtdo_rtype = dofd->dofd_rtype; 11731 11732 for (l = 0; l < n; l++) { 11733 dof_sec_t *subsec; 11734 void **bufp; 11735 uint32_t *lenp; 11736 11737 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE, 11738 dofd->dofd_links[l])) == NULL) 11739 goto err; /* invalid section link */ 11740 11741 if (ttl + subsec->dofs_size > max) { 11742 dtrace_dof_error(dof, "exceeds maximum size"); 11743 goto err; 11744 } 11745 11746 ttl += subsec->dofs_size; 11747 11748 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { 11749 if (subsec->dofs_type != difo[i].section) 11750 continue; 11751 11752 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { 11753 dtrace_dof_error(dof, "section not loaded"); 11754 goto err; 11755 } 11756 11757 if (subsec->dofs_align != difo[i].align) { 11758 dtrace_dof_error(dof, "bad alignment"); 11759 goto err; 11760 } 11761 11762 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); 11763 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); 11764 11765 if (*bufp != NULL) { 11766 dtrace_dof_error(dof, difo[i].msg); 11767 goto err; 11768 } 11769 11770 if (difo[i].entsize != subsec->dofs_entsize) { 11771 
dtrace_dof_error(dof, "entry size mismatch"); 11772 goto err; 11773 } 11774 11775 if (subsec->dofs_entsize != 0 && 11776 (subsec->dofs_size % subsec->dofs_entsize) != 0) { 11777 dtrace_dof_error(dof, "corrupt entry size"); 11778 goto err; 11779 } 11780 11781 *lenp = subsec->dofs_size; 11782 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP); 11783 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset), 11784 *bufp, subsec->dofs_size); 11785 11786 if (subsec->dofs_entsize != 0) 11787 *lenp /= subsec->dofs_entsize; 11788 11789 break; 11790 } 11791 11792 /* 11793 * If we encounter a loadable DIFO sub-section that is not 11794 * known to us, assume this is a broken program and fail. 11795 */ 11796 if (difo[i].section == DOF_SECT_NONE && 11797 (subsec->dofs_flags & DOF_SECF_LOAD)) { 11798 dtrace_dof_error(dof, "unrecognized DIFO subsection"); 11799 goto err; 11800 } 11801 } 11802 11803 if (dp->dtdo_buf == NULL) { 11804 /* 11805 * We can't have a DIF object without DIF text. 11806 */ 11807 dtrace_dof_error(dof, "missing DIF text"); 11808 goto err; 11809 } 11810 11811 /* 11812 * Before we validate the DIF object, run through the variable table 11813 * looking for the strings -- if any of their size are under, we'll set 11814 * their size to be the system-wide default string size. Note that 11815 * this should _not_ happen if the "strsize" option has been set -- 11816 * in this case, the compiler should have set the size to reflect the 11817 * setting of the option. 
11818 */ 11819 for (i = 0; i < dp->dtdo_varlen; i++) { 11820 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 11821 dtrace_diftype_t *t = &v->dtdv_type; 11822 11823 if (v->dtdv_id < DIF_VAR_OTHER_UBASE) 11824 continue; 11825 11826 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0) 11827 t->dtdt_size = dtrace_strsize_default; 11828 } 11829 11830 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0) 11831 goto err; 11832 11833 dtrace_difo_init(dp, vstate); 11834 return (dp); 11835 11836 err: 11837 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 11838 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 11839 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); 11840 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 11841 11842 kmem_free(dp, sizeof (dtrace_difo_t)); 11843 return (NULL); 11844 } 11845 11846 static dtrace_predicate_t * 11847 dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11848 cred_t *cr) 11849 { 11850 dtrace_difo_t *dp; 11851 11852 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL) 11853 return (NULL); 11854 11855 return (dtrace_predicate_create(dp)); 11856 } 11857 11858 static dtrace_actdesc_t * 11859 dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11860 cred_t *cr) 11861 { 11862 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next; 11863 dof_actdesc_t *desc; 11864 dof_sec_t *difosec; 11865 size_t offs; 11866 uintptr_t daddr = (uintptr_t)dof; 11867 uint64_t arg; 11868 dtrace_actkind_t kind; 11869 11870 if (sec->dofs_type != DOF_SECT_ACTDESC) { 11871 dtrace_dof_error(dof, "invalid action section"); 11872 return (NULL); 11873 } 11874 11875 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) { 11876 dtrace_dof_error(dof, "truncated action description"); 11877 return (NULL); 11878 } 11879 11880 if (sec->dofs_align != sizeof (uint64_t)) { 11881 dtrace_dof_error(dof, "bad alignment in action description"); 11882 return (NULL); 
11883 } 11884 11885 if (sec->dofs_size < sec->dofs_entsize) { 11886 dtrace_dof_error(dof, "section entry size exceeds total size"); 11887 return (NULL); 11888 } 11889 11890 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) { 11891 dtrace_dof_error(dof, "bad entry size in action description"); 11892 return (NULL); 11893 } 11894 11895 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) { 11896 dtrace_dof_error(dof, "actions exceed dtrace_actions_max"); 11897 return (NULL); 11898 } 11899 11900 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { 11901 desc = (dof_actdesc_t *)(daddr + 11902 (uintptr_t)sec->dofs_offset + offs); 11903 kind = (dtrace_actkind_t)desc->dofa_kind; 11904 11905 if ((DTRACEACT_ISPRINTFLIKE(kind) && 11906 (kind != DTRACEACT_PRINTA || 11907 desc->dofa_strtab != DOF_SECIDX_NONE)) || 11908 (kind == DTRACEACT_DIFEXPR && 11909 desc->dofa_strtab != DOF_SECIDX_NONE)) { 11910 dof_sec_t *strtab; 11911 char *str, *fmt; 11912 uint64_t i; 11913 11914 /* 11915 * The argument to these actions is an index into the 11916 * DOF string table. For printf()-like actions, this 11917 * is the format string. For print(), this is the 11918 * CTF type of the expression result. 
11919 */ 11920 if ((strtab = dtrace_dof_sect(dof, 11921 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) 11922 goto err; 11923 11924 str = (char *)((uintptr_t)dof + 11925 (uintptr_t)strtab->dofs_offset); 11926 11927 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) { 11928 if (str[i] == '\0') 11929 break; 11930 } 11931 11932 if (i >= strtab->dofs_size) { 11933 dtrace_dof_error(dof, "bogus format string"); 11934 goto err; 11935 } 11936 11937 if (i == desc->dofa_arg) { 11938 dtrace_dof_error(dof, "empty format string"); 11939 goto err; 11940 } 11941 11942 i -= desc->dofa_arg; 11943 fmt = kmem_alloc(i + 1, KM_SLEEP); 11944 bcopy(&str[desc->dofa_arg], fmt, i + 1); 11945 arg = (uint64_t)(uintptr_t)fmt; 11946 } else { 11947 if (kind == DTRACEACT_PRINTA) { 11948 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE); 11949 arg = 0; 11950 } else { 11951 arg = desc->dofa_arg; 11952 } 11953 } 11954 11955 act = dtrace_actdesc_create(kind, desc->dofa_ntuple, 11956 desc->dofa_uarg, arg); 11957 11958 if (last != NULL) { 11959 last->dtad_next = act; 11960 } else { 11961 first = act; 11962 } 11963 11964 last = act; 11965 11966 if (desc->dofa_difo == DOF_SECIDX_NONE) 11967 continue; 11968 11969 if ((difosec = dtrace_dof_sect(dof, 11970 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL) 11971 goto err; 11972 11973 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr); 11974 11975 if (act->dtad_difo == NULL) 11976 goto err; 11977 } 11978 11979 ASSERT(first != NULL); 11980 return (first); 11981 11982 err: 11983 for (act = first; act != NULL; act = next) { 11984 next = act->dtad_next; 11985 dtrace_actdesc_release(act, vstate); 11986 } 11987 11988 return (NULL); 11989 } 11990 11991 static dtrace_ecbdesc_t * 11992 dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11993 cred_t *cr) 11994 { 11995 dtrace_ecbdesc_t *ep; 11996 dof_ecbdesc_t *ecb; 11997 dtrace_probedesc_t *desc; 11998 dtrace_predicate_t *pred = NULL; 11999 12000 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) { 
12001 dtrace_dof_error(dof, "truncated ECB description"); 12002 return (NULL); 12003 } 12004 12005 if (sec->dofs_align != sizeof (uint64_t)) { 12006 dtrace_dof_error(dof, "bad alignment in ECB description"); 12007 return (NULL); 12008 } 12009 12010 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset); 12011 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes); 12012 12013 if (sec == NULL) 12014 return (NULL); 12015 12016 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); 12017 ep->dted_uarg = ecb->dofe_uarg; 12018 desc = &ep->dted_probe; 12019 12020 if (dtrace_dof_probedesc(dof, sec, desc) == NULL) 12021 goto err; 12022 12023 if (ecb->dofe_pred != DOF_SECIDX_NONE) { 12024 if ((sec = dtrace_dof_sect(dof, 12025 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL) 12026 goto err; 12027 12028 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL) 12029 goto err; 12030 12031 ep->dted_pred.dtpdd_predicate = pred; 12032 } 12033 12034 if (ecb->dofe_actions != DOF_SECIDX_NONE) { 12035 if ((sec = dtrace_dof_sect(dof, 12036 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL) 12037 goto err; 12038 12039 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr); 12040 12041 if (ep->dted_action == NULL) 12042 goto err; 12043 } 12044 12045 return (ep); 12046 12047 err: 12048 if (pred != NULL) 12049 dtrace_predicate_release(pred, vstate); 12050 kmem_free(ep, sizeof (dtrace_ecbdesc_t)); 12051 return (NULL); 12052 } 12053 12054 /* 12055 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the 12056 * specified DOF. At present, this amounts to simply adding 'ubase' to the 12057 * site of any user SETX relocations to account for load object base address. 12058 * In the future, if we need other relocations, this function can be extended. 
12059 */ 12060 static int 12061 dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase) 12062 { 12063 uintptr_t daddr = (uintptr_t)dof; 12064 dof_relohdr_t *dofr = 12065 (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); 12066 dof_sec_t *ss, *rs, *ts; 12067 dof_relodesc_t *r; 12068 uint_t i, n; 12069 12070 if (sec->dofs_size < sizeof (dof_relohdr_t) || 12071 sec->dofs_align != sizeof (dof_secidx_t)) { 12072 dtrace_dof_error(dof, "invalid relocation header"); 12073 return (-1); 12074 } 12075 12076 ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); 12077 rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); 12078 ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); 12079 12080 if (ss == NULL || rs == NULL || ts == NULL) 12081 return (-1); /* dtrace_dof_error() has been called already */ 12082 12083 if (rs->dofs_entsize < sizeof (dof_relodesc_t) || 12084 rs->dofs_align != sizeof (uint64_t)) { 12085 dtrace_dof_error(dof, "invalid relocation section"); 12086 return (-1); 12087 } 12088 12089 r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); 12090 n = rs->dofs_size / rs->dofs_entsize; 12091 12092 for (i = 0; i < n; i++) { 12093 uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; 12094 12095 switch (r->dofr_type) { 12096 case DOF_RELO_NONE: 12097 break; 12098 case DOF_RELO_SETX: 12099 if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + 12100 sizeof (uint64_t) > ts->dofs_size) { 12101 dtrace_dof_error(dof, "bad relocation offset"); 12102 return (-1); 12103 } 12104 12105 if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { 12106 dtrace_dof_error(dof, "misaligned setx relo"); 12107 return (-1); 12108 } 12109 12110 *(uint64_t *)taddr += ubase; 12111 break; 12112 default: 12113 dtrace_dof_error(dof, "invalid relocation type"); 12114 return (-1); 12115 } 12116 12117 r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); 12118 } 12119 12120 return (0); 12121 } 12122 12123 /* 12124 * The dof_hdr_t passed to 
dtrace_dof_slurp() should be a partially validated 12125 * header: it should be at the front of a memory region that is at least 12126 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in 12127 * size. It need not be validated in any other way. 12128 */ 12129 static int 12130 dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, 12131 dtrace_enabling_t **enabp, uint64_t ubase, int noprobes) 12132 { 12133 uint64_t len = dof->dofh_loadsz, seclen; 12134 uintptr_t daddr = (uintptr_t)dof; 12135 dtrace_ecbdesc_t *ep; 12136 dtrace_enabling_t *enab; 12137 uint_t i; 12138 12139 ASSERT(MUTEX_HELD(&dtrace_lock)); 12140 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); 12141 12142 /* 12143 * Check the DOF header identification bytes. In addition to checking 12144 * valid settings, we also verify that unused bits/bytes are zeroed so 12145 * we can use them later without fear of regressing existing binaries. 12146 */ 12147 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0], 12148 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) { 12149 dtrace_dof_error(dof, "DOF magic string mismatch"); 12150 return (-1); 12151 } 12152 12153 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 && 12154 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) { 12155 dtrace_dof_error(dof, "DOF has invalid data model"); 12156 return (-1); 12157 } 12158 12159 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) { 12160 dtrace_dof_error(dof, "DOF encoding mismatch"); 12161 return (-1); 12162 } 12163 12164 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 12165 dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { 12166 dtrace_dof_error(dof, "DOF version mismatch"); 12167 return (-1); 12168 } 12169 12170 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { 12171 dtrace_dof_error(dof, "DOF uses unsupported instruction set"); 12172 return (-1); 12173 } 12174 12175 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) { 12176 dtrace_dof_error(dof, "DOF uses too many integer registers"); 
12177 return (-1); 12178 } 12179 12180 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) { 12181 dtrace_dof_error(dof, "DOF uses too many tuple registers"); 12182 return (-1); 12183 } 12184 12185 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) { 12186 if (dof->dofh_ident[i] != 0) { 12187 dtrace_dof_error(dof, "DOF has invalid ident byte set"); 12188 return (-1); 12189 } 12190 } 12191 12192 if (dof->dofh_flags & ~DOF_FL_VALID) { 12193 dtrace_dof_error(dof, "DOF has invalid flag bits set"); 12194 return (-1); 12195 } 12196 12197 if (dof->dofh_secsize == 0) { 12198 dtrace_dof_error(dof, "zero section header size"); 12199 return (-1); 12200 } 12201 12202 /* 12203 * Check that the section headers don't exceed the amount of DOF 12204 * data. Note that we cast the section size and number of sections 12205 * to uint64_t's to prevent possible overflow in the multiplication. 12206 */ 12207 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize; 12208 12209 if (dof->dofh_secoff > len || seclen > len || 12210 dof->dofh_secoff + seclen > len) { 12211 dtrace_dof_error(dof, "truncated section headers"); 12212 return (-1); 12213 } 12214 12215 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) { 12216 dtrace_dof_error(dof, "misaligned section headers"); 12217 return (-1); 12218 } 12219 12220 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) { 12221 dtrace_dof_error(dof, "misaligned section size"); 12222 return (-1); 12223 } 12224 12225 /* 12226 * Take an initial pass through the section headers to be sure that 12227 * the headers don't have stray offsets. If the 'noprobes' flag is 12228 * set, do not permit sections relating to providers, probes, or args. 
12229 */ 12230 for (i = 0; i < dof->dofh_secnum; i++) { 12231 dof_sec_t *sec = (dof_sec_t *)(daddr + 12232 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 12233 12234 if (noprobes) { 12235 switch (sec->dofs_type) { 12236 case DOF_SECT_PROVIDER: 12237 case DOF_SECT_PROBES: 12238 case DOF_SECT_PRARGS: 12239 case DOF_SECT_PROFFS: 12240 dtrace_dof_error(dof, "illegal sections " 12241 "for enabling"); 12242 return (-1); 12243 } 12244 } 12245 12246 if (DOF_SEC_ISLOADABLE(sec->dofs_type) && 12247 !(sec->dofs_flags & DOF_SECF_LOAD)) { 12248 dtrace_dof_error(dof, "loadable section with load " 12249 "flag unset"); 12250 return (-1); 12251 } 12252 12253 if (!(sec->dofs_flags & DOF_SECF_LOAD)) 12254 continue; /* just ignore non-loadable sections */ 12255 12256 if (sec->dofs_align & (sec->dofs_align - 1)) { 12257 dtrace_dof_error(dof, "bad section alignment"); 12258 return (-1); 12259 } 12260 12261 if (sec->dofs_offset & (sec->dofs_align - 1)) { 12262 dtrace_dof_error(dof, "misaligned section"); 12263 return (-1); 12264 } 12265 12266 if (sec->dofs_offset > len || sec->dofs_size > len || 12267 sec->dofs_offset + sec->dofs_size > len) { 12268 dtrace_dof_error(dof, "corrupt section header"); 12269 return (-1); 12270 } 12271 12272 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr + 12273 sec->dofs_offset + sec->dofs_size - 1) != '\0') { 12274 dtrace_dof_error(dof, "non-terminating string table"); 12275 return (-1); 12276 } 12277 } 12278 12279 /* 12280 * Take a second pass through the sections and locate and perform any 12281 * relocations that are present. We do this after the first pass to 12282 * be sure that all sections have had their headers validated. 
12283 */ 12284 for (i = 0; i < dof->dofh_secnum; i++) { 12285 dof_sec_t *sec = (dof_sec_t *)(daddr + 12286 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 12287 12288 if (!(sec->dofs_flags & DOF_SECF_LOAD)) 12289 continue; /* skip sections that are not loadable */ 12290 12291 switch (sec->dofs_type) { 12292 case DOF_SECT_URELHDR: 12293 if (dtrace_dof_relocate(dof, sec, ubase) != 0) 12294 return (-1); 12295 break; 12296 } 12297 } 12298 12299 if ((enab = *enabp) == NULL) 12300 enab = *enabp = dtrace_enabling_create(vstate); 12301 12302 for (i = 0; i < dof->dofh_secnum; i++) { 12303 dof_sec_t *sec = (dof_sec_t *)(daddr + 12304 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 12305 12306 if (sec->dofs_type != DOF_SECT_ECBDESC) 12307 continue; 12308 12309 if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) { 12310 dtrace_enabling_destroy(enab); 12311 *enabp = NULL; 12312 return (-1); 12313 } 12314 12315 dtrace_enabling_add(enab, ep); 12316 } 12317 12318 return (0); 12319 } 12320 12321 /* 12322 * Process DOF for any options. This routine assumes that the DOF has been 12323 * at least processed by dtrace_dof_slurp(). 
12324 */ 12325 static int 12326 dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) 12327 { 12328 int i, rval; 12329 uint32_t entsize; 12330 size_t offs; 12331 dof_optdesc_t *desc; 12332 12333 for (i = 0; i < dof->dofh_secnum; i++) { 12334 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + 12335 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 12336 12337 if (sec->dofs_type != DOF_SECT_OPTDESC) 12338 continue; 12339 12340 if (sec->dofs_align != sizeof (uint64_t)) { 12341 dtrace_dof_error(dof, "bad alignment in " 12342 "option description"); 12343 return (EINVAL); 12344 } 12345 12346 if ((entsize = sec->dofs_entsize) == 0) { 12347 dtrace_dof_error(dof, "zeroed option entry size"); 12348 return (EINVAL); 12349 } 12350 12351 if (entsize < sizeof (dof_optdesc_t)) { 12352 dtrace_dof_error(dof, "bad option entry size"); 12353 return (EINVAL); 12354 } 12355 12356 for (offs = 0; offs < sec->dofs_size; offs += entsize) { 12357 desc = (dof_optdesc_t *)((uintptr_t)dof + 12358 (uintptr_t)sec->dofs_offset + offs); 12359 12360 if (desc->dofo_strtab != DOF_SECIDX_NONE) { 12361 dtrace_dof_error(dof, "non-zero option string"); 12362 return (EINVAL); 12363 } 12364 12365 if (desc->dofo_value == DTRACEOPT_UNSET) { 12366 dtrace_dof_error(dof, "unset option"); 12367 return (EINVAL); 12368 } 12369 12370 if ((rval = dtrace_state_option(state, 12371 desc->dofo_option, desc->dofo_value)) != 0) { 12372 dtrace_dof_error(dof, "rejected option"); 12373 return (rval); 12374 } 12375 } 12376 } 12377 12378 return (0); 12379 } 12380 12381 /* 12382 * DTrace Consumer State Functions 12383 */ 12384 int 12385 dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) 12386 { 12387 size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize; 12388 void *base; 12389 uintptr_t limit; 12390 dtrace_dynvar_t *dvar, *next, *start; 12391 int i; 12392 12393 ASSERT(MUTEX_HELD(&dtrace_lock)); 12394 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); 12395 12396 bzero(dstate, sizeof 
(dtrace_dstate_t)); 12397 12398 if ((dstate->dtds_chunksize = chunksize) == 0) 12399 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; 12400 12401 if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) 12402 size = min; 12403 12404 if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) 12405 return (ENOMEM); 12406 12407 dstate->dtds_size = size; 12408 dstate->dtds_base = base; 12409 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP); 12410 bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t)); 12411 12412 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)); 12413 12414 if (hashsize != 1 && (hashsize & 1)) 12415 hashsize--; 12416 12417 dstate->dtds_hashsize = hashsize; 12418 dstate->dtds_hash = dstate->dtds_base; 12419 12420 /* 12421 * Set all of our hash buckets to point to the single sink, and (if 12422 * it hasn't already been set), set the sink's hash value to be the 12423 * sink sentinel value. The sink is needed for dynamic variable 12424 * lookups to know that they have iterated over an entire, valid hash 12425 * chain. 12426 */ 12427 for (i = 0; i < hashsize; i++) 12428 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink; 12429 12430 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK) 12431 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK; 12432 12433 /* 12434 * Determine number of active CPUs. Divide free list evenly among 12435 * active CPUs. 
12436 */ 12437 start = (dtrace_dynvar_t *) 12438 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); 12439 limit = (uintptr_t)base + size; 12440 12441 maxper = (limit - (uintptr_t)start) / NCPU; 12442 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; 12443 12444 for (i = 0; i < NCPU; i++) { 12445 dstate->dtds_percpu[i].dtdsc_free = dvar = start; 12446 12447 /* 12448 * If we don't even have enough chunks to make it once through 12449 * NCPUs, we're just going to allocate everything to the first 12450 * CPU. And if we're on the last CPU, we're going to allocate 12451 * whatever is left over. In either case, we set the limit to 12452 * be the limit of the dynamic variable space. 12453 */ 12454 if (maxper == 0 || i == NCPU - 1) { 12455 limit = (uintptr_t)base + size; 12456 start = NULL; 12457 } else { 12458 limit = (uintptr_t)start + maxper; 12459 start = (dtrace_dynvar_t *)limit; 12460 } 12461 12462 ASSERT(limit <= (uintptr_t)base + size); 12463 12464 for (;;) { 12465 next = (dtrace_dynvar_t *)((uintptr_t)dvar + 12466 dstate->dtds_chunksize); 12467 12468 if ((uintptr_t)next + dstate->dtds_chunksize >= limit) 12469 break; 12470 12471 dvar->dtdv_next = next; 12472 dvar = next; 12473 } 12474 12475 if (maxper == 0) 12476 break; 12477 } 12478 12479 return (0); 12480 } 12481 12482 void 12483 dtrace_dstate_fini(dtrace_dstate_t *dstate) 12484 { 12485 ASSERT(MUTEX_HELD(&cpu_lock)); 12486 12487 if (dstate->dtds_base == NULL) 12488 return; 12489 12490 kmem_free(dstate->dtds_base, dstate->dtds_size); 12491 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu); 12492 } 12493 12494 static void 12495 dtrace_vstate_fini(dtrace_vstate_t *vstate) 12496 { 12497 /* 12498 * Logical XOR, where are you? 
12499 */ 12500 ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL)); 12501 12502 if (vstate->dtvs_nglobals > 0) { 12503 kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals * 12504 sizeof (dtrace_statvar_t *)); 12505 } 12506 12507 if (vstate->dtvs_ntlocals > 0) { 12508 kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals * 12509 sizeof (dtrace_difv_t)); 12510 } 12511 12512 ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL)); 12513 12514 if (vstate->dtvs_nlocals > 0) { 12515 kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals * 12516 sizeof (dtrace_statvar_t *)); 12517 } 12518 } 12519 12520 static void 12521 dtrace_state_clean(dtrace_state_t *state) 12522 { 12523 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) 12524 return; 12525 12526 dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); 12527 dtrace_speculation_clean(state); 12528 } 12529 12530 static void 12531 dtrace_state_deadman(dtrace_state_t *state) 12532 { 12533 hrtime_t now; 12534 12535 dtrace_sync(); 12536 12537 now = dtrace_gethrtime(); 12538 12539 if (state != dtrace_anon.dta_state && 12540 now - state->dts_laststatus >= dtrace_deadman_user) 12541 return; 12542 12543 /* 12544 * We must be sure that dts_alive never appears to be less than the 12545 * value upon entry to dtrace_state_deadman(), and because we lack a 12546 * dtrace_cas64(), we cannot store to it atomically. We thus instead 12547 * store INT64_MAX to it, followed by a memory barrier, followed by 12548 * the new value. This assures that dts_alive never appears to be 12549 * less than its true value, regardless of the order in which the 12550 * stores to the underlying storage are issued. 
12551 */ 12552 state->dts_alive = INT64_MAX; 12553 dtrace_membar_producer(); 12554 state->dts_alive = now; 12555 } 12556 12557 dtrace_state_t * 12558 dtrace_state_create(dev_t *devp, cred_t *cr) 12559 { 12560 minor_t minor; 12561 major_t major; 12562 char c[30]; 12563 dtrace_state_t *state; 12564 dtrace_optval_t *opt; 12565 int bufsize = NCPU * sizeof (dtrace_buffer_t), i; 12566 12567 ASSERT(MUTEX_HELD(&dtrace_lock)); 12568 ASSERT(MUTEX_HELD(&cpu_lock)); 12569 12570 minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, 12571 VM_BESTFIT | VM_SLEEP); 12572 12573 if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { 12574 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); 12575 return (NULL); 12576 } 12577 12578 state = ddi_get_soft_state(dtrace_softstate, minor); 12579 state->dts_epid = DTRACE_EPIDNONE + 1; 12580 12581 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor); 12582 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1, 12583 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); 12584 12585 if (devp != NULL) { 12586 major = getemajor(*devp); 12587 } else { 12588 major = ddi_driver_major(dtrace_devi); 12589 } 12590 12591 state->dts_dev = makedevice(major, minor); 12592 12593 if (devp != NULL) 12594 *devp = state->dts_dev; 12595 12596 /* 12597 * We allocate NCPU buffers. On the one hand, this can be quite 12598 * a bit of memory per instance (nearly 36K on a Starcat). On the 12599 * other hand, it saves an additional memory reference in the probe 12600 * path. 12601 */ 12602 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); 12603 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); 12604 state->dts_cleaner = CYCLIC_NONE; 12605 state->dts_deadman = CYCLIC_NONE; 12606 state->dts_vstate.dtvs_state = state; 12607 12608 for (i = 0; i < DTRACEOPT_MAX; i++) 12609 state->dts_options[i] = DTRACEOPT_UNSET; 12610 12611 /* 12612 * Set the default options. 
12613 */ 12614 opt = state->dts_options; 12615 opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH; 12616 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO; 12617 opt[DTRACEOPT_NSPEC] = dtrace_nspec_default; 12618 opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default; 12619 opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL; 12620 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default; 12621 opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default; 12622 opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default; 12623 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default; 12624 opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default; 12625 opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default; 12626 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; 12627 opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; 12628 opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; 12629 12630 state->dts_activity = DTRACE_ACTIVITY_INACTIVE; 12631 12632 /* 12633 * Depending on the user credentials, we set flag bits which alter probe 12634 * visibility or the amount of destructiveness allowed. In the case of 12635 * actual anonymous tracing, or the possession of all privileges, all of 12636 * the normal checks are bypassed. 12637 */ 12638 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { 12639 state->dts_cred.dcr_visible = DTRACE_CRV_ALL; 12640 state->dts_cred.dcr_action = DTRACE_CRA_ALL; 12641 } else { 12642 /* 12643 * Set up the credentials for this instantiation. We take a 12644 * hold on the credential to prevent it from disappearing on 12645 * us; this in turn prevents the zone_t referenced by this 12646 * credential from disappearing. This means that we can 12647 * examine the credential and the zone from probe context. 12648 */ 12649 crhold(cr); 12650 state->dts_cred.dcr_cred = cr; 12651 12652 /* 12653 * CRA_PROC means "we have *some* privilege for dtrace" and 12654 * unlocks the use of variables like pid, zonename, etc. 
12655 */ 12656 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) || 12657 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { 12658 state->dts_cred.dcr_action |= DTRACE_CRA_PROC; 12659 } 12660 12661 /* 12662 * dtrace_user allows use of syscall and profile providers. 12663 * If the user also has proc_owner and/or proc_zone, we 12664 * extend the scope to include additional visibility and 12665 * destructive power. 12666 */ 12667 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) { 12668 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) { 12669 state->dts_cred.dcr_visible |= 12670 DTRACE_CRV_ALLPROC; 12671 12672 state->dts_cred.dcr_action |= 12673 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; 12674 } 12675 12676 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) { 12677 state->dts_cred.dcr_visible |= 12678 DTRACE_CRV_ALLZONE; 12679 12680 state->dts_cred.dcr_action |= 12681 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; 12682 } 12683 12684 /* 12685 * If we have all privs in whatever zone this is, 12686 * we can do destructive things to processes which 12687 * have altered credentials. 12688 */ 12689 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), 12690 cr->cr_zone->zone_privset)) { 12691 state->dts_cred.dcr_action |= 12692 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; 12693 } 12694 } 12695 12696 /* 12697 * Holding the dtrace_kernel privilege also implies that 12698 * the user has the dtrace_user privilege from a visibility 12699 * perspective. But without further privileges, some 12700 * destructive actions are not available. 12701 */ 12702 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) { 12703 /* 12704 * Make all probes in all zones visible. However, 12705 * this doesn't mean that all actions become available 12706 * to all zones. 
12707 */ 12708 state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL | 12709 DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE; 12710 12711 state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL | 12712 DTRACE_CRA_PROC; 12713 /* 12714 * Holding proc_owner means that destructive actions 12715 * for *this* zone are allowed. 12716 */ 12717 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) 12718 state->dts_cred.dcr_action |= 12719 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; 12720 12721 /* 12722 * Holding proc_zone means that destructive actions 12723 * for this user/group ID in all zones is allowed. 12724 */ 12725 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) 12726 state->dts_cred.dcr_action |= 12727 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; 12728 12729 /* 12730 * If we have all privs in whatever zone this is, 12731 * we can do destructive things to processes which 12732 * have altered credentials. 12733 */ 12734 if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), 12735 cr->cr_zone->zone_privset)) { 12736 state->dts_cred.dcr_action |= 12737 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; 12738 } 12739 } 12740 12741 /* 12742 * Holding the dtrace_proc privilege gives control over fasttrap 12743 * and pid providers. We need to grant wider destructive 12744 * privileges in the event that the user has proc_owner and/or 12745 * proc_zone. 
12746 */ 12747 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { 12748 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) 12749 state->dts_cred.dcr_action |= 12750 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; 12751 12752 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) 12753 state->dts_cred.dcr_action |= 12754 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; 12755 } 12756 } 12757 12758 return (state); 12759 } 12760 12761 static int 12762 dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) 12763 { 12764 dtrace_optval_t *opt = state->dts_options, size; 12765 processorid_t cpu; 12766 int flags = 0, rval, factor, divisor = 1; 12767 12768 ASSERT(MUTEX_HELD(&dtrace_lock)); 12769 ASSERT(MUTEX_HELD(&cpu_lock)); 12770 ASSERT(which < DTRACEOPT_MAX); 12771 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || 12772 (state == dtrace_anon.dta_state && 12773 state->dts_activity == DTRACE_ACTIVITY_ACTIVE)); 12774 12775 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0) 12776 return (0); 12777 12778 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET) 12779 cpu = opt[DTRACEOPT_CPU]; 12780 12781 if (which == DTRACEOPT_SPECSIZE) 12782 flags |= DTRACEBUF_NOSWITCH; 12783 12784 if (which == DTRACEOPT_BUFSIZE) { 12785 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING) 12786 flags |= DTRACEBUF_RING; 12787 12788 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL) 12789 flags |= DTRACEBUF_FILL; 12790 12791 if (state != dtrace_anon.dta_state || 12792 state->dts_activity != DTRACE_ACTIVITY_ACTIVE) 12793 flags |= DTRACEBUF_INACTIVE; 12794 } 12795 12796 for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { 12797 /* 12798 * The size must be 8-byte aligned. If the size is not 8-byte 12799 * aligned, drop it down by the difference. 
12800 */ 12801 if (size & (sizeof (uint64_t) - 1)) 12802 size -= size & (sizeof (uint64_t) - 1); 12803 12804 if (size < state->dts_reserve) { 12805 /* 12806 * Buffers always must be large enough to accommodate 12807 * their prereserved space. We return E2BIG instead 12808 * of ENOMEM in this case to allow for user-level 12809 * software to differentiate the cases. 12810 */ 12811 return (E2BIG); 12812 } 12813 12814 rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); 12815 12816 if (rval != ENOMEM) { 12817 opt[which] = size; 12818 return (rval); 12819 } 12820 12821 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) 12822 return (rval); 12823 12824 for (divisor = 2; divisor < factor; divisor <<= 1) 12825 continue; 12826 } 12827 12828 return (ENOMEM); 12829 } 12830 12831 static int 12832 dtrace_state_buffers(dtrace_state_t *state) 12833 { 12834 dtrace_speculation_t *spec = state->dts_speculations; 12835 int rval, i; 12836 12837 if ((rval = dtrace_state_buffer(state, state->dts_buffer, 12838 DTRACEOPT_BUFSIZE)) != 0) 12839 return (rval); 12840 12841 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer, 12842 DTRACEOPT_AGGSIZE)) != 0) 12843 return (rval); 12844 12845 for (i = 0; i < state->dts_nspeculations; i++) { 12846 if ((rval = dtrace_state_buffer(state, 12847 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0) 12848 return (rval); 12849 } 12850 12851 return (0); 12852 } 12853 12854 static void 12855 dtrace_state_prereserve(dtrace_state_t *state) 12856 { 12857 dtrace_ecb_t *ecb; 12858 dtrace_probe_t *probe; 12859 12860 state->dts_reserve = 0; 12861 12862 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) 12863 return; 12864 12865 /* 12866 * If our buffer policy is a "fill" buffer policy, we need to set the 12867 * prereserved space to be the space required by the END probes. 
12868 */ 12869 probe = dtrace_probes[dtrace_probeid_end - 1]; 12870 ASSERT(probe != NULL); 12871 12872 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { 12873 if (ecb->dte_state != state) 12874 continue; 12875 12876 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment; 12877 } 12878 } 12879 12880 static int 12881 dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) 12882 { 12883 dtrace_optval_t *opt = state->dts_options, sz, nspec; 12884 dtrace_speculation_t *spec; 12885 dtrace_buffer_t *buf; 12886 cyc_handler_t hdlr; 12887 cyc_time_t when; 12888 int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t); 12889 dtrace_icookie_t cookie; 12890 12891 mutex_enter(&cpu_lock); 12892 mutex_enter(&dtrace_lock); 12893 12894 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { 12895 rval = EBUSY; 12896 goto out; 12897 } 12898 12899 /* 12900 * Before we can perform any checks, we must prime all of the 12901 * retained enablings that correspond to this state. 12902 */ 12903 dtrace_enabling_prime(state); 12904 12905 if (state->dts_destructive && !state->dts_cred.dcr_destructive) { 12906 rval = EACCES; 12907 goto out; 12908 } 12909 12910 dtrace_state_prereserve(state); 12911 12912 /* 12913 * Now we want to do is try to allocate our speculations. 12914 * We do not automatically resize the number of speculations; if 12915 * this fails, we will fail the operation. 
12916 */ 12917 nspec = opt[DTRACEOPT_NSPEC]; 12918 ASSERT(nspec != DTRACEOPT_UNSET); 12919 12920 if (nspec > INT_MAX) { 12921 rval = ENOMEM; 12922 goto out; 12923 } 12924 12925 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), 12926 KM_NOSLEEP | KM_NORMALPRI); 12927 12928 if (spec == NULL) { 12929 rval = ENOMEM; 12930 goto out; 12931 } 12932 12933 state->dts_speculations = spec; 12934 state->dts_nspeculations = (int)nspec; 12935 12936 for (i = 0; i < nspec; i++) { 12937 if ((buf = kmem_zalloc(bufsize, 12938 KM_NOSLEEP | KM_NORMALPRI)) == NULL) { 12939 rval = ENOMEM; 12940 goto err; 12941 } 12942 12943 spec[i].dtsp_buffer = buf; 12944 } 12945 12946 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) { 12947 if (dtrace_anon.dta_state == NULL) { 12948 rval = ENOENT; 12949 goto out; 12950 } 12951 12952 if (state->dts_necbs != 0) { 12953 rval = EALREADY; 12954 goto out; 12955 } 12956 12957 state->dts_anon = dtrace_anon_grab(); 12958 ASSERT(state->dts_anon != NULL); 12959 state = state->dts_anon; 12960 12961 /* 12962 * We want "grabanon" to be set in the grabbed state, so we'll 12963 * copy that option value from the grabbing state into the 12964 * grabbed state. 12965 */ 12966 state->dts_options[DTRACEOPT_GRABANON] = 12967 opt[DTRACEOPT_GRABANON]; 12968 12969 *cpu = dtrace_anon.dta_beganon; 12970 12971 /* 12972 * If the anonymous state is active (as it almost certainly 12973 * is if the anonymous enabling ultimately matched anything), 12974 * we don't allow any further option processing -- but we 12975 * don't return failure. 12976 */ 12977 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 12978 goto out; 12979 } 12980 12981 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET && 12982 opt[DTRACEOPT_AGGSIZE] != 0) { 12983 if (state->dts_aggregations == NULL) { 12984 /* 12985 * We're not going to create an aggregation buffer 12986 * because we don't have any ECBs that contain 12987 * aggregations -- set this option to 0. 
12988 */ 12989 opt[DTRACEOPT_AGGSIZE] = 0; 12990 } else { 12991 /* 12992 * If we have an aggregation buffer, we must also have 12993 * a buffer to use as scratch. 12994 */ 12995 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || 12996 opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { 12997 opt[DTRACEOPT_BUFSIZE] = state->dts_needed; 12998 } 12999 } 13000 } 13001 13002 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET && 13003 opt[DTRACEOPT_SPECSIZE] != 0) { 13004 if (!state->dts_speculates) { 13005 /* 13006 * We're not going to create speculation buffers 13007 * because we don't have any ECBs that actually 13008 * speculate -- set the speculation size to 0. 13009 */ 13010 opt[DTRACEOPT_SPECSIZE] = 0; 13011 } 13012 } 13013 13014 /* 13015 * The bare minimum size for any buffer that we're actually going to 13016 * do anything to is sizeof (uint64_t). 13017 */ 13018 sz = sizeof (uint64_t); 13019 13020 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) || 13021 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) || 13022 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) { 13023 /* 13024 * A buffer size has been explicitly set to 0 (or to a size 13025 * that will be adjusted to 0) and we need the space -- we 13026 * need to return failure. We return ENOSPC to differentiate 13027 * it from failing to allocate a buffer due to failure to meet 13028 * the reserve (for which we return E2BIG). 
13029 */ 13030 rval = ENOSPC; 13031 goto out; 13032 } 13033 13034 if ((rval = dtrace_state_buffers(state)) != 0) 13035 goto err; 13036 13037 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET) 13038 sz = dtrace_dstate_defsize; 13039 13040 do { 13041 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz); 13042 13043 if (rval == 0) 13044 break; 13045 13046 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) 13047 goto err; 13048 } while (sz >>= 1); 13049 13050 opt[DTRACEOPT_DYNVARSIZE] = sz; 13051 13052 if (rval != 0) 13053 goto err; 13054 13055 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max) 13056 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max; 13057 13058 if (opt[DTRACEOPT_CLEANRATE] == 0) 13059 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; 13060 13061 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min) 13062 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min; 13063 13064 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) 13065 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; 13066 13067 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; 13068 hdlr.cyh_arg = state; 13069 hdlr.cyh_level = CY_LOW_LEVEL; 13070 13071 when.cyt_when = 0; 13072 when.cyt_interval = opt[DTRACEOPT_CLEANRATE]; 13073 13074 state->dts_cleaner = cyclic_add(&hdlr, &when); 13075 13076 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman; 13077 hdlr.cyh_arg = state; 13078 hdlr.cyh_level = CY_LOW_LEVEL; 13079 13080 when.cyt_when = 0; 13081 when.cyt_interval = dtrace_deadman_interval; 13082 13083 state->dts_alive = state->dts_laststatus = dtrace_gethrtime(); 13084 state->dts_deadman = cyclic_add(&hdlr, &when); 13085 13086 state->dts_activity = DTRACE_ACTIVITY_WARMUP; 13087 13088 /* 13089 * Now it's time to actually fire the BEGIN probe. We need to disable 13090 * interrupts here both to record the CPU on which we fired the BEGIN 13091 * probe (the data from this CPU will be processed first at user 13092 * level) and to manually activate the buffer for this CPU. 
13093 */ 13094 cookie = dtrace_interrupt_disable(); 13095 *cpu = CPU->cpu_id; 13096 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE); 13097 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; 13098 13099 dtrace_probe(dtrace_probeid_begin, 13100 (uint64_t)(uintptr_t)state, 0, 0, 0, 0); 13101 dtrace_interrupt_enable(cookie); 13102 /* 13103 * We may have had an exit action from a BEGIN probe; only change our 13104 * state to ACTIVE if we're still in WARMUP. 13105 */ 13106 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP || 13107 state->dts_activity == DTRACE_ACTIVITY_DRAINING); 13108 13109 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP) 13110 state->dts_activity = DTRACE_ACTIVITY_ACTIVE; 13111 13112 /* 13113 * Regardless of whether or not now we're in ACTIVE or DRAINING, we 13114 * want each CPU to transition its principal buffer out of the 13115 * INACTIVE state. Doing this assures that no CPU will suddenly begin 13116 * processing an ECB halfway down a probe's ECB chain; all CPUs will 13117 * atomically transition from processing none of a state's ECBs to 13118 * processing all of them. 
13119 */ 13120 dtrace_xcall(DTRACE_CPUALL, 13121 (dtrace_xcall_t)dtrace_buffer_activate, state); 13122 goto out; 13123 13124 err: 13125 dtrace_buffer_free(state->dts_buffer); 13126 dtrace_buffer_free(state->dts_aggbuffer); 13127 13128 if ((nspec = state->dts_nspeculations) == 0) { 13129 ASSERT(state->dts_speculations == NULL); 13130 goto out; 13131 } 13132 13133 spec = state->dts_speculations; 13134 ASSERT(spec != NULL); 13135 13136 for (i = 0; i < state->dts_nspeculations; i++) { 13137 if ((buf = spec[i].dtsp_buffer) == NULL) 13138 break; 13139 13140 dtrace_buffer_free(buf); 13141 kmem_free(buf, bufsize); 13142 } 13143 13144 kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); 13145 state->dts_nspeculations = 0; 13146 state->dts_speculations = NULL; 13147 13148 out: 13149 mutex_exit(&dtrace_lock); 13150 mutex_exit(&cpu_lock); 13151 13152 return (rval); 13153 } 13154 13155 static int 13156 dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) 13157 { 13158 dtrace_icookie_t cookie; 13159 13160 ASSERT(MUTEX_HELD(&dtrace_lock)); 13161 13162 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && 13163 state->dts_activity != DTRACE_ACTIVITY_DRAINING) 13164 return (EINVAL); 13165 13166 /* 13167 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync 13168 * to be sure that every CPU has seen it. See below for the details 13169 * on why this is done. 13170 */ 13171 state->dts_activity = DTRACE_ACTIVITY_DRAINING; 13172 dtrace_sync(); 13173 13174 /* 13175 * By this point, it is impossible for any CPU to be still processing 13176 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to 13177 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any 13178 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe() 13179 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN 13180 * iff we're in the END probe. 
13181 */ 13182 state->dts_activity = DTRACE_ACTIVITY_COOLDOWN; 13183 dtrace_sync(); 13184 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN); 13185 13186 /* 13187 * Finally, we can release the reserve and call the END probe. We 13188 * disable interrupts across calling the END probe to allow us to 13189 * return the CPU on which we actually called the END probe. This 13190 * allows user-land to be sure that this CPU's principal buffer is 13191 * processed last. 13192 */ 13193 state->dts_reserve = 0; 13194 13195 cookie = dtrace_interrupt_disable(); 13196 *cpu = CPU->cpu_id; 13197 dtrace_probe(dtrace_probeid_end, 13198 (uint64_t)(uintptr_t)state, 0, 0, 0, 0); 13199 dtrace_interrupt_enable(cookie); 13200 13201 state->dts_activity = DTRACE_ACTIVITY_STOPPED; 13202 dtrace_sync(); 13203 13204 return (0); 13205 } 13206 13207 static int 13208 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, 13209 dtrace_optval_t val) 13210 { 13211 ASSERT(MUTEX_HELD(&dtrace_lock)); 13212 13213 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 13214 return (EBUSY); 13215 13216 if (option >= DTRACEOPT_MAX) 13217 return (EINVAL); 13218 13219 if (option != DTRACEOPT_CPU && val < 0) 13220 return (EINVAL); 13221 13222 switch (option) { 13223 case DTRACEOPT_DESTRUCTIVE: 13224 if (dtrace_destructive_disallow) 13225 return (EACCES); 13226 13227 state->dts_cred.dcr_destructive = 1; 13228 break; 13229 13230 case DTRACEOPT_BUFSIZE: 13231 case DTRACEOPT_DYNVARSIZE: 13232 case DTRACEOPT_AGGSIZE: 13233 case DTRACEOPT_SPECSIZE: 13234 case DTRACEOPT_STRSIZE: 13235 if (val < 0) 13236 return (EINVAL); 13237 13238 if (val >= LONG_MAX) { 13239 /* 13240 * If this is an otherwise negative value, set it to 13241 * the highest multiple of 128m less than LONG_MAX. 
13242 * Technically, we're adjusting the size without 13243 * regard to the buffer resizing policy, but in fact, 13244 * this has no effect -- if we set the buffer size to 13245 * ~LONG_MAX and the buffer policy is ultimately set to 13246 * be "manual", the buffer allocation is guaranteed to 13247 * fail, if only because the allocation requires two 13248 * buffers. (We set the the size to the highest 13249 * multiple of 128m because it ensures that the size 13250 * will remain a multiple of a megabyte when 13251 * repeatedly halved -- all the way down to 15m.) 13252 */ 13253 val = LONG_MAX - (1 << 27) + 1; 13254 } 13255 } 13256 13257 state->dts_options[option] = val; 13258 13259 return (0); 13260 } 13261 13262 static void 13263 dtrace_state_destroy(dtrace_state_t *state) 13264 { 13265 dtrace_ecb_t *ecb; 13266 dtrace_vstate_t *vstate = &state->dts_vstate; 13267 minor_t minor = getminor(state->dts_dev); 13268 int i, bufsize = NCPU * sizeof (dtrace_buffer_t); 13269 dtrace_speculation_t *spec = state->dts_speculations; 13270 int nspec = state->dts_nspeculations; 13271 uint32_t match; 13272 13273 ASSERT(MUTEX_HELD(&dtrace_lock)); 13274 ASSERT(MUTEX_HELD(&cpu_lock)); 13275 13276 /* 13277 * First, retract any retained enablings for this state. 13278 */ 13279 dtrace_enabling_retract(state); 13280 ASSERT(state->dts_nretained == 0); 13281 13282 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE || 13283 state->dts_activity == DTRACE_ACTIVITY_DRAINING) { 13284 /* 13285 * We have managed to come into dtrace_state_destroy() on a 13286 * hot enabling -- almost certainly because of a disorderly 13287 * shutdown of a consumer. (That is, a consumer that is 13288 * exiting without having called dtrace_stop().) In this case, 13289 * we're going to set our activity to be KILLED, and then 13290 * issue a sync to be sure that everyone is out of probe 13291 * context before we start blowing away ECBs. 
13292 */ 13293 state->dts_activity = DTRACE_ACTIVITY_KILLED; 13294 dtrace_sync(); 13295 } 13296 13297 /* 13298 * Release the credential hold we took in dtrace_state_create(). 13299 */ 13300 if (state->dts_cred.dcr_cred != NULL) 13301 crfree(state->dts_cred.dcr_cred); 13302 13303 /* 13304 * Now we can safely disable and destroy any enabled probes. Because 13305 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress 13306 * (especially if they're all enabled), we take two passes through the 13307 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and 13308 * in the second we disable whatever is left over. 13309 */ 13310 for (match = DTRACE_PRIV_KERNEL; ; match = 0) { 13311 for (i = 0; i < state->dts_necbs; i++) { 13312 if ((ecb = state->dts_ecbs[i]) == NULL) 13313 continue; 13314 13315 if (match && ecb->dte_probe != NULL) { 13316 dtrace_probe_t *probe = ecb->dte_probe; 13317 dtrace_provider_t *prov = probe->dtpr_provider; 13318 13319 if (!(prov->dtpv_priv.dtpp_flags & match)) 13320 continue; 13321 } 13322 13323 dtrace_ecb_disable(ecb); 13324 dtrace_ecb_destroy(ecb); 13325 } 13326 13327 if (!match) 13328 break; 13329 } 13330 13331 /* 13332 * Before we free the buffers, perform one more sync to assure that 13333 * every CPU is out of probe context. 
13334 */ 13335 dtrace_sync(); 13336 13337 dtrace_buffer_free(state->dts_buffer); 13338 dtrace_buffer_free(state->dts_aggbuffer); 13339 13340 for (i = 0; i < nspec; i++) 13341 dtrace_buffer_free(spec[i].dtsp_buffer); 13342 13343 if (state->dts_cleaner != CYCLIC_NONE) 13344 cyclic_remove(state->dts_cleaner); 13345 13346 if (state->dts_deadman != CYCLIC_NONE) 13347 cyclic_remove(state->dts_deadman); 13348 13349 dtrace_dstate_fini(&vstate->dtvs_dynvars); 13350 dtrace_vstate_fini(vstate); 13351 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); 13352 13353 if (state->dts_aggregations != NULL) { 13354 #ifdef DEBUG 13355 for (i = 0; i < state->dts_naggregations; i++) 13356 ASSERT(state->dts_aggregations[i] == NULL); 13357 #endif 13358 ASSERT(state->dts_naggregations > 0); 13359 kmem_free(state->dts_aggregations, 13360 state->dts_naggregations * sizeof (dtrace_aggregation_t *)); 13361 } 13362 13363 kmem_free(state->dts_buffer, bufsize); 13364 kmem_free(state->dts_aggbuffer, bufsize); 13365 13366 for (i = 0; i < nspec; i++) 13367 kmem_free(spec[i].dtsp_buffer, bufsize); 13368 13369 kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); 13370 13371 dtrace_format_destroy(state); 13372 13373 vmem_destroy(state->dts_aggid_arena); 13374 ddi_soft_state_free(dtrace_softstate, minor); 13375 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); 13376 } 13377 13378 /* 13379 * DTrace Anonymous Enabling Functions 13380 */ 13381 static dtrace_state_t * 13382 dtrace_anon_grab(void) 13383 { 13384 dtrace_state_t *state; 13385 13386 ASSERT(MUTEX_HELD(&dtrace_lock)); 13387 13388 if ((state = dtrace_anon.dta_state) == NULL) { 13389 ASSERT(dtrace_anon.dta_enabling == NULL); 13390 return (NULL); 13391 } 13392 13393 ASSERT(dtrace_anon.dta_enabling != NULL); 13394 ASSERT(dtrace_retained != NULL); 13395 13396 dtrace_enabling_destroy(dtrace_anon.dta_enabling); 13397 dtrace_anon.dta_enabling = NULL; 13398 dtrace_anon.dta_state = NULL; 13399 13400 return (state); 13401 } 13402 
13403 static void 13404 dtrace_anon_property(void) 13405 { 13406 int i, rv; 13407 dtrace_state_t *state; 13408 dof_hdr_t *dof; 13409 char c[32]; /* enough for "dof-data-" + digits */ 13410 13411 ASSERT(MUTEX_HELD(&dtrace_lock)); 13412 ASSERT(MUTEX_HELD(&cpu_lock)); 13413 13414 for (i = 0; ; i++) { 13415 (void) snprintf(c, sizeof (c), "dof-data-%d", i); 13416 13417 dtrace_err_verbose = 1; 13418 13419 if ((dof = dtrace_dof_property(c)) == NULL) { 13420 dtrace_err_verbose = 0; 13421 break; 13422 } 13423 13424 /* 13425 * We want to create anonymous state, so we need to transition 13426 * the kernel debugger to indicate that DTrace is active. If 13427 * this fails (e.g. because the debugger has modified text in 13428 * some way), we won't continue with the processing. 13429 */ 13430 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { 13431 cmn_err(CE_NOTE, "kernel debugger active; anonymous " 13432 "enabling ignored."); 13433 dtrace_dof_destroy(dof); 13434 break; 13435 } 13436 13437 /* 13438 * If we haven't allocated an anonymous state, we'll do so now. 13439 */ 13440 if ((state = dtrace_anon.dta_state) == NULL) { 13441 state = dtrace_state_create(NULL, NULL); 13442 dtrace_anon.dta_state = state; 13443 13444 if (state == NULL) { 13445 /* 13446 * This basically shouldn't happen: the only 13447 * failure mode from dtrace_state_create() is a 13448 * failure of ddi_soft_state_zalloc() that 13449 * itself should never happen. Still, the 13450 * interface allows for a failure mode, and 13451 * we want to fail as gracefully as possible: 13452 * we'll emit an error message and cease 13453 * processing anonymous state in this case. 
13454 */ 13455 cmn_err(CE_WARN, "failed to create " 13456 "anonymous state"); 13457 dtrace_dof_destroy(dof); 13458 break; 13459 } 13460 } 13461 13462 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(), 13463 &dtrace_anon.dta_enabling, 0, B_TRUE); 13464 13465 if (rv == 0) 13466 rv = dtrace_dof_options(dof, state); 13467 13468 dtrace_err_verbose = 0; 13469 dtrace_dof_destroy(dof); 13470 13471 if (rv != 0) { 13472 /* 13473 * This is malformed DOF; chuck any anonymous state 13474 * that we created. 13475 */ 13476 ASSERT(dtrace_anon.dta_enabling == NULL); 13477 dtrace_state_destroy(state); 13478 dtrace_anon.dta_state = NULL; 13479 break; 13480 } 13481 13482 ASSERT(dtrace_anon.dta_enabling != NULL); 13483 } 13484 13485 if (dtrace_anon.dta_enabling != NULL) { 13486 int rval; 13487 13488 /* 13489 * dtrace_enabling_retain() can only fail because we are 13490 * trying to retain more enablings than are allowed -- but 13491 * we only have one anonymous enabling, and we are guaranteed 13492 * to be allowed at least one retained enabling; we assert 13493 * that dtrace_enabling_retain() returns success. 13494 */ 13495 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling); 13496 ASSERT(rval == 0); 13497 13498 dtrace_enabling_dump(dtrace_anon.dta_enabling); 13499 } 13500 } 13501 13502 /* 13503 * DTrace Helper Functions 13504 */ 13505 static void 13506 dtrace_helper_trace(dtrace_helper_action_t *helper, 13507 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) 13508 { 13509 uint32_t size, next, nnext, i; 13510 dtrace_helptrace_t *ent; 13511 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 13512 13513 if (!dtrace_helptrace_enabled) 13514 return; 13515 13516 ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); 13517 13518 /* 13519 * What would a tracing framework be without its own tracing 13520 * framework? (Well, a hell of a lot simpler, for starters...) 
13521 */ 13522 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals * 13523 sizeof (uint64_t) - sizeof (uint64_t); 13524 13525 /* 13526 * Iterate until we can allocate a slot in the trace buffer. 13527 */ 13528 do { 13529 next = dtrace_helptrace_next; 13530 13531 if (next + size < dtrace_helptrace_bufsize) { 13532 nnext = next + size; 13533 } else { 13534 nnext = size; 13535 } 13536 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next); 13537 13538 /* 13539 * We have our slot; fill it in. 13540 */ 13541 if (nnext == size) 13542 next = 0; 13543 13544 ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next]; 13545 ent->dtht_helper = helper; 13546 ent->dtht_where = where; 13547 ent->dtht_nlocals = vstate->dtvs_nlocals; 13548 13549 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ? 13550 mstate->dtms_fltoffs : -1; 13551 ent->dtht_fault = DTRACE_FLAGS2FLT(flags); 13552 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 13553 13554 for (i = 0; i < vstate->dtvs_nlocals; i++) { 13555 dtrace_statvar_t *svar; 13556 13557 if ((svar = vstate->dtvs_locals[i]) == NULL) 13558 continue; 13559 13560 ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t)); 13561 ent->dtht_locals[i] = 13562 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id]; 13563 } 13564 } 13565 13566 static uint64_t 13567 dtrace_helper(int which, dtrace_mstate_t *mstate, 13568 dtrace_state_t *state, uint64_t arg0, uint64_t arg1) 13569 { 13570 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 13571 uint64_t sarg0 = mstate->dtms_arg[0]; 13572 uint64_t sarg1 = mstate->dtms_arg[1]; 13573 uint64_t rval; 13574 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers; 13575 dtrace_helper_action_t *helper; 13576 dtrace_vstate_t *vstate; 13577 dtrace_difo_t *pred; 13578 int i, trace = dtrace_helptrace_enabled; 13579 13580 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS); 13581 13582 if (helpers == NULL) 13583 return (0); 13584 13585 if ((helper = 
helpers->dthps_actions[which]) == NULL) 13586 return (0); 13587 13588 vstate = &helpers->dthps_vstate; 13589 mstate->dtms_arg[0] = arg0; 13590 mstate->dtms_arg[1] = arg1; 13591 13592 /* 13593 * Now iterate over each helper. If its predicate evaluates to 'true', 13594 * we'll call the corresponding actions. Note that the below calls 13595 * to dtrace_dif_emulate() may set faults in machine state. This is 13596 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow 13597 * the stored DIF offset with its own (which is the desired behavior). 13598 * Also, note the calls to dtrace_dif_emulate() may allocate scratch 13599 * from machine state; this is okay, too. 13600 */ 13601 for (; helper != NULL; helper = helper->dtha_next) { 13602 if ((pred = helper->dtha_predicate) != NULL) { 13603 if (trace) 13604 dtrace_helper_trace(helper, mstate, vstate, 0); 13605 13606 if (!dtrace_dif_emulate(pred, mstate, vstate, state)) 13607 goto next; 13608 13609 if (*flags & CPU_DTRACE_FAULT) 13610 goto err; 13611 } 13612 13613 for (i = 0; i < helper->dtha_nactions; i++) { 13614 if (trace) 13615 dtrace_helper_trace(helper, 13616 mstate, vstate, i + 1); 13617 13618 rval = dtrace_dif_emulate(helper->dtha_actions[i], 13619 mstate, vstate, state); 13620 13621 if (*flags & CPU_DTRACE_FAULT) 13622 goto err; 13623 } 13624 13625 next: 13626 if (trace) 13627 dtrace_helper_trace(helper, mstate, vstate, 13628 DTRACE_HELPTRACE_NEXT); 13629 } 13630 13631 if (trace) 13632 dtrace_helper_trace(helper, mstate, vstate, 13633 DTRACE_HELPTRACE_DONE); 13634 13635 /* 13636 * Restore the arg0 that we saved upon entry. 13637 */ 13638 mstate->dtms_arg[0] = sarg0; 13639 mstate->dtms_arg[1] = sarg1; 13640 13641 return (rval); 13642 13643 err: 13644 if (trace) 13645 dtrace_helper_trace(helper, mstate, vstate, 13646 DTRACE_HELPTRACE_ERR); 13647 13648 /* 13649 * Restore the arg0 that we saved upon entry. 
13650 */ 13651 mstate->dtms_arg[0] = sarg0; 13652 mstate->dtms_arg[1] = sarg1; 13653 13654 return (NULL); 13655 } 13656 13657 static void 13658 dtrace_helper_action_destroy(dtrace_helper_action_t *helper, 13659 dtrace_vstate_t *vstate) 13660 { 13661 int i; 13662 13663 if (helper->dtha_predicate != NULL) 13664 dtrace_difo_release(helper->dtha_predicate, vstate); 13665 13666 for (i = 0; i < helper->dtha_nactions; i++) { 13667 ASSERT(helper->dtha_actions[i] != NULL); 13668 dtrace_difo_release(helper->dtha_actions[i], vstate); 13669 } 13670 13671 kmem_free(helper->dtha_actions, 13672 helper->dtha_nactions * sizeof (dtrace_difo_t *)); 13673 kmem_free(helper, sizeof (dtrace_helper_action_t)); 13674 } 13675 13676 static int 13677 dtrace_helper_destroygen(int gen) 13678 { 13679 proc_t *p = curproc; 13680 dtrace_helpers_t *help = p->p_dtrace_helpers; 13681 dtrace_vstate_t *vstate; 13682 int i; 13683 13684 ASSERT(MUTEX_HELD(&dtrace_lock)); 13685 13686 if (help == NULL || gen > help->dthps_generation) 13687 return (EINVAL); 13688 13689 vstate = &help->dthps_vstate; 13690 13691 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 13692 dtrace_helper_action_t *last = NULL, *h, *next; 13693 13694 for (h = help->dthps_actions[i]; h != NULL; h = next) { 13695 next = h->dtha_next; 13696 13697 if (h->dtha_generation == gen) { 13698 if (last != NULL) { 13699 last->dtha_next = next; 13700 } else { 13701 help->dthps_actions[i] = next; 13702 } 13703 13704 dtrace_helper_action_destroy(h, vstate); 13705 } else { 13706 last = h; 13707 } 13708 } 13709 } 13710 13711 /* 13712 * Interate until we've cleared out all helper providers with the 13713 * given generation number. 13714 */ 13715 for (;;) { 13716 dtrace_helper_provider_t *prov; 13717 13718 /* 13719 * Look for a helper provider with the right generation. We 13720 * have to start back at the beginning of the list each time 13721 * because we drop dtrace_lock. It's unlikely that we'll make 13722 * more than two passes. 
13723 */ 13724 for (i = 0; i < help->dthps_nprovs; i++) { 13725 prov = help->dthps_provs[i]; 13726 13727 if (prov->dthp_generation == gen) 13728 break; 13729 } 13730 13731 /* 13732 * If there were no matches, we're done. 13733 */ 13734 if (i == help->dthps_nprovs) 13735 break; 13736 13737 /* 13738 * Move the last helper provider into this slot. 13739 */ 13740 help->dthps_nprovs--; 13741 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs]; 13742 help->dthps_provs[help->dthps_nprovs] = NULL; 13743 13744 mutex_exit(&dtrace_lock); 13745 13746 /* 13747 * If we have a meta provider, remove this helper provider. 13748 */ 13749 mutex_enter(&dtrace_meta_lock); 13750 if (dtrace_meta_pid != NULL) { 13751 ASSERT(dtrace_deferred_pid == NULL); 13752 dtrace_helper_provider_remove(&prov->dthp_prov, 13753 p->p_pid); 13754 } 13755 mutex_exit(&dtrace_meta_lock); 13756 13757 dtrace_helper_provider_destroy(prov); 13758 13759 mutex_enter(&dtrace_lock); 13760 } 13761 13762 return (0); 13763 } 13764 13765 static int 13766 dtrace_helper_validate(dtrace_helper_action_t *helper) 13767 { 13768 int err = 0, i; 13769 dtrace_difo_t *dp; 13770 13771 if ((dp = helper->dtha_predicate) != NULL) 13772 err += dtrace_difo_validate_helper(dp); 13773 13774 for (i = 0; i < helper->dtha_nactions; i++) 13775 err += dtrace_difo_validate_helper(helper->dtha_actions[i]); 13776 13777 return (err == 0); 13778 } 13779 13780 static int 13781 dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep) 13782 { 13783 dtrace_helpers_t *help; 13784 dtrace_helper_action_t *helper, *last; 13785 dtrace_actdesc_t *act; 13786 dtrace_vstate_t *vstate; 13787 dtrace_predicate_t *pred; 13788 int count = 0, nactions = 0, i; 13789 13790 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) 13791 return (EINVAL); 13792 13793 help = curproc->p_dtrace_helpers; 13794 last = help->dthps_actions[which]; 13795 vstate = &help->dthps_vstate; 13796 13797 for (count = 0; last != NULL; last = last->dtha_next) { 13798 count++; 13799 if 
(last->dtha_next == NULL) 13800 break; 13801 } 13802 13803 /* 13804 * If we already have dtrace_helper_actions_max helper actions for this 13805 * helper action type, we'll refuse to add a new one. 13806 */ 13807 if (count >= dtrace_helper_actions_max) 13808 return (ENOSPC); 13809 13810 helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP); 13811 helper->dtha_generation = help->dthps_generation; 13812 13813 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) { 13814 ASSERT(pred->dtp_difo != NULL); 13815 dtrace_difo_hold(pred->dtp_difo); 13816 helper->dtha_predicate = pred->dtp_difo; 13817 } 13818 13819 for (act = ep->dted_action; act != NULL; act = act->dtad_next) { 13820 if (act->dtad_kind != DTRACEACT_DIFEXPR) 13821 goto err; 13822 13823 if (act->dtad_difo == NULL) 13824 goto err; 13825 13826 nactions++; 13827 } 13828 13829 helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) * 13830 (helper->dtha_nactions = nactions), KM_SLEEP); 13831 13832 for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) { 13833 dtrace_difo_hold(act->dtad_difo); 13834 helper->dtha_actions[i++] = act->dtad_difo; 13835 } 13836 13837 if (!dtrace_helper_validate(helper)) 13838 goto err; 13839 13840 if (last == NULL) { 13841 help->dthps_actions[which] = helper; 13842 } else { 13843 last->dtha_next = helper; 13844 } 13845 13846 if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { 13847 dtrace_helptrace_nlocals = vstate->dtvs_nlocals; 13848 dtrace_helptrace_next = 0; 13849 } 13850 13851 return (0); 13852 err: 13853 dtrace_helper_action_destroy(helper, vstate); 13854 return (EINVAL); 13855 } 13856 13857 static void 13858 dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, 13859 dof_helper_t *dofhp) 13860 { 13861 ASSERT(MUTEX_NOT_HELD(&dtrace_lock)); 13862 13863 mutex_enter(&dtrace_meta_lock); 13864 mutex_enter(&dtrace_lock); 13865 13866 if (!dtrace_attached() || dtrace_meta_pid == NULL) { 13867 /* 13868 * If the dtrace module is loaded but not 
attached, or if 13869 * there aren't isn't a meta provider registered to deal with 13870 * these provider descriptions, we need to postpone creating 13871 * the actual providers until later. 13872 */ 13873 13874 if (help->dthps_next == NULL && help->dthps_prev == NULL && 13875 dtrace_deferred_pid != help) { 13876 help->dthps_deferred = 1; 13877 help->dthps_pid = p->p_pid; 13878 help->dthps_next = dtrace_deferred_pid; 13879 help->dthps_prev = NULL; 13880 if (dtrace_deferred_pid != NULL) 13881 dtrace_deferred_pid->dthps_prev = help; 13882 dtrace_deferred_pid = help; 13883 } 13884 13885 mutex_exit(&dtrace_lock); 13886 13887 } else if (dofhp != NULL) { 13888 /* 13889 * If the dtrace module is loaded and we have a particular 13890 * helper provider description, pass that off to the 13891 * meta provider. 13892 */ 13893 13894 mutex_exit(&dtrace_lock); 13895 13896 dtrace_helper_provide(dofhp, p->p_pid); 13897 13898 } else { 13899 /* 13900 * Otherwise, just pass all the helper provider descriptions 13901 * off to the meta provider. 13902 */ 13903 13904 int i; 13905 mutex_exit(&dtrace_lock); 13906 13907 for (i = 0; i < help->dthps_nprovs; i++) { 13908 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 13909 p->p_pid); 13910 } 13911 } 13912 13913 mutex_exit(&dtrace_meta_lock); 13914 } 13915 13916 static int 13917 dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) 13918 { 13919 dtrace_helpers_t *help; 13920 dtrace_helper_provider_t *hprov, **tmp_provs; 13921 uint_t tmp_maxprovs, i; 13922 13923 ASSERT(MUTEX_HELD(&dtrace_lock)); 13924 13925 help = curproc->p_dtrace_helpers; 13926 ASSERT(help != NULL); 13927 13928 /* 13929 * If we already have dtrace_helper_providers_max helper providers, 13930 * we're refuse to add a new one. 13931 */ 13932 if (help->dthps_nprovs >= dtrace_helper_providers_max) 13933 return (ENOSPC); 13934 13935 /* 13936 * Check to make sure this isn't a duplicate. 
13937 */ 13938 for (i = 0; i < help->dthps_nprovs; i++) { 13939 if (dofhp->dofhp_addr == 13940 help->dthps_provs[i]->dthp_prov.dofhp_addr) 13941 return (EALREADY); 13942 } 13943 13944 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP); 13945 hprov->dthp_prov = *dofhp; 13946 hprov->dthp_ref = 1; 13947 hprov->dthp_generation = gen; 13948 13949 /* 13950 * Allocate a bigger table for helper providers if it's already full. 13951 */ 13952 if (help->dthps_maxprovs == help->dthps_nprovs) { 13953 tmp_maxprovs = help->dthps_maxprovs; 13954 tmp_provs = help->dthps_provs; 13955 13956 if (help->dthps_maxprovs == 0) 13957 help->dthps_maxprovs = 2; 13958 else 13959 help->dthps_maxprovs *= 2; 13960 if (help->dthps_maxprovs > dtrace_helper_providers_max) 13961 help->dthps_maxprovs = dtrace_helper_providers_max; 13962 13963 ASSERT(tmp_maxprovs < help->dthps_maxprovs); 13964 13965 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs * 13966 sizeof (dtrace_helper_provider_t *), KM_SLEEP); 13967 13968 if (tmp_provs != NULL) { 13969 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs * 13970 sizeof (dtrace_helper_provider_t *)); 13971 kmem_free(tmp_provs, tmp_maxprovs * 13972 sizeof (dtrace_helper_provider_t *)); 13973 } 13974 } 13975 13976 help->dthps_provs[help->dthps_nprovs] = hprov; 13977 help->dthps_nprovs++; 13978 13979 return (0); 13980 } 13981 13982 static void 13983 dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov) 13984 { 13985 mutex_enter(&dtrace_lock); 13986 13987 if (--hprov->dthp_ref == 0) { 13988 dof_hdr_t *dof; 13989 mutex_exit(&dtrace_lock); 13990 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof; 13991 dtrace_dof_destroy(dof); 13992 kmem_free(hprov, sizeof (dtrace_helper_provider_t)); 13993 } else { 13994 mutex_exit(&dtrace_lock); 13995 } 13996 } 13997 13998 static int 13999 dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) 14000 { 14001 uintptr_t daddr = (uintptr_t)dof; 14002 dof_sec_t *str_sec, *prb_sec, *arg_sec, 
*off_sec, *enoff_sec; 14003 dof_provider_t *provider; 14004 dof_probe_t *probe; 14005 uint8_t *arg; 14006 char *strtab, *typestr; 14007 dof_stridx_t typeidx; 14008 size_t typesz; 14009 uint_t nprobes, j, k; 14010 14011 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER); 14012 14013 if (sec->dofs_offset & (sizeof (uint_t) - 1)) { 14014 dtrace_dof_error(dof, "misaligned section offset"); 14015 return (-1); 14016 } 14017 14018 /* 14019 * The section needs to be large enough to contain the DOF provider 14020 * structure appropriate for the given version. 14021 */ 14022 if (sec->dofs_size < 14023 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ? 14024 offsetof(dof_provider_t, dofpv_prenoffs) : 14025 sizeof (dof_provider_t))) { 14026 dtrace_dof_error(dof, "provider section too small"); 14027 return (-1); 14028 } 14029 14030 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 14031 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab); 14032 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes); 14033 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs); 14034 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs); 14035 14036 if (str_sec == NULL || prb_sec == NULL || 14037 arg_sec == NULL || off_sec == NULL) 14038 return (-1); 14039 14040 enoff_sec = NULL; 14041 14042 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 14043 provider->dofpv_prenoffs != DOF_SECT_NONE && 14044 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS, 14045 provider->dofpv_prenoffs)) == NULL) 14046 return (-1); 14047 14048 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 14049 14050 if (provider->dofpv_name >= str_sec->dofs_size || 14051 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) { 14052 dtrace_dof_error(dof, "invalid provider name"); 14053 return (-1); 14054 } 14055 14056 if (prb_sec->dofs_entsize == 0 || 14057 prb_sec->dofs_entsize > prb_sec->dofs_size) { 14058 
dtrace_dof_error(dof, "invalid entry size"); 14059 return (-1); 14060 } 14061 14062 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) { 14063 dtrace_dof_error(dof, "misaligned entry size"); 14064 return (-1); 14065 } 14066 14067 if (off_sec->dofs_entsize != sizeof (uint32_t)) { 14068 dtrace_dof_error(dof, "invalid entry size"); 14069 return (-1); 14070 } 14071 14072 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) { 14073 dtrace_dof_error(dof, "misaligned section offset"); 14074 return (-1); 14075 } 14076 14077 if (arg_sec->dofs_entsize != sizeof (uint8_t)) { 14078 dtrace_dof_error(dof, "invalid entry size"); 14079 return (-1); 14080 } 14081 14082 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); 14083 14084 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; 14085 14086 /* 14087 * Take a pass through the probes to check for errors. 14088 */ 14089 for (j = 0; j < nprobes; j++) { 14090 probe = (dof_probe_t *)(uintptr_t)(daddr + 14091 prb_sec->dofs_offset + j * prb_sec->dofs_entsize); 14092 14093 if (probe->dofpr_func >= str_sec->dofs_size) { 14094 dtrace_dof_error(dof, "invalid function name"); 14095 return (-1); 14096 } 14097 14098 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) { 14099 dtrace_dof_error(dof, "function name too long"); 14100 return (-1); 14101 } 14102 14103 if (probe->dofpr_name >= str_sec->dofs_size || 14104 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) { 14105 dtrace_dof_error(dof, "invalid probe name"); 14106 return (-1); 14107 } 14108 14109 /* 14110 * The offset count must not wrap the index, and the offsets 14111 * must also not overflow the section's data. 
14112 */ 14113 if (probe->dofpr_offidx + probe->dofpr_noffs < 14114 probe->dofpr_offidx || 14115 (probe->dofpr_offidx + probe->dofpr_noffs) * 14116 off_sec->dofs_entsize > off_sec->dofs_size) { 14117 dtrace_dof_error(dof, "invalid probe offset"); 14118 return (-1); 14119 } 14120 14121 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) { 14122 /* 14123 * If there's no is-enabled offset section, make sure 14124 * there aren't any is-enabled offsets. Otherwise 14125 * perform the same checks as for probe offsets 14126 * (immediately above). 14127 */ 14128 if (enoff_sec == NULL) { 14129 if (probe->dofpr_enoffidx != 0 || 14130 probe->dofpr_nenoffs != 0) { 14131 dtrace_dof_error(dof, "is-enabled " 14132 "offsets with null section"); 14133 return (-1); 14134 } 14135 } else if (probe->dofpr_enoffidx + 14136 probe->dofpr_nenoffs < probe->dofpr_enoffidx || 14137 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) * 14138 enoff_sec->dofs_entsize > enoff_sec->dofs_size) { 14139 dtrace_dof_error(dof, "invalid is-enabled " 14140 "offset"); 14141 return (-1); 14142 } 14143 14144 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) { 14145 dtrace_dof_error(dof, "zero probe and " 14146 "is-enabled offsets"); 14147 return (-1); 14148 } 14149 } else if (probe->dofpr_noffs == 0) { 14150 dtrace_dof_error(dof, "zero probe offsets"); 14151 return (-1); 14152 } 14153 14154 if (probe->dofpr_argidx + probe->dofpr_xargc < 14155 probe->dofpr_argidx || 14156 (probe->dofpr_argidx + probe->dofpr_xargc) * 14157 arg_sec->dofs_entsize > arg_sec->dofs_size) { 14158 dtrace_dof_error(dof, "invalid args"); 14159 return (-1); 14160 } 14161 14162 typeidx = probe->dofpr_nargv; 14163 typestr = strtab + probe->dofpr_nargv; 14164 for (k = 0; k < probe->dofpr_nargc; k++) { 14165 if (typeidx >= str_sec->dofs_size) { 14166 dtrace_dof_error(dof, "bad " 14167 "native argument type"); 14168 return (-1); 14169 } 14170 14171 typesz = strlen(typestr) + 1; 14172 if (typesz > DTRACE_ARGTYPELEN) { 14173 
dtrace_dof_error(dof, "native " 14174 "argument type too long"); 14175 return (-1); 14176 } 14177 typeidx += typesz; 14178 typestr += typesz; 14179 } 14180 14181 typeidx = probe->dofpr_xargv; 14182 typestr = strtab + probe->dofpr_xargv; 14183 for (k = 0; k < probe->dofpr_xargc; k++) { 14184 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) { 14185 dtrace_dof_error(dof, "bad " 14186 "native argument index"); 14187 return (-1); 14188 } 14189 14190 if (typeidx >= str_sec->dofs_size) { 14191 dtrace_dof_error(dof, "bad " 14192 "translated argument type"); 14193 return (-1); 14194 } 14195 14196 typesz = strlen(typestr) + 1; 14197 if (typesz > DTRACE_ARGTYPELEN) { 14198 dtrace_dof_error(dof, "translated argument " 14199 "type too long"); 14200 return (-1); 14201 } 14202 14203 typeidx += typesz; 14204 typestr += typesz; 14205 } 14206 } 14207 14208 return (0); 14209 } 14210 14211 static int 14212 dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp) 14213 { 14214 dtrace_helpers_t *help; 14215 dtrace_vstate_t *vstate; 14216 dtrace_enabling_t *enab = NULL; 14217 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; 14218 uintptr_t daddr = (uintptr_t)dof; 14219 14220 ASSERT(MUTEX_HELD(&dtrace_lock)); 14221 14222 if ((help = curproc->p_dtrace_helpers) == NULL) 14223 help = dtrace_helpers_create(curproc); 14224 14225 vstate = &help->dthps_vstate; 14226 14227 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, 14228 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) { 14229 dtrace_dof_destroy(dof); 14230 return (rv); 14231 } 14232 14233 /* 14234 * Look for helper providers and validate their descriptions. 
14235 */ 14236 if (dhp != NULL) { 14237 for (i = 0; i < dof->dofh_secnum; i++) { 14238 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 14239 dof->dofh_secoff + i * dof->dofh_secsize); 14240 14241 if (sec->dofs_type != DOF_SECT_PROVIDER) 14242 continue; 14243 14244 if (dtrace_helper_provider_validate(dof, sec) != 0) { 14245 dtrace_enabling_destroy(enab); 14246 dtrace_dof_destroy(dof); 14247 return (-1); 14248 } 14249 14250 nprovs++; 14251 } 14252 } 14253 14254 /* 14255 * Now we need to walk through the ECB descriptions in the enabling. 14256 */ 14257 for (i = 0; i < enab->dten_ndesc; i++) { 14258 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 14259 dtrace_probedesc_t *desc = &ep->dted_probe; 14260 14261 if (strcmp(desc->dtpd_provider, "dtrace") != 0) 14262 continue; 14263 14264 if (strcmp(desc->dtpd_mod, "helper") != 0) 14265 continue; 14266 14267 if (strcmp(desc->dtpd_func, "ustack") != 0) 14268 continue; 14269 14270 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, 14271 ep)) != 0) { 14272 /* 14273 * Adding this helper action failed -- we are now going 14274 * to rip out the entire generation and return failure. 
14275 */ 14276 (void) dtrace_helper_destroygen(help->dthps_generation); 14277 dtrace_enabling_destroy(enab); 14278 dtrace_dof_destroy(dof); 14279 return (-1); 14280 } 14281 14282 nhelpers++; 14283 } 14284 14285 if (nhelpers < enab->dten_ndesc) 14286 dtrace_dof_error(dof, "unmatched helpers"); 14287 14288 gen = help->dthps_generation++; 14289 dtrace_enabling_destroy(enab); 14290 14291 if (dhp != NULL && nprovs > 0) { 14292 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; 14293 if (dtrace_helper_provider_add(dhp, gen) == 0) { 14294 mutex_exit(&dtrace_lock); 14295 dtrace_helper_provider_register(curproc, help, dhp); 14296 mutex_enter(&dtrace_lock); 14297 14298 destroy = 0; 14299 } 14300 } 14301 14302 if (destroy) 14303 dtrace_dof_destroy(dof); 14304 14305 return (gen); 14306 } 14307 14308 static dtrace_helpers_t * 14309 dtrace_helpers_create(proc_t *p) 14310 { 14311 dtrace_helpers_t *help; 14312 14313 ASSERT(MUTEX_HELD(&dtrace_lock)); 14314 ASSERT(p->p_dtrace_helpers == NULL); 14315 14316 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); 14317 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) * 14318 DTRACE_NHELPER_ACTIONS, KM_SLEEP); 14319 14320 p->p_dtrace_helpers = help; 14321 dtrace_helpers++; 14322 14323 return (help); 14324 } 14325 14326 static void 14327 dtrace_helpers_destroy(void) 14328 { 14329 dtrace_helpers_t *help; 14330 dtrace_vstate_t *vstate; 14331 proc_t *p = curproc; 14332 int i; 14333 14334 mutex_enter(&dtrace_lock); 14335 14336 ASSERT(p->p_dtrace_helpers != NULL); 14337 ASSERT(dtrace_helpers > 0); 14338 14339 help = p->p_dtrace_helpers; 14340 vstate = &help->dthps_vstate; 14341 14342 /* 14343 * We're now going to lose the help from this process. 14344 */ 14345 p->p_dtrace_helpers = NULL; 14346 dtrace_sync(); 14347 14348 /* 14349 * Destory the helper actions. 
14350 */ 14351 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 14352 dtrace_helper_action_t *h, *next; 14353 14354 for (h = help->dthps_actions[i]; h != NULL; h = next) { 14355 next = h->dtha_next; 14356 dtrace_helper_action_destroy(h, vstate); 14357 h = next; 14358 } 14359 } 14360 14361 mutex_exit(&dtrace_lock); 14362 14363 /* 14364 * Destroy the helper providers. 14365 */ 14366 if (help->dthps_maxprovs > 0) { 14367 mutex_enter(&dtrace_meta_lock); 14368 if (dtrace_meta_pid != NULL) { 14369 ASSERT(dtrace_deferred_pid == NULL); 14370 14371 for (i = 0; i < help->dthps_nprovs; i++) { 14372 dtrace_helper_provider_remove( 14373 &help->dthps_provs[i]->dthp_prov, p->p_pid); 14374 } 14375 } else { 14376 mutex_enter(&dtrace_lock); 14377 ASSERT(help->dthps_deferred == 0 || 14378 help->dthps_next != NULL || 14379 help->dthps_prev != NULL || 14380 help == dtrace_deferred_pid); 14381 14382 /* 14383 * Remove the helper from the deferred list. 14384 */ 14385 if (help->dthps_next != NULL) 14386 help->dthps_next->dthps_prev = help->dthps_prev; 14387 if (help->dthps_prev != NULL) 14388 help->dthps_prev->dthps_next = help->dthps_next; 14389 if (dtrace_deferred_pid == help) { 14390 dtrace_deferred_pid = help->dthps_next; 14391 ASSERT(help->dthps_prev == NULL); 14392 } 14393 14394 mutex_exit(&dtrace_lock); 14395 } 14396 14397 mutex_exit(&dtrace_meta_lock); 14398 14399 for (i = 0; i < help->dthps_nprovs; i++) { 14400 dtrace_helper_provider_destroy(help->dthps_provs[i]); 14401 } 14402 14403 kmem_free(help->dthps_provs, help->dthps_maxprovs * 14404 sizeof (dtrace_helper_provider_t *)); 14405 } 14406 14407 mutex_enter(&dtrace_lock); 14408 14409 dtrace_vstate_fini(&help->dthps_vstate); 14410 kmem_free(help->dthps_actions, 14411 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS); 14412 kmem_free(help, sizeof (dtrace_helpers_t)); 14413 14414 --dtrace_helpers; 14415 mutex_exit(&dtrace_lock); 14416 } 14417 14418 static void 14419 dtrace_helpers_duplicate(proc_t *from, proc_t *to) 14420 
{ 14421 dtrace_helpers_t *help, *newhelp; 14422 dtrace_helper_action_t *helper, *new, *last; 14423 dtrace_difo_t *dp; 14424 dtrace_vstate_t *vstate; 14425 int i, j, sz, hasprovs = 0; 14426 14427 mutex_enter(&dtrace_lock); 14428 ASSERT(from->p_dtrace_helpers != NULL); 14429 ASSERT(dtrace_helpers > 0); 14430 14431 help = from->p_dtrace_helpers; 14432 newhelp = dtrace_helpers_create(to); 14433 ASSERT(to->p_dtrace_helpers != NULL); 14434 14435 newhelp->dthps_generation = help->dthps_generation; 14436 vstate = &newhelp->dthps_vstate; 14437 14438 /* 14439 * Duplicate the helper actions. 14440 */ 14441 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 14442 if ((helper = help->dthps_actions[i]) == NULL) 14443 continue; 14444 14445 for (last = NULL; helper != NULL; helper = helper->dtha_next) { 14446 new = kmem_zalloc(sizeof (dtrace_helper_action_t), 14447 KM_SLEEP); 14448 new->dtha_generation = helper->dtha_generation; 14449 14450 if ((dp = helper->dtha_predicate) != NULL) { 14451 dp = dtrace_difo_duplicate(dp, vstate); 14452 new->dtha_predicate = dp; 14453 } 14454 14455 new->dtha_nactions = helper->dtha_nactions; 14456 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; 14457 new->dtha_actions = kmem_alloc(sz, KM_SLEEP); 14458 14459 for (j = 0; j < new->dtha_nactions; j++) { 14460 dtrace_difo_t *dp = helper->dtha_actions[j]; 14461 14462 ASSERT(dp != NULL); 14463 dp = dtrace_difo_duplicate(dp, vstate); 14464 new->dtha_actions[j] = dp; 14465 } 14466 14467 if (last != NULL) { 14468 last->dtha_next = new; 14469 } else { 14470 newhelp->dthps_actions[i] = new; 14471 } 14472 14473 last = new; 14474 } 14475 } 14476 14477 /* 14478 * Duplicate the helper providers and register them with the 14479 * DTrace framework. 
14480 */ 14481 if (help->dthps_nprovs > 0) { 14482 newhelp->dthps_nprovs = help->dthps_nprovs; 14483 newhelp->dthps_maxprovs = help->dthps_nprovs; 14484 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs * 14485 sizeof (dtrace_helper_provider_t *), KM_SLEEP); 14486 for (i = 0; i < newhelp->dthps_nprovs; i++) { 14487 newhelp->dthps_provs[i] = help->dthps_provs[i]; 14488 newhelp->dthps_provs[i]->dthp_ref++; 14489 } 14490 14491 hasprovs = 1; 14492 } 14493 14494 mutex_exit(&dtrace_lock); 14495 14496 if (hasprovs) 14497 dtrace_helper_provider_register(to, newhelp, NULL); 14498 } 14499 14500 /* 14501 * DTrace Hook Functions 14502 */ 14503 static void 14504 dtrace_module_loaded(struct modctl *ctl) 14505 { 14506 dtrace_provider_t *prv; 14507 14508 mutex_enter(&dtrace_provider_lock); 14509 mutex_enter(&mod_lock); 14510 14511 ASSERT(ctl->mod_busy); 14512 14513 /* 14514 * We're going to call each providers per-module provide operation 14515 * specifying only this module. 14516 */ 14517 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) 14518 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); 14519 14520 mutex_exit(&mod_lock); 14521 mutex_exit(&dtrace_provider_lock); 14522 14523 /* 14524 * If we have any retained enablings, we need to match against them. 14525 * Enabling probes requires that cpu_lock be held, and we cannot hold 14526 * cpu_lock here -- it is legal for cpu_lock to be held when loading a 14527 * module. (In particular, this happens when loading scheduling 14528 * classes.) So if we have any retained enablings, we need to dispatch 14529 * our task queue to do the match for us. 
14530 */ 14531 mutex_enter(&dtrace_lock); 14532 14533 if (dtrace_retained == NULL) { 14534 mutex_exit(&dtrace_lock); 14535 return; 14536 } 14537 14538 (void) taskq_dispatch(dtrace_taskq, 14539 (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); 14540 14541 mutex_exit(&dtrace_lock); 14542 14543 /* 14544 * And now, for a little heuristic sleaze: in general, we want to 14545 * match modules as soon as they load. However, we cannot guarantee 14546 * this, because it would lead us to the lock ordering violation 14547 * outlined above. The common case, of course, is that cpu_lock is 14548 * _not_ held -- so we delay here for a clock tick, hoping that that's 14549 * long enough for the task queue to do its work. If it's not, it's 14550 * not a serious problem -- it just means that the module that we 14551 * just loaded may not be immediately instrumentable. 14552 */ 14553 delay(1); 14554 } 14555 14556 static void 14557 dtrace_module_unloaded(struct modctl *ctl) 14558 { 14559 dtrace_probe_t template, *probe, *first, *next; 14560 dtrace_provider_t *prov; 14561 14562 template.dtpr_mod = ctl->mod_modname; 14563 14564 mutex_enter(&dtrace_provider_lock); 14565 mutex_enter(&mod_lock); 14566 mutex_enter(&dtrace_lock); 14567 14568 if (dtrace_bymod == NULL) { 14569 /* 14570 * The DTrace module is loaded (obviously) but not attached; 14571 * we don't have any work to do. 14572 */ 14573 mutex_exit(&dtrace_provider_lock); 14574 mutex_exit(&mod_lock); 14575 mutex_exit(&dtrace_lock); 14576 return; 14577 } 14578 14579 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); 14580 probe != NULL; probe = probe->dtpr_nextmod) { 14581 if (probe->dtpr_ecb != NULL) { 14582 mutex_exit(&dtrace_provider_lock); 14583 mutex_exit(&mod_lock); 14584 mutex_exit(&dtrace_lock); 14585 14586 /* 14587 * This shouldn't _actually_ be possible -- we're 14588 * unloading a module that has an enabled probe in it. 
14589 * (It's normally up to the provider to make sure that 14590 * this can't happen.) However, because dtps_enable() 14591 * doesn't have a failure mode, there can be an 14592 * enable/unload race. Upshot: we don't want to 14593 * assert, but we're not going to disable the 14594 * probe, either. 14595 */ 14596 if (dtrace_err_verbose) { 14597 cmn_err(CE_WARN, "unloaded module '%s' had " 14598 "enabled probes", ctl->mod_modname); 14599 } 14600 14601 return; 14602 } 14603 } 14604 14605 probe = first; 14606 14607 for (first = NULL; probe != NULL; probe = next) { 14608 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); 14609 14610 dtrace_probes[probe->dtpr_id - 1] = NULL; 14611 14612 next = probe->dtpr_nextmod; 14613 dtrace_hash_remove(dtrace_bymod, probe); 14614 dtrace_hash_remove(dtrace_byfunc, probe); 14615 dtrace_hash_remove(dtrace_byname, probe); 14616 14617 if (first == NULL) { 14618 first = probe; 14619 probe->dtpr_nextmod = NULL; 14620 } else { 14621 probe->dtpr_nextmod = first; 14622 first = probe; 14623 } 14624 } 14625 14626 /* 14627 * We've removed all of the module's probes from the hash chains and 14628 * from the probe array. Now issue a dtrace_sync() to be sure that 14629 * everyone has cleared out from any probe array processing. 
14630 */ 14631 dtrace_sync(); 14632 14633 for (probe = first; probe != NULL; probe = first) { 14634 first = probe->dtpr_nextmod; 14635 prov = probe->dtpr_provider; 14636 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, 14637 probe->dtpr_arg); 14638 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); 14639 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); 14640 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); 14641 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); 14642 kmem_free(probe, sizeof (dtrace_probe_t)); 14643 } 14644 14645 mutex_exit(&dtrace_lock); 14646 mutex_exit(&mod_lock); 14647 mutex_exit(&dtrace_provider_lock); 14648 } 14649 14650 void 14651 dtrace_suspend(void) 14652 { 14653 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend)); 14654 } 14655 14656 void 14657 dtrace_resume(void) 14658 { 14659 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume)); 14660 } 14661 14662 static int 14663 dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) 14664 { 14665 ASSERT(MUTEX_HELD(&cpu_lock)); 14666 mutex_enter(&dtrace_lock); 14667 14668 switch (what) { 14669 case CPU_CONFIG: { 14670 dtrace_state_t *state; 14671 dtrace_optval_t *opt, rs, c; 14672 14673 /* 14674 * For now, we only allocate a new buffer for anonymous state. 14675 */ 14676 if ((state = dtrace_anon.dta_state) == NULL) 14677 break; 14678 14679 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) 14680 break; 14681 14682 opt = state->dts_options; 14683 c = opt[DTRACEOPT_CPU]; 14684 14685 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu) 14686 break; 14687 14688 /* 14689 * Regardless of what the actual policy is, we're going to 14690 * temporarily set our resize policy to be manual. We're 14691 * also going to temporarily set our CPU option to denote 14692 * the newly configured CPU. 
14693 */ 14694 rs = opt[DTRACEOPT_BUFRESIZE]; 14695 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL; 14696 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu; 14697 14698 (void) dtrace_state_buffers(state); 14699 14700 opt[DTRACEOPT_BUFRESIZE] = rs; 14701 opt[DTRACEOPT_CPU] = c; 14702 14703 break; 14704 } 14705 14706 case CPU_UNCONFIG: 14707 /* 14708 * We don't free the buffer in the CPU_UNCONFIG case. (The 14709 * buffer will be freed when the consumer exits.) 14710 */ 14711 break; 14712 14713 default: 14714 break; 14715 } 14716 14717 mutex_exit(&dtrace_lock); 14718 return (0); 14719 } 14720 14721 static void 14722 dtrace_cpu_setup_initial(processorid_t cpu) 14723 { 14724 (void) dtrace_cpu_setup(CPU_CONFIG, cpu); 14725 } 14726 14727 static void 14728 dtrace_toxrange_add(uintptr_t base, uintptr_t limit) 14729 { 14730 if (dtrace_toxranges >= dtrace_toxranges_max) { 14731 int osize, nsize; 14732 dtrace_toxrange_t *range; 14733 14734 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); 14735 14736 if (osize == 0) { 14737 ASSERT(dtrace_toxrange == NULL); 14738 ASSERT(dtrace_toxranges_max == 0); 14739 dtrace_toxranges_max = 1; 14740 } else { 14741 dtrace_toxranges_max <<= 1; 14742 } 14743 14744 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); 14745 range = kmem_zalloc(nsize, KM_SLEEP); 14746 14747 if (dtrace_toxrange != NULL) { 14748 ASSERT(osize != 0); 14749 bcopy(dtrace_toxrange, range, osize); 14750 kmem_free(dtrace_toxrange, osize); 14751 } 14752 14753 dtrace_toxrange = range; 14754 } 14755 14756 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL); 14757 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL); 14758 14759 dtrace_toxrange[dtrace_toxranges].dtt_base = base; 14760 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; 14761 dtrace_toxranges++; 14762 } 14763 14764 /* 14765 * DTrace Driver Cookbook Functions 14766 */ 14767 /*ARGSUSED*/ 14768 static int 14769 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 14770 { 14771 
dtrace_provider_id_t id; 14772 dtrace_state_t *state = NULL; 14773 dtrace_enabling_t *enab; 14774 14775 mutex_enter(&cpu_lock); 14776 mutex_enter(&dtrace_provider_lock); 14777 mutex_enter(&dtrace_lock); 14778 14779 if (ddi_soft_state_init(&dtrace_softstate, 14780 sizeof (dtrace_state_t), 0) != 0) { 14781 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); 14782 mutex_exit(&cpu_lock); 14783 mutex_exit(&dtrace_provider_lock); 14784 mutex_exit(&dtrace_lock); 14785 return (DDI_FAILURE); 14786 } 14787 14788 if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, 14789 DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE || 14790 ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, 14791 DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { 14792 cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); 14793 ddi_remove_minor_node(devi, NULL); 14794 ddi_soft_state_fini(&dtrace_softstate); 14795 mutex_exit(&cpu_lock); 14796 mutex_exit(&dtrace_provider_lock); 14797 mutex_exit(&dtrace_lock); 14798 return (DDI_FAILURE); 14799 } 14800 14801 ddi_report_dev(devi); 14802 dtrace_devi = devi; 14803 14804 dtrace_modload = dtrace_module_loaded; 14805 dtrace_modunload = dtrace_module_unloaded; 14806 dtrace_cpu_init = dtrace_cpu_setup_initial; 14807 dtrace_helpers_cleanup = dtrace_helpers_destroy; 14808 dtrace_helpers_fork = dtrace_helpers_duplicate; 14809 dtrace_cpustart_init = dtrace_suspend; 14810 dtrace_cpustart_fini = dtrace_resume; 14811 dtrace_debugger_init = dtrace_suspend; 14812 dtrace_debugger_fini = dtrace_resume; 14813 14814 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); 14815 14816 ASSERT(MUTEX_HELD(&cpu_lock)); 14817 14818 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, 14819 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); 14820 dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, 14821 UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, 14822 VM_SLEEP | VMC_IDENTIFIER); 14823 dtrace_taskq = 
taskq_create("dtrace_taskq", 1, maxclsyspri, 14824 1, INT_MAX, 0); 14825 14826 dtrace_state_cache = kmem_cache_create("dtrace_state_cache", 14827 sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, 14828 NULL, NULL, NULL, NULL, NULL, 0); 14829 14830 ASSERT(MUTEX_HELD(&cpu_lock)); 14831 dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), 14832 offsetof(dtrace_probe_t, dtpr_nextmod), 14833 offsetof(dtrace_probe_t, dtpr_prevmod)); 14834 14835 dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), 14836 offsetof(dtrace_probe_t, dtpr_nextfunc), 14837 offsetof(dtrace_probe_t, dtpr_prevfunc)); 14838 14839 dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), 14840 offsetof(dtrace_probe_t, dtpr_nextname), 14841 offsetof(dtrace_probe_t, dtpr_prevname)); 14842 14843 if (dtrace_retain_max < 1) { 14844 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; " 14845 "setting to 1", dtrace_retain_max); 14846 dtrace_retain_max = 1; 14847 } 14848 14849 /* 14850 * Now discover our toxic ranges. 14851 */ 14852 dtrace_toxic_ranges(dtrace_toxrange_add); 14853 14854 /* 14855 * Before we register ourselves as a provider to our own framework, 14856 * we would like to assert that dtrace_provider is NULL -- but that's 14857 * not true if we were loaded as a dependency of a DTrace provider. 14858 * Once we've registered, we can assert that dtrace_provider is our 14859 * pseudo provider. 
14860 */ 14861 (void) dtrace_register("dtrace", &dtrace_provider_attr, 14862 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id); 14863 14864 ASSERT(dtrace_provider != NULL); 14865 ASSERT((dtrace_provider_id_t)dtrace_provider == id); 14866 14867 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) 14868 dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); 14869 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) 14870 dtrace_provider, NULL, NULL, "END", 0, NULL); 14871 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) 14872 dtrace_provider, NULL, NULL, "ERROR", 1, NULL); 14873 14874 dtrace_anon_property(); 14875 mutex_exit(&cpu_lock); 14876 14877 /* 14878 * If DTrace helper tracing is enabled, we need to allocate the 14879 * trace buffer and initialize the values. 14880 */ 14881 if (dtrace_helptrace_enabled) { 14882 ASSERT(dtrace_helptrace_buffer == NULL); 14883 dtrace_helptrace_buffer = 14884 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); 14885 dtrace_helptrace_next = 0; 14886 } 14887 14888 /* 14889 * If there are already providers, we must ask them to provide their 14890 * probes, and then match any anonymous enabling against them. Note 14891 * that there should be no other retained enablings at this time: 14892 * the only retained enablings at this time should be the anonymous 14893 * enabling. 14894 */ 14895 if (dtrace_anon.dta_enabling != NULL) { 14896 ASSERT(dtrace_retained == dtrace_anon.dta_enabling); 14897 14898 dtrace_enabling_provide(NULL); 14899 state = dtrace_anon.dta_state; 14900 14901 /* 14902 * We couldn't hold cpu_lock across the above call to 14903 * dtrace_enabling_provide(), but we must hold it to actually 14904 * enable the probes. We have to drop all of our locks, pick 14905 * up cpu_lock, and regain our locks before matching the 14906 * retained anonymous enabling. 
14907 */ 14908 mutex_exit(&dtrace_lock); 14909 mutex_exit(&dtrace_provider_lock); 14910 14911 mutex_enter(&cpu_lock); 14912 mutex_enter(&dtrace_provider_lock); 14913 mutex_enter(&dtrace_lock); 14914 14915 if ((enab = dtrace_anon.dta_enabling) != NULL) 14916 (void) dtrace_enabling_match(enab, NULL); 14917 14918 mutex_exit(&cpu_lock); 14919 } 14920 14921 mutex_exit(&dtrace_lock); 14922 mutex_exit(&dtrace_provider_lock); 14923 14924 if (state != NULL) { 14925 /* 14926 * If we created any anonymous state, set it going now. 14927 */ 14928 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon); 14929 } 14930 14931 return (DDI_SUCCESS); 14932 } 14933 14934 /*ARGSUSED*/ 14935 static int 14936 dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 14937 { 14938 dtrace_state_t *state; 14939 uint32_t priv; 14940 uid_t uid; 14941 zoneid_t zoneid; 14942 14943 if (getminor(*devp) == DTRACEMNRN_HELPER) 14944 return (0); 14945 14946 /* 14947 * If this wasn't an open with the "helper" minor, then it must be 14948 * the "dtrace" minor. 14949 */ 14950 if (getminor(*devp) != DTRACEMNRN_DTRACE) 14951 return (ENXIO); 14952 14953 /* 14954 * If no DTRACE_PRIV_* bits are set in the credential, then the 14955 * caller lacks sufficient permission to do anything with DTrace. 14956 */ 14957 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid); 14958 if (priv == DTRACE_PRIV_NONE) 14959 return (EACCES); 14960 14961 /* 14962 * Ask all providers to provide all their probes. 14963 */ 14964 mutex_enter(&dtrace_provider_lock); 14965 dtrace_probe_provide(NULL, NULL); 14966 mutex_exit(&dtrace_provider_lock); 14967 14968 mutex_enter(&cpu_lock); 14969 mutex_enter(&dtrace_lock); 14970 dtrace_opens++; 14971 dtrace_membar_producer(); 14972 14973 /* 14974 * If the kernel debugger is active (that is, if the kernel debugger 14975 * modified text in some way), we won't allow the open. 
14976 */ 14977 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { 14978 dtrace_opens--; 14979 mutex_exit(&cpu_lock); 14980 mutex_exit(&dtrace_lock); 14981 return (EBUSY); 14982 } 14983 14984 state = dtrace_state_create(devp, cred_p); 14985 mutex_exit(&cpu_lock); 14986 14987 if (state == NULL) { 14988 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) 14989 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 14990 mutex_exit(&dtrace_lock); 14991 return (EAGAIN); 14992 } 14993 14994 mutex_exit(&dtrace_lock); 14995 14996 return (0); 14997 } 14998 14999 /*ARGSUSED*/ 15000 static int 15001 dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) 15002 { 15003 minor_t minor = getminor(dev); 15004 dtrace_state_t *state; 15005 15006 if (minor == DTRACEMNRN_HELPER) 15007 return (0); 15008 15009 state = ddi_get_soft_state(dtrace_softstate, minor); 15010 15011 mutex_enter(&cpu_lock); 15012 mutex_enter(&dtrace_lock); 15013 15014 if (state->dts_anon) { 15015 /* 15016 * There is anonymous state. Destroy that first. 15017 */ 15018 ASSERT(dtrace_anon.dta_state == NULL); 15019 dtrace_state_destroy(state->dts_anon); 15020 } 15021 15022 dtrace_state_destroy(state); 15023 ASSERT(dtrace_opens > 0); 15024 15025 /* 15026 * Only relinquish control of the kernel debugger interface when there 15027 * are no consumers and no anonymous enablings. 
15028 */ 15029 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) 15030 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 15031 15032 mutex_exit(&dtrace_lock); 15033 mutex_exit(&cpu_lock); 15034 15035 return (0); 15036 } 15037 15038 /*ARGSUSED*/ 15039 static int 15040 dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv) 15041 { 15042 int rval; 15043 dof_helper_t help, *dhp = NULL; 15044 15045 switch (cmd) { 15046 case DTRACEHIOC_ADDDOF: 15047 if (copyin((void *)arg, &help, sizeof (help)) != 0) { 15048 dtrace_dof_error(NULL, "failed to copyin DOF helper"); 15049 return (EFAULT); 15050 } 15051 15052 dhp = &help; 15053 arg = (intptr_t)help.dofhp_dof; 15054 /*FALLTHROUGH*/ 15055 15056 case DTRACEHIOC_ADD: { 15057 dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval); 15058 15059 if (dof == NULL) 15060 return (rval); 15061 15062 mutex_enter(&dtrace_lock); 15063 15064 /* 15065 * dtrace_helper_slurp() takes responsibility for the dof -- 15066 * it may free it now or it may save it and free it later. 
15067 */ 15068 if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) { 15069 *rv = rval; 15070 rval = 0; 15071 } else { 15072 rval = EINVAL; 15073 } 15074 15075 mutex_exit(&dtrace_lock); 15076 return (rval); 15077 } 15078 15079 case DTRACEHIOC_REMOVE: { 15080 mutex_enter(&dtrace_lock); 15081 rval = dtrace_helper_destroygen(arg); 15082 mutex_exit(&dtrace_lock); 15083 15084 return (rval); 15085 } 15086 15087 default: 15088 break; 15089 } 15090 15091 return (ENOTTY); 15092 } 15093 15094 /*ARGSUSED*/ 15095 static int 15096 dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) 15097 { 15098 minor_t minor = getminor(dev); 15099 dtrace_state_t *state; 15100 int rval; 15101 15102 if (minor == DTRACEMNRN_HELPER) 15103 return (dtrace_ioctl_helper(cmd, arg, rv)); 15104 15105 state = ddi_get_soft_state(dtrace_softstate, minor); 15106 15107 if (state->dts_anon) { 15108 ASSERT(dtrace_anon.dta_state == NULL); 15109 state = state->dts_anon; 15110 } 15111 15112 switch (cmd) { 15113 case DTRACEIOC_PROVIDER: { 15114 dtrace_providerdesc_t pvd; 15115 dtrace_provider_t *pvp; 15116 15117 if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) 15118 return (EFAULT); 15119 15120 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; 15121 mutex_enter(&dtrace_provider_lock); 15122 15123 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { 15124 if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) 15125 break; 15126 } 15127 15128 mutex_exit(&dtrace_provider_lock); 15129 15130 if (pvp == NULL) 15131 return (ESRCH); 15132 15133 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); 15134 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); 15135 if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) 15136 return (EFAULT); 15137 15138 return (0); 15139 } 15140 15141 case DTRACEIOC_EPROBE: { 15142 dtrace_eprobedesc_t epdesc; 15143 dtrace_ecb_t *ecb; 15144 dtrace_action_t *act; 15145 void *buf; 15146 size_t size; 15147 uintptr_t dest; 15148 int nrecs; 15149 15150 if 
(copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) 15151 return (EFAULT); 15152 15153 mutex_enter(&dtrace_lock); 15154 15155 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { 15156 mutex_exit(&dtrace_lock); 15157 return (EINVAL); 15158 } 15159 15160 if (ecb->dte_probe == NULL) { 15161 mutex_exit(&dtrace_lock); 15162 return (EINVAL); 15163 } 15164 15165 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; 15166 epdesc.dtepd_uarg = ecb->dte_uarg; 15167 epdesc.dtepd_size = ecb->dte_size; 15168 15169 nrecs = epdesc.dtepd_nrecs; 15170 epdesc.dtepd_nrecs = 0; 15171 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 15172 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 15173 continue; 15174 15175 epdesc.dtepd_nrecs++; 15176 } 15177 15178 /* 15179 * Now that we have the size, we need to allocate a temporary 15180 * buffer in which to store the complete description. We need 15181 * the temporary buffer to be able to drop dtrace_lock() 15182 * across the copyout(), below. 
15183 */ 15184 size = sizeof (dtrace_eprobedesc_t) + 15185 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); 15186 15187 buf = kmem_alloc(size, KM_SLEEP); 15188 dest = (uintptr_t)buf; 15189 15190 bcopy(&epdesc, (void *)dest, sizeof (epdesc)); 15191 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); 15192 15193 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 15194 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 15195 continue; 15196 15197 if (nrecs-- == 0) 15198 break; 15199 15200 bcopy(&act->dta_rec, (void *)dest, 15201 sizeof (dtrace_recdesc_t)); 15202 dest += sizeof (dtrace_recdesc_t); 15203 } 15204 15205 mutex_exit(&dtrace_lock); 15206 15207 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 15208 kmem_free(buf, size); 15209 return (EFAULT); 15210 } 15211 15212 kmem_free(buf, size); 15213 return (0); 15214 } 15215 15216 case DTRACEIOC_AGGDESC: { 15217 dtrace_aggdesc_t aggdesc; 15218 dtrace_action_t *act; 15219 dtrace_aggregation_t *agg; 15220 int nrecs; 15221 uint32_t offs; 15222 dtrace_recdesc_t *lrec; 15223 void *buf; 15224 size_t size; 15225 uintptr_t dest; 15226 15227 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) 15228 return (EFAULT); 15229 15230 mutex_enter(&dtrace_lock); 15231 15232 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { 15233 mutex_exit(&dtrace_lock); 15234 return (EINVAL); 15235 } 15236 15237 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; 15238 15239 nrecs = aggdesc.dtagd_nrecs; 15240 aggdesc.dtagd_nrecs = 0; 15241 15242 offs = agg->dtag_base; 15243 lrec = &agg->dtag_action.dta_rec; 15244 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; 15245 15246 for (act = agg->dtag_first; ; act = act->dta_next) { 15247 ASSERT(act->dta_intuple || 15248 DTRACEACT_ISAGG(act->dta_kind)); 15249 15250 /* 15251 * If this action has a record size of zero, it 15252 * denotes an argument to the aggregating action. 
15253 * Because the presence of this record doesn't (or 15254 * shouldn't) affect the way the data is interpreted, 15255 * we don't copy it out to save user-level the 15256 * confusion of dealing with a zero-length record. 15257 */ 15258 if (act->dta_rec.dtrd_size == 0) { 15259 ASSERT(agg->dtag_hasarg); 15260 continue; 15261 } 15262 15263 aggdesc.dtagd_nrecs++; 15264 15265 if (act == &agg->dtag_action) 15266 break; 15267 } 15268 15269 /* 15270 * Now that we have the size, we need to allocate a temporary 15271 * buffer in which to store the complete description. We need 15272 * the temporary buffer to be able to drop dtrace_lock() 15273 * across the copyout(), below. 15274 */ 15275 size = sizeof (dtrace_aggdesc_t) + 15276 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); 15277 15278 buf = kmem_alloc(size, KM_SLEEP); 15279 dest = (uintptr_t)buf; 15280 15281 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); 15282 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); 15283 15284 for (act = agg->dtag_first; ; act = act->dta_next) { 15285 dtrace_recdesc_t rec = act->dta_rec; 15286 15287 /* 15288 * See the comment in the above loop for why we pass 15289 * over zero-length records. 
15290 */ 15291 if (rec.dtrd_size == 0) { 15292 ASSERT(agg->dtag_hasarg); 15293 continue; 15294 } 15295 15296 if (nrecs-- == 0) 15297 break; 15298 15299 rec.dtrd_offset -= offs; 15300 bcopy(&rec, (void *)dest, sizeof (rec)); 15301 dest += sizeof (dtrace_recdesc_t); 15302 15303 if (act == &agg->dtag_action) 15304 break; 15305 } 15306 15307 mutex_exit(&dtrace_lock); 15308 15309 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 15310 kmem_free(buf, size); 15311 return (EFAULT); 15312 } 15313 15314 kmem_free(buf, size); 15315 return (0); 15316 } 15317 15318 case DTRACEIOC_ENABLE: { 15319 dof_hdr_t *dof; 15320 dtrace_enabling_t *enab = NULL; 15321 dtrace_vstate_t *vstate; 15322 int err = 0; 15323 15324 *rv = 0; 15325 15326 /* 15327 * If a NULL argument has been passed, we take this as our 15328 * cue to reevaluate our enablings. 15329 */ 15330 if (arg == NULL) { 15331 dtrace_enabling_matchall(); 15332 15333 return (0); 15334 } 15335 15336 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) 15337 return (rval); 15338 15339 mutex_enter(&cpu_lock); 15340 mutex_enter(&dtrace_lock); 15341 vstate = &state->dts_vstate; 15342 15343 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { 15344 mutex_exit(&dtrace_lock); 15345 mutex_exit(&cpu_lock); 15346 dtrace_dof_destroy(dof); 15347 return (EBUSY); 15348 } 15349 15350 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { 15351 mutex_exit(&dtrace_lock); 15352 mutex_exit(&cpu_lock); 15353 dtrace_dof_destroy(dof); 15354 return (EINVAL); 15355 } 15356 15357 if ((rval = dtrace_dof_options(dof, state)) != 0) { 15358 dtrace_enabling_destroy(enab); 15359 mutex_exit(&dtrace_lock); 15360 mutex_exit(&cpu_lock); 15361 dtrace_dof_destroy(dof); 15362 return (rval); 15363 } 15364 15365 if ((err = dtrace_enabling_match(enab, rv)) == 0) { 15366 err = dtrace_enabling_retain(enab); 15367 } else { 15368 dtrace_enabling_destroy(enab); 15369 } 15370 15371 mutex_exit(&cpu_lock); 15372 mutex_exit(&dtrace_lock); 15373 
dtrace_dof_destroy(dof); 15374 15375 return (err); 15376 } 15377 15378 case DTRACEIOC_REPLICATE: { 15379 dtrace_repldesc_t desc; 15380 dtrace_probedesc_t *match = &desc.dtrpd_match; 15381 dtrace_probedesc_t *create = &desc.dtrpd_create; 15382 int err; 15383 15384 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15385 return (EFAULT); 15386 15387 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15388 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15389 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15390 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15391 15392 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15393 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15394 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15395 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15396 15397 mutex_enter(&dtrace_lock); 15398 err = dtrace_enabling_replicate(state, match, create); 15399 mutex_exit(&dtrace_lock); 15400 15401 return (err); 15402 } 15403 15404 case DTRACEIOC_PROBEMATCH: 15405 case DTRACEIOC_PROBES: { 15406 dtrace_probe_t *probe = NULL; 15407 dtrace_probedesc_t desc; 15408 dtrace_probekey_t pkey; 15409 dtrace_id_t i; 15410 int m = 0; 15411 uint32_t priv; 15412 uid_t uid; 15413 zoneid_t zoneid; 15414 15415 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15416 return (EFAULT); 15417 15418 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15419 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15420 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15421 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15422 15423 /* 15424 * Before we attempt to match this probe, we want to give 15425 * all providers the opportunity to provide it. 
15426 */ 15427 if (desc.dtpd_id == DTRACE_IDNONE) { 15428 mutex_enter(&dtrace_provider_lock); 15429 dtrace_probe_provide(&desc, NULL); 15430 mutex_exit(&dtrace_provider_lock); 15431 desc.dtpd_id++; 15432 } 15433 15434 if (cmd == DTRACEIOC_PROBEMATCH) { 15435 dtrace_probekey(&desc, &pkey); 15436 pkey.dtpk_id = DTRACE_IDNONE; 15437 } 15438 15439 dtrace_cred2priv(cr, &priv, &uid, &zoneid); 15440 15441 mutex_enter(&dtrace_lock); 15442 15443 if (cmd == DTRACEIOC_PROBEMATCH) { 15444 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 15445 if ((probe = dtrace_probes[i - 1]) != NULL && 15446 (m = dtrace_match_probe(probe, &pkey, 15447 priv, uid, zoneid)) != 0) 15448 break; 15449 } 15450 15451 if (m < 0) { 15452 mutex_exit(&dtrace_lock); 15453 return (EINVAL); 15454 } 15455 15456 } else { 15457 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 15458 if ((probe = dtrace_probes[i - 1]) != NULL && 15459 dtrace_match_priv(probe, priv, uid, zoneid)) 15460 break; 15461 } 15462 } 15463 15464 if (probe == NULL) { 15465 mutex_exit(&dtrace_lock); 15466 return (ESRCH); 15467 } 15468 15469 dtrace_probe_description(probe, &desc); 15470 mutex_exit(&dtrace_lock); 15471 15472 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15473 return (EFAULT); 15474 15475 return (0); 15476 } 15477 15478 case DTRACEIOC_PROBEARG: { 15479 dtrace_argdesc_t desc; 15480 dtrace_probe_t *probe; 15481 dtrace_provider_t *prov; 15482 15483 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15484 return (EFAULT); 15485 15486 if (desc.dtargd_id == DTRACE_IDNONE) 15487 return (EINVAL); 15488 15489 if (desc.dtargd_ndx == DTRACE_ARGNONE) 15490 return (EINVAL); 15491 15492 mutex_enter(&dtrace_provider_lock); 15493 mutex_enter(&mod_lock); 15494 mutex_enter(&dtrace_lock); 15495 15496 if (desc.dtargd_id > dtrace_nprobes) { 15497 mutex_exit(&dtrace_lock); 15498 mutex_exit(&mod_lock); 15499 mutex_exit(&dtrace_provider_lock); 15500 return (EINVAL); 15501 } 15502 15503 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == 
NULL) { 15504 mutex_exit(&dtrace_lock); 15505 mutex_exit(&mod_lock); 15506 mutex_exit(&dtrace_provider_lock); 15507 return (EINVAL); 15508 } 15509 15510 mutex_exit(&dtrace_lock); 15511 15512 prov = probe->dtpr_provider; 15513 15514 if (prov->dtpv_pops.dtps_getargdesc == NULL) { 15515 /* 15516 * There isn't any typed information for this probe. 15517 * Set the argument number to DTRACE_ARGNONE. 15518 */ 15519 desc.dtargd_ndx = DTRACE_ARGNONE; 15520 } else { 15521 desc.dtargd_native[0] = '\0'; 15522 desc.dtargd_xlate[0] = '\0'; 15523 desc.dtargd_mapping = desc.dtargd_ndx; 15524 15525 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, 15526 probe->dtpr_id, probe->dtpr_arg, &desc); 15527 } 15528 15529 mutex_exit(&mod_lock); 15530 mutex_exit(&dtrace_provider_lock); 15531 15532 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15533 return (EFAULT); 15534 15535 return (0); 15536 } 15537 15538 case DTRACEIOC_GO: { 15539 processorid_t cpuid; 15540 rval = dtrace_state_go(state, &cpuid); 15541 15542 if (rval != 0) 15543 return (rval); 15544 15545 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 15546 return (EFAULT); 15547 15548 return (0); 15549 } 15550 15551 case DTRACEIOC_STOP: { 15552 processorid_t cpuid; 15553 15554 mutex_enter(&dtrace_lock); 15555 rval = dtrace_state_stop(state, &cpuid); 15556 mutex_exit(&dtrace_lock); 15557 15558 if (rval != 0) 15559 return (rval); 15560 15561 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 15562 return (EFAULT); 15563 15564 return (0); 15565 } 15566 15567 case DTRACEIOC_DOFGET: { 15568 dof_hdr_t hdr, *dof; 15569 uint64_t len; 15570 15571 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) 15572 return (EFAULT); 15573 15574 mutex_enter(&dtrace_lock); 15575 dof = dtrace_dof_create(state); 15576 mutex_exit(&dtrace_lock); 15577 15578 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); 15579 rval = copyout(dof, (void *)arg, len); 15580 dtrace_dof_destroy(dof); 15581 15582 return (rval == 0 ? 
0 : EFAULT); 15583 } 15584 15585 case DTRACEIOC_AGGSNAP: 15586 case DTRACEIOC_BUFSNAP: { 15587 dtrace_bufdesc_t desc; 15588 caddr_t cached; 15589 dtrace_buffer_t *buf; 15590 15591 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15592 return (EFAULT); 15593 15594 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) 15595 return (EINVAL); 15596 15597 mutex_enter(&dtrace_lock); 15598 15599 if (cmd == DTRACEIOC_BUFSNAP) { 15600 buf = &state->dts_buffer[desc.dtbd_cpu]; 15601 } else { 15602 buf = &state->dts_aggbuffer[desc.dtbd_cpu]; 15603 } 15604 15605 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { 15606 size_t sz = buf->dtb_offset; 15607 15608 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { 15609 mutex_exit(&dtrace_lock); 15610 return (EBUSY); 15611 } 15612 15613 /* 15614 * If this buffer has already been consumed, we're 15615 * going to indicate that there's nothing left here 15616 * to consume. 15617 */ 15618 if (buf->dtb_flags & DTRACEBUF_CONSUMED) { 15619 mutex_exit(&dtrace_lock); 15620 15621 desc.dtbd_size = 0; 15622 desc.dtbd_drops = 0; 15623 desc.dtbd_errors = 0; 15624 desc.dtbd_oldest = 0; 15625 sz = sizeof (desc); 15626 15627 if (copyout(&desc, (void *)arg, sz) != 0) 15628 return (EFAULT); 15629 15630 return (0); 15631 } 15632 15633 /* 15634 * If this is a ring buffer that has wrapped, we want 15635 * to copy the whole thing out. 
15636 */ 15637 if (buf->dtb_flags & DTRACEBUF_WRAPPED) { 15638 dtrace_buffer_polish(buf); 15639 sz = buf->dtb_size; 15640 } 15641 15642 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { 15643 mutex_exit(&dtrace_lock); 15644 return (EFAULT); 15645 } 15646 15647 desc.dtbd_size = sz; 15648 desc.dtbd_drops = buf->dtb_drops; 15649 desc.dtbd_errors = buf->dtb_errors; 15650 desc.dtbd_oldest = buf->dtb_xamot_offset; 15651 desc.dtbd_timestamp = dtrace_gethrtime(); 15652 15653 mutex_exit(&dtrace_lock); 15654 15655 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15656 return (EFAULT); 15657 15658 buf->dtb_flags |= DTRACEBUF_CONSUMED; 15659 15660 return (0); 15661 } 15662 15663 if (buf->dtb_tomax == NULL) { 15664 ASSERT(buf->dtb_xamot == NULL); 15665 mutex_exit(&dtrace_lock); 15666 return (ENOENT); 15667 } 15668 15669 cached = buf->dtb_tomax; 15670 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 15671 15672 dtrace_xcall(desc.dtbd_cpu, 15673 (dtrace_xcall_t)dtrace_buffer_switch, buf); 15674 15675 state->dts_errors += buf->dtb_xamot_errors; 15676 15677 /* 15678 * If the buffers did not actually switch, then the cross call 15679 * did not take place -- presumably because the given CPU is 15680 * not in the ready set. If this is the case, we'll return 15681 * ENOENT. 15682 */ 15683 if (buf->dtb_tomax == cached) { 15684 ASSERT(buf->dtb_xamot != cached); 15685 mutex_exit(&dtrace_lock); 15686 return (ENOENT); 15687 } 15688 15689 ASSERT(cached == buf->dtb_xamot); 15690 15691 /* 15692 * We have our snapshot; now copy it out. 
15693 */ 15694 if (copyout(buf->dtb_xamot, desc.dtbd_data, 15695 buf->dtb_xamot_offset) != 0) { 15696 mutex_exit(&dtrace_lock); 15697 return (EFAULT); 15698 } 15699 15700 desc.dtbd_size = buf->dtb_xamot_offset; 15701 desc.dtbd_drops = buf->dtb_xamot_drops; 15702 desc.dtbd_errors = buf->dtb_xamot_errors; 15703 desc.dtbd_oldest = 0; 15704 desc.dtbd_timestamp = buf->dtb_switched; 15705 15706 mutex_exit(&dtrace_lock); 15707 15708 /* 15709 * Finally, copy out the buffer description. 15710 */ 15711 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15712 return (EFAULT); 15713 15714 return (0); 15715 } 15716 15717 case DTRACEIOC_CONF: { 15718 dtrace_conf_t conf; 15719 15720 bzero(&conf, sizeof (conf)); 15721 conf.dtc_difversion = DIF_VERSION; 15722 conf.dtc_difintregs = DIF_DIR_NREGS; 15723 conf.dtc_diftupregs = DIF_DTR_NREGS; 15724 conf.dtc_ctfmodel = CTF_MODEL_NATIVE; 15725 15726 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) 15727 return (EFAULT); 15728 15729 return (0); 15730 } 15731 15732 case DTRACEIOC_STATUS: { 15733 dtrace_status_t stat; 15734 dtrace_dstate_t *dstate; 15735 int i, j; 15736 uint64_t nerrs; 15737 15738 /* 15739 * See the comment in dtrace_state_deadman() for the reason 15740 * for setting dts_laststatus to INT64_MAX before setting 15741 * it to the correct value. 
15742 */ 15743 state->dts_laststatus = INT64_MAX; 15744 dtrace_membar_producer(); 15745 state->dts_laststatus = dtrace_gethrtime(); 15746 15747 bzero(&stat, sizeof (stat)); 15748 15749 mutex_enter(&dtrace_lock); 15750 15751 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { 15752 mutex_exit(&dtrace_lock); 15753 return (ENOENT); 15754 } 15755 15756 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) 15757 stat.dtst_exiting = 1; 15758 15759 nerrs = state->dts_errors; 15760 dstate = &state->dts_vstate.dtvs_dynvars; 15761 15762 for (i = 0; i < NCPU; i++) { 15763 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; 15764 15765 stat.dtst_dyndrops += dcpu->dtdsc_drops; 15766 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; 15767 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; 15768 15769 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) 15770 stat.dtst_filled++; 15771 15772 nerrs += state->dts_buffer[i].dtb_errors; 15773 15774 for (j = 0; j < state->dts_nspeculations; j++) { 15775 dtrace_speculation_t *spec; 15776 dtrace_buffer_t *buf; 15777 15778 spec = &state->dts_speculations[j]; 15779 buf = &spec->dtsp_buffer[i]; 15780 stat.dtst_specdrops += buf->dtb_xamot_drops; 15781 } 15782 } 15783 15784 stat.dtst_specdrops_busy = state->dts_speculations_busy; 15785 stat.dtst_specdrops_unavail = state->dts_speculations_unavail; 15786 stat.dtst_stkstroverflows = state->dts_stkstroverflows; 15787 stat.dtst_dblerrors = state->dts_dblerrors; 15788 stat.dtst_killed = 15789 (state->dts_activity == DTRACE_ACTIVITY_KILLED); 15790 stat.dtst_errors = nerrs; 15791 15792 mutex_exit(&dtrace_lock); 15793 15794 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) 15795 return (EFAULT); 15796 15797 return (0); 15798 } 15799 15800 case DTRACEIOC_FORMAT: { 15801 dtrace_fmtdesc_t fmt; 15802 char *str; 15803 int len; 15804 15805 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) 15806 return (EFAULT); 15807 15808 mutex_enter(&dtrace_lock); 15809 15810 if (fmt.dtfd_format == 0 || 
15811 fmt.dtfd_format > state->dts_nformats) { 15812 mutex_exit(&dtrace_lock); 15813 return (EINVAL); 15814 } 15815 15816 /* 15817 * Format strings are allocated contiguously and they are 15818 * never freed; if a format index is less than the number 15819 * of formats, we can assert that the format map is non-NULL 15820 * and that the format for the specified index is non-NULL. 15821 */ 15822 ASSERT(state->dts_formats != NULL); 15823 str = state->dts_formats[fmt.dtfd_format - 1]; 15824 ASSERT(str != NULL); 15825 15826 len = strlen(str) + 1; 15827 15828 if (len > fmt.dtfd_length) { 15829 fmt.dtfd_length = len; 15830 15831 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { 15832 mutex_exit(&dtrace_lock); 15833 return (EINVAL); 15834 } 15835 } else { 15836 if (copyout(str, fmt.dtfd_string, len) != 0) { 15837 mutex_exit(&dtrace_lock); 15838 return (EINVAL); 15839 } 15840 } 15841 15842 mutex_exit(&dtrace_lock); 15843 return (0); 15844 } 15845 15846 default: 15847 break; 15848 } 15849 15850 return (ENOTTY); 15851 } 15852 15853 /*ARGSUSED*/ 15854 static int 15855 dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 15856 { 15857 dtrace_state_t *state; 15858 15859 switch (cmd) { 15860 case DDI_DETACH: 15861 break; 15862 15863 case DDI_SUSPEND: 15864 return (DDI_SUCCESS); 15865 15866 default: 15867 return (DDI_FAILURE); 15868 } 15869 15870 mutex_enter(&cpu_lock); 15871 mutex_enter(&dtrace_provider_lock); 15872 mutex_enter(&dtrace_lock); 15873 15874 ASSERT(dtrace_opens == 0); 15875 15876 if (dtrace_helpers > 0) { 15877 mutex_exit(&dtrace_provider_lock); 15878 mutex_exit(&dtrace_lock); 15879 mutex_exit(&cpu_lock); 15880 return (DDI_FAILURE); 15881 } 15882 15883 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { 15884 mutex_exit(&dtrace_provider_lock); 15885 mutex_exit(&dtrace_lock); 15886 mutex_exit(&cpu_lock); 15887 return (DDI_FAILURE); 15888 } 15889 15890 dtrace_provider = NULL; 15891 15892 if ((state = dtrace_anon_grab()) != NULL) { 15893 /* 15894 * 
If there were ECBs on this state, the provider should 15895 * have not been allowed to detach; assert that there is 15896 * none. 15897 */ 15898 ASSERT(state->dts_necbs == 0); 15899 dtrace_state_destroy(state); 15900 15901 /* 15902 * If we're being detached with anonymous state, we need to 15903 * indicate to the kernel debugger that DTrace is now inactive. 15904 */ 15905 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 15906 } 15907 15908 bzero(&dtrace_anon, sizeof (dtrace_anon_t)); 15909 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); 15910 dtrace_cpu_init = NULL; 15911 dtrace_helpers_cleanup = NULL; 15912 dtrace_helpers_fork = NULL; 15913 dtrace_cpustart_init = NULL; 15914 dtrace_cpustart_fini = NULL; 15915 dtrace_debugger_init = NULL; 15916 dtrace_debugger_fini = NULL; 15917 dtrace_modload = NULL; 15918 dtrace_modunload = NULL; 15919 15920 mutex_exit(&cpu_lock); 15921 15922 if (dtrace_helptrace_enabled) { 15923 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize); 15924 dtrace_helptrace_buffer = NULL; 15925 } 15926 15927 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); 15928 dtrace_probes = NULL; 15929 dtrace_nprobes = 0; 15930 15931 dtrace_hash_destroy(dtrace_bymod); 15932 dtrace_hash_destroy(dtrace_byfunc); 15933 dtrace_hash_destroy(dtrace_byname); 15934 dtrace_bymod = NULL; 15935 dtrace_byfunc = NULL; 15936 dtrace_byname = NULL; 15937 15938 kmem_cache_destroy(dtrace_state_cache); 15939 vmem_destroy(dtrace_minor); 15940 vmem_destroy(dtrace_arena); 15941 15942 if (dtrace_toxrange != NULL) { 15943 kmem_free(dtrace_toxrange, 15944 dtrace_toxranges_max * sizeof (dtrace_toxrange_t)); 15945 dtrace_toxrange = NULL; 15946 dtrace_toxranges = 0; 15947 dtrace_toxranges_max = 0; 15948 } 15949 15950 ddi_remove_minor_node(dtrace_devi, NULL); 15951 dtrace_devi = NULL; 15952 15953 ddi_soft_state_fini(&dtrace_softstate); 15954 15955 ASSERT(dtrace_vtime_references == 0); 15956 ASSERT(dtrace_opens == 0); 15957 
ASSERT(dtrace_retained == NULL); 15958 15959 mutex_exit(&dtrace_lock); 15960 mutex_exit(&dtrace_provider_lock); 15961 15962 /* 15963 * We don't destroy the task queue until after we have dropped our 15964 * locks (taskq_destroy() may block on running tasks). To prevent 15965 * attempting to do work after we have effectively detached but before 15966 * the task queue has been destroyed, all tasks dispatched via the 15967 * task queue must check that DTrace is still attached before 15968 * performing any operation. 15969 */ 15970 taskq_destroy(dtrace_taskq); 15971 dtrace_taskq = NULL; 15972 15973 return (DDI_SUCCESS); 15974 } 15975 15976 /*ARGSUSED*/ 15977 static int 15978 dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 15979 { 15980 int error; 15981 15982 switch (infocmd) { 15983 case DDI_INFO_DEVT2DEVINFO: 15984 *result = (void *)dtrace_devi; 15985 error = DDI_SUCCESS; 15986 break; 15987 case DDI_INFO_DEVT2INSTANCE: 15988 *result = (void *)0; 15989 error = DDI_SUCCESS; 15990 break; 15991 default: 15992 error = DDI_FAILURE; 15993 } 15994 return (error); 15995 } 15996 15997 static struct cb_ops dtrace_cb_ops = { 15998 dtrace_open, /* open */ 15999 dtrace_close, /* close */ 16000 nulldev, /* strategy */ 16001 nulldev, /* print */ 16002 nodev, /* dump */ 16003 nodev, /* read */ 16004 nodev, /* write */ 16005 dtrace_ioctl, /* ioctl */ 16006 nodev, /* devmap */ 16007 nodev, /* mmap */ 16008 nodev, /* segmap */ 16009 nochpoll, /* poll */ 16010 ddi_prop_op, /* cb_prop_op */ 16011 0, /* streamtab */ 16012 D_NEW | D_MP /* Driver compatibility flag */ 16013 }; 16014 16015 static struct dev_ops dtrace_ops = { 16016 DEVO_REV, /* devo_rev */ 16017 0, /* refcnt */ 16018 dtrace_info, /* get_dev_info */ 16019 nulldev, /* identify */ 16020 nulldev, /* probe */ 16021 dtrace_attach, /* attach */ 16022 dtrace_detach, /* detach */ 16023 nodev, /* reset */ 16024 &dtrace_cb_ops, /* driver operations */ 16025 NULL, /* bus operations */ 16026 nodev, /* dev 
power */ 16027 ddi_quiesce_not_needed, /* quiesce */ 16028 }; 16029 16030 static struct modldrv modldrv = { 16031 &mod_driverops, /* module type (this is a pseudo driver) */ 16032 "Dynamic Tracing", /* name of module */ 16033 &dtrace_ops, /* driver ops */ 16034 }; 16035 16036 static struct modlinkage modlinkage = { 16037 MODREV_1, 16038 (void *)&modldrv, 16039 NULL 16040 }; 16041 16042 int 16043 _init(void) 16044 { 16045 return (mod_install(&modlinkage)); 16046 } 16047 16048 int 16049 _info(struct modinfo *modinfop) 16050 { 16051 return (mod_info(&modlinkage, modinfop)); 16052 } 16053 16054 int 16055 _fini(void) 16056 { 16057 return (mod_remove(&modlinkage)); 16058 } 16059