1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/systm.h> 29 #include <sys/conf.h> 30 #include <sys/stat.h> 31 #include <sys/ddi.h> 32 #include <sys/sunddi.h> 33 #include <sys/modctl.h> 34 #include <sys/cpu_module.h> 35 #include <vm/hat_sfmmu.h> 36 #include <vm/seg_kmem.h> 37 #include <vm/seg_kpm.h> 38 #include <vm/vm_dep.h> 39 #include <sys/machsystm.h> 40 #include <sys/machasi.h> 41 #include <sys/sysmacros.h> 42 #include <sys/callb.h> 43 #include <sys/archsystm.h> 44 #include <sys/trapstat.h> 45 #ifdef sun4v 46 #include <sys/hypervisor_api.h> 47 #endif 48 #ifndef sun4v 49 #include <sys/chip.h> 50 #endif 51 52 /* BEGIN CSTYLED */ 53 /* 54 * trapstat: Trap Statistics through Dynamic Trap Table Interposition 55 * ------------------------------------------------------------------- 56 * 57 * Motivation and Overview 58 * 59 * Despite being a fundamental indicator of system behavior, there has 60 * historically been very little insight provided into the frequency and cost 61 * of machine-specific traps. 
The lack of insight has been especially acute 62 * on UltraSPARC microprocessors: because these microprocessors handle TLB 63 * misses as software traps, the frequency and duration of traps play a 64 * decisive role in the performance of the memory system. As applications have 65 * increasingly outstripped TLB reach, this has become increasingly true. 66 * 67 * Part of the difficulty of observing trap behavior is that the trap handlers 68 * are so frequently called (e.g. millions of times per second) that any 69 * permanently enabled instrumentation would induce an unacceptable performance 70 * degradation. Thus, it is a constraint on any trap observability 71 * infrastructure that it have no probe effect when not explicitly enabled. 72 * 73 * The basic idea, then, is to create an interposing trap table in which each 74 * entry increments a per-trap, in-memory counter and then jumps to the actual, 75 * underlying trap table entry. To enable trapstat, we atomically write to the 76 * trap base address (%tba) register to point to our interposing trap table. 77 * (Note that per-CPU statistics fall out by creating a different trap table 78 * for each CPU.) 79 * 80 * Implementation Details 81 * 82 * While the idea is straight-forward, a nuance of SPARC V9 slightly 83 * complicates the implementation. Unlike its predecessors, SPARC V9 supports 84 * the notion of nested traps. The trap level is kept in the TL register: 85 * during normal operation it is 0; when a trap is taken, the TL register is 86 * incremented by 1. To aid system software, SPARC V9 breaks the trap table 87 * into two halves: the lower half contains the trap handlers for traps taken 88 * when TL is 0; the upper half contains the trap handlers for traps taken 89 * when TL is greater than 0. 
Each half is further subdivided into two 90 * subsequent halves: the lower half contains the trap handlers for traps 91 * other than those induced by the trap instruction (Tcc variants); the upper 92 * half contains the trap handlers for traps induced by the trap instruction. 93 * This gives a total of four ranges, with each range containing 256 traps: 94 * 95 * +--------------------------------+- 3ff 96 * | | . 97 * | Trap instruction, TL>0 | . 98 * | | . 99 * |- - - - - - - - - - - - - - - - +- 300 100 * |- - - - - - - - - - - - - - - - +- 2ff 101 * | | . 102 * | Non-trap instruction, TL>0 | . 103 * | | . 104 * |- - - - - - - - - - - - - - - - +- 200 105 * |- - - - - - - - - - - - - - - - +- 1ff 106 * | | . 107 * | Trap instruction, TL=0 | . 108 * | | . 109 * |- - - - - - - - - - - - - - - - +- 100 110 * |- - - - - - - - - - - - - - - - +- 0ff 111 * | | . 112 * | Non-trap instruction, TL=0 | . 113 * | | . 114 * +--------------------------------+- 000 115 * 116 * 117 * Solaris, however, doesn't have reason to support trap instructions when 118 * TL>0 (only privileged code may execute at TL>0; not supporting this only 119 * constrains our own implementation). The trap table actually looks like: 120 * 121 * +--------------------------------+- 2ff 122 * | | . 123 * | Non-trap instruction, TL>0 | . 124 * | | . 125 * |- - - - - - - - - - - - - - - - +- 200 126 * |- - - - - - - - - - - - - - - - +- 1ff 127 * | | . 128 * | Trap instruction, TL=0 | . 129 * | | . 130 * |- - - - - - - - - - - - - - - - +- 100 131 * |- - - - - - - - - - - - - - - - +- 0ff 132 * | | . 133 * | Non-trap instruction, TL=0 | . 134 * | | . 135 * +--------------------------------+- 000 136 * 137 * Putatively to aid system software, SPARC V9 has the notion of multiple 138 * sets of global registers. 
UltraSPARC defines four sets of global 139 * registers: 140 * 141 * Normal Globals 142 * Alternate Globals (AGs) 143 * MMU Globals (MGs) 144 * Interrupt Globals (IGs) 145 * 146 * The set of globals in use is controlled by bits in PSTATE; when TL is 0 147 * (and PSTATE has not been otherwise explicitly modified), the Normal Globals 148 * are in use. When a trap is issued, PSTATE is modified to point to a set of 149 * globals corresponding to the trap type. Most traps correspond to the 150 * Alternate Globals, with a minority corresponding to the MMU Globals, and 151 * only the interrupt-vector trap (vector 0x60) corresponding to the Interrupt 152 * Globals. (The complete mapping can be found in the UltraSPARC I&II User's 153 * Manual.) 154 * 155 * Note that the sets of globals are per trap _type_, not per trap _level_. 156 * Thus, when executing a TL>0 trap handler, one may not have registers 157 * available (for example, both trap-instruction traps and spill traps execute 158 * on the alternate globals; if a trap-instruction trap induces a window spill, 159 * the window spill handler has no available globals). For trapstat, this is 160 * problematic: a register is required to transfer control from one arbitrary 161 * location (in the interposing trap table) to another (in the actual trap 162 * table). 163 * 164 * We solve this problem by exploiting the trap table's location at the bottom 165 * of valid kernel memory (i.e. at KERNELBASE). We locate the interposing trap 166 * tables just below KERNELBASE -- thereby allowing us to use a branch-always 167 * instruction (ba) instead of a jump instruction (jmp) to transfer control 168 * from the TL>0 entries in the interposing trap table to the TL>0 entries in 169 * the actual trap table. (N.B. while this allows trap table interposition to 170 * work, it necessarily limits trapstat to only recording information about 171 * TL=0 traps -- there is no way to increment a counter without using a 172 * register.) 
Diagrammatically: 173 * 174 * Actual trap table: 175 * 176 * +--------------------------------+- 2ff 177 * | | . 178 * | Non-trap instruction, TL>0 | . <-----------------------+ 179 * | | . <-----------------------|-+ 180 * |- - - - - - - - - - - - - - - - +- 200 <-----------------------|-|-+ 181 * |- - - - - - - - - - - - - - - - +- 1ff | | | 182 * | | . | | | 183 * | Trap instruction, TL=0 | . <-----------------+ | | | 184 * | | . <-----------------|-+ | | | 185 * |- - - - - - - - - - - - - - - - +- 100 <-----------------|-|-+ | | | 186 * |- - - - - - - - - - - - - - - - +- 0ff | | | | | | 187 * | | . | | | | | | 188 * | Non-trap instruction, TL=0 | . <-----------+ | | | | | | 189 * | | . <-----------|-+ | | | | | | 190 * +--------------------------------+- 000 <-----------|-|-+ | | | | | | 191 * KERNELBASE | | | | | | | | | 192 * | | | | | | | | | 193 * | | | | | | | | | 194 * Interposing trap table: | | | | | | | | | 195 * | | | | | | | | | 196 * +--------------------------------+- 2ff | | | | | | | | | 197 * | ... | . | | | | | | | | | 198 * | ... | . | | | | | | | | | 199 * | ... | . | | | | | | | | | 200 * |- - - - - - - - - - - - - - - - +- 203 | | | | | | | | | 201 * | ba,a | -------------|-|-|-|-|-|-+ | | 202 * |- - - - - - - - - - - - - - - - +- 202 | | | | | | | | 203 * | ba,a | -------------|-|-|-|-|-|---+ | 204 * |- - - - - - - - - - - - - - - - +- 201 | | | | | | | 205 * | ba,a | -------------|-|-|-|-|-|-----+ 206 * |- - - - - - - - - - - - - - - - +- 200 | | | | | | 207 * | ... | . | | | | | | 208 * | ... | . | | | | | | 209 * | ... | . 
| | | | | | 210 * |- - - - - - - - - - - - - - - - +- 103 | | | | | | 211 * | (Increment counter) | | | | | | | 212 * | ba,a | -------------------+ | | 213 * |- - - - - - - - - - - - - - - - +- 102 | | | | | 214 * | (Increment counter) | | | | | | 215 * | ba,a | ---------------------+ | 216 * |- - - - - - - - - - - - - - - - +- 101 | | | | 217 * | (Increment counter) | | | | | 218 * | ba,a | -----------------------+ 219 * |- - - - - - - - - - - - - - - - +- 100 | | | 220 * | ... | . | | | 221 * | ... | . | | | 222 * | ... | . | | | 223 * |- - - - - - - - - - - - - - - - +- 003 | | | 224 * | (Increment counter) | | | | 225 * | ba,a | -------------+ | | 226 * |- - - - - - - - - - - - - - - - +- 002 | | 227 * | (Increment counter) | | | 228 * | ba,a | ---------------+ | 229 * |- - - - - - - - - - - - - - - - +- 001 | 230 * | (Increment counter) | | 231 * | ba,a | -----------------+ 232 * +--------------------------------+- 000 233 * KERNELBASE - tstat_total_size 234 * 235 * tstat_total_size is the number of pages required for each trap table. It 236 * must be true that KERNELBASE - tstat_total_size is less than the maximum 237 * branch displacement; if each CPU were to consume a disjoint virtual range 238 * below KERNELBASE for its trap table, we could support at most 239 * (maximum_branch_displacement / tstat_total_size) CPUs. The maximum branch 240 * displacement for Bicc variants is just under eight megabytes, and (because 241 * the %tba must be 32K aligned), tstat_total_size must be at least 32K; if 242 * each CPU were to consume a disjoint virtual range, we would have an 243 * unacceptably low upper bound of 256 CPUs. 244 * 245 * While there are tricks that one could use to address this constraint (e.g., 246 * creating trampolines every maximum_branch_displacement bytes), we instead 247 * solve this by not permitting each CPU to consume a disjoint virtual range. 
248 * Rather, we have each CPU's interposing trap table use the _same_ virtual 249 * range, but we back the trap tables with disjoint physical memory. Normally, 250 * such one-to-many virtual-to-physical mappings are illegal; this is 251 * permissible here only because the pages for the interposing trap table are 252 * necessarily locked in the TLB. (The CPUs thus never have the opportunity to 253 * discover that they have conflicting translations.) 254 * 255 * On CMT architectures in which CPUs can share MMUs, the above trick will not 256 * work: two CPUs that share an MMU cannot have the same virtual address map 257 * to disjoint physical pages. On these architectures, any CPUs sharing the 258 * same MMU must consume a disjoint 32K virtual address range -- limiting the 259 * number of CPUs sharing an MMU on these architectures to 256 due to the 260 * branch displacement limitation described above. On the sun4v architecture, 261 * there is a further limitation: a guest may not have more than eight locked 262 * TLB entries per MMU. To allow operation under this restriction, the 263 * interposing trap table and the trap statistics are each accessed through 264 * a single 4M TLB entry. This limits the footprint to two locked entries 265 * (one for the I-TLB and one for the D-TLB), but further restricts the number 266 * of CPUs to 128 per MMU. However, support for more than 128 CPUs can easily 267 * be added via a hybrid scheme, where the same 4M virtual address is used 268 * on different MMUs. 269 * 270 * 271 * TLB Statistics 272 * 273 * Because TLB misses are an important component of system performance, we wish 274 * to know much more about these traps than simply the number received. 
275 * Specifically, we wish to know: 276 * 277 * (a) The amount of time spent executing the TLB miss handler 278 * (b) TLB misses versus TSB misses 279 * (c) Kernel-level misses versus user-level misses 280 * (d) Misses per pagesize 281 * 282 * TLB Statistics: Time Spent Executing 283 * 284 * To accurately determine the amount of time spent executing the TLB miss 285 * handler, one must get a timestamp on trap entry and trap exit, subtract the 286 * former from the latter, and add the result to an accumulating count. 287 * Consider the flow of control during normal TLB miss processing (where "ldx 288 * [%g2], %g2" is an arbitrary TLB-missing instruction): 289 * 290 * + - - - - - - - -+ 291 * : : 292 * : ldx [%g2], %g2 :<-------------------------------------------------------+ 293 * : : Return from trap: | 294 * + - - - - - - - -+ TL <- TL - 1 (0) | 295 * | %pc <- TSTATE[TL].TPC (address of load) | 296 * | TLB miss: | 297 * | TL <- TL + 1 (1) | 298 * | %pc <- TLB-miss-trap-handler | 299 * | | 300 * v | 301 * + - - - - - - - - - - - - - - - + | 302 * : : | 303 * : Lookup VA in TSB : | 304 * : If (hit) : | 305 * : Fill TLB : | 306 * : Else : | 307 * : Lookup VA (hme hash table : | 308 * : or segkpm) : | 309 * : Fill TLB : | 310 * : Endif : | 311 * : Issue "retry" ---------------------------------------------------------+ 312 * : : 313 * + - - - - - - - - - - - - - - - + 314 * TLB-miss-trap-handler 315 * 316 * 317 * As the above diagram indicates, interposing on the trap table allows one 318 * only to determine a timestamp on trap _entry_: when the TLB miss handler 319 * has completed filling the TLB, a "retry" will be issued, and control will 320 * transfer immediately back to the missing %pc. 321 * 322 * To obtain a timestamp on trap exit, we must then somehow interpose between 323 * the "retry" and the subsequent control transfer to the TLB-missing 324 * instruction. To do this, we _push_ a trap level. 
The basic idea is to 325 * spoof a TLB miss by raising TL, setting the %tpc to be within text 326 * controlled by trapstat (the "TLB return entry") and branching to the 327 * underlying TLB miss handler. When the TLB miss handler issues its "retry", 328 * control will transfer not to the TLB-missing instruction, but rather to the 329 * TLB return entry. This code can then obtain a timestamp, and issue its own 330 * "retry" -- thereby correctly returning to the TLB-missing instruction. 331 * Here is the above TLB miss flow control diagram modified to reflect 332 * trapstat's operation: 333 * 334 * + - - - - - - - -+ 335 * : : 336 * : ldx [%g2], %g2 :<-------------------------------------------------------+ 337 * : : Return from trap: | 338 * + - - - - - - - -+ TL <- TL - 1 (0) | 339 * | %pc <- TSTATE[TL].TPC (address of load) | 340 * | TLB miss: | 341 * | TL <- TL + 1 (1) | 342 * | %pc <- TLB-miss-trap-handler (trapstat) | 343 * | | 344 * v TLB-return-entry (trapstat) | 345 * + - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - + | 346 * : : : : | 347 * : Record timestamp : : Record timestamp : | 348 * : TL <- 2 : : Take timestamp difference : | 349 * : TSTATE[1].TPC <- TLB-return-entry : : Add to running total : | 350 * : ba,a TLB-miss-trap-handler -----------+ : Issue "retry" --------------+ 351 * : : | : : 352 * + - - - - - - - - - - - - - - - - - - + | + - - - - - - - - - - - - - + 353 * TLB-miss-trap-handler | ^ 354 * (trapstat) | | 355 * | | 356 * | | 357 * +-----------------------+ | 358 * | | 359 * | | 360 * v | 361 * + - - - - - - - - - - - - - - - + | 362 * : : | 363 * : Lookup VA in TSB : | 364 * : If (hit) : | 365 * : Fill TLB : | 366 * : Else : | 367 * : Lookup VA (hme hash table : | 368 * : or segkpm) : | 369 * : Fill TLB : | 370 * : Endif : | 371 * : Issue "retry" ------------------------------------------+ 372 * : : Return from trap: 373 * + - - - - - - - - - - - - - - - + TL <- TL - 1 (1) 374 * TLB-miss-trap-handler %pc <- 
TSTATE[TL].TPC (TLB-return-entry) 375 * 376 * 377 * A final subterfuge is required to complete our artifice: if we miss in 378 * the TLB, the TSB _and_ the subsequent hash or segkpm lookup (that is, if 379 * there is no valid translation for the TLB-missing address), common system 380 * software will need to accurately determine the %tpc as part of its page 381 * fault handling. We therefore modify the kernel to check the %tpc in this 382 * case: if the %tpc falls within the VA range controlled by trapstat and 383 * the TL is 2, TL is simply lowered back to 1 (this check is implemented 384 * by the TSTAT_CHECK_TL1 macro). Lowering TL to 1 has the effect of 385 * discarding the state pushed by trapstat. 386 * 387 * TLB Statistics: TLB Misses versus TSB Misses 388 * 389 * Distinguishing TLB misses from TSB misses requires further interposition 390 * on the TLB miss handler: we cannot know a priori or a posteriori if a 391 * given VA will or has hit in the TSB. 392 * 393 * We achieve this distinction by adding a second TLB return entry almost 394 * identical to the first -- differing only in the address to which it 395 * stores its results. We then modify the TLB miss handlers of the kernel 396 * such that they check the %tpc when they determine that a TLB miss has 397 * subsequently missed in the TSB: if the %tpc lies within trapstat's VA 398 * range and TL is 2 (that is, if trapstat is running), the TLB miss handler 399 * _increments_ the %tpc by the size of the TLB return entry. The ensuing 400 * "retry" will thus transfer control to the second TLB return entry, and 401 * the time spent in the handler will be accumulated in a memory location 402 * specific to TSB misses. 403 * 404 * N.B.: To minimize the amount of knowledge the kernel must have of trapstat, 405 * we do not allow the kernel to hard-code the size of the TLB return entry. 
406 * Rather, the actual tsbmiss handler executes a known instruction at the 407 * corresponding tsbmiss patch points (see the tstat_tsbmiss_patch_table) with 408 * the %tpc in %g7: when trapstat is not running, these points contain the 409 * harmless TSTAT_TSBMISS_INSTR instruction ("add %g7, 0, %g7"). Before 410 * running, trapstat modifies the instructions at these patch points such 411 * that the simm13 equals the size of the TLB return entry. 412 * 413 * TLB Statistics: Kernel-level Misses versus User-level Misses 414 * 415 * Differentiating user-level misses from kernel-level misses employs a 416 * similar technique, but is simplified by the ability to distinguish a 417 * user-level miss from a kernel-level miss a priori by reading the context 418 * register: we implement kernel-/user-level differentiation by again doubling 419 * the number of TLB return entries, and setting the %tpc to the appropriate 420 * TLB return entry in trapstat's TLB miss handler. Together with the doubling 421 * of entries required for TLB-miss/TSB-miss differentiation, this yields a 422 * total of four TLB return entries: 423 * 424 * Level TSB hit? Structure member 425 * ------------------------------------------------------------ 426 * Kernel Yes tstat_tlbret_t.ttlbr_ktlb 427 * Kernel No tstat_tlbret_t.ttlbr_ktsb 428 * User Yes tstat_tlbret_t.ttlbr_utlb 429 * User No tstat_tlbret_t.ttlbr_utsb 430 * 431 * TLB Statistics: Misses per Pagesize 432 * 433 * As with the TLB-/TSB-miss differentiation, we have no way of determining 434 * pagesize a priori. This is therefore implemented by mandating a new rule: 435 * whenever the kernel fills the TLB in its TLB miss handler, the TTE 436 * corresponding to the TLB-missing VA must be in %g5 when the handler 437 * executes its "retry". This allows the TLB return entry to determine 438 * pagesize by simply looking at the pagesize field in the TTE stored in 439 * %g5. 
440 * 441 * TLB Statistics: Probe Effect 442 * 443 * As one might imagine, gathering TLB statistics by pushing a trap level 444 * induces significant probe effect. To account for this probe effect, 445 * trapstat attempts to observe it by executing a code sequence with a known 446 * number of TLB misses both before and after interposing on the trap table. 447 * This allows trapstat to determine a per-trap probe effect which can then be 448 * factored into the "%tim" fields of the trapstat command. 449 * 450 * Note that on sun4v platforms, TLB misses are normally handled by the 451 * hypervisor or the hardware TSB walker. Thus no fast MMU miss information 452 * is reported for normal operation. However, when trapstat is invoked 453 * with the -t or -T option to collect detailed TLB statistics, the kernel takes 454 * over TLB miss handling. This results in significantly more overhead 455 * and TLB statistics may not be as accurate as on sun4u platforms. 456 * On some processors, the hypervisor or hardware may provide a low overhead 457 * interface to collect TSB hit statistics. This support is exposed via 458 * a well defined CPU module interface (cpu_trapstat_conf to enable this 459 * interface and cpu_trapstat_data to get detailed TSB hit statistics). 460 * In this scenario, TSB miss statistics are collected by intercepting the 461 * IMMU_miss and DMMU_miss traps using the above-mentioned trap interposition 462 * approach. 463 * 464 * Locking 465 * 466 * The implementation uses two locks: tstat_lock (a local lock) and the global 467 * cpu_lock. tstat_lock is used to assure trapstat's consistency in the 468 * presence of multithreaded /dev/trapstat consumers (while as of this writing 469 * the only consumer of /dev/trapstat is single threaded, it is obviously 470 * necessary to correctly support multithreaded access). cpu_lock is held 471 * whenever CPUs are being manipulated directly, to prevent them from 472 * disappearing in the process. 
Because trapstat's DR callback
 * (trapstat_cpu_setup()) must grab tstat_lock and is called with cpu_lock
 * held, the lock ordering is necessarily cpu_lock before tstat_lock.
 *
 */
/* END CSTYLED */

static dev_info_t	*tstat_devi;	/* saved in xxattach() for xxinfo() */
static int		tstat_open;	/* set if driver is open */
static kmutex_t		tstat_lock;	/* serialize access */
static vmem_t		*tstat_arena;	/* arena for TLB-locked pages */
static tstat_percpu_t	*tstat_percpu;	/* per-CPU data */
static int		tstat_running;	/* set if trapstat is running */
static tstat_data_t	*tstat_buffer;	/* staging buffer for outgoing data */
static int		tstat_options;	/* bit-wise indication of options */
static int		*tstat_enabled;	/* map of enabled trap entries */
static int		tstat_tsbmiss_patched;	/* tsbmiss patch flag */
static callb_id_t	tstat_cprcb;	/* CPR callback */
static char		*tstat_probe_area;	/* VA range used for probe effect */
static caddr_t		tstat_probe_phys;	/* physical to back above VA */
static hrtime_t		tstat_probe_time;	/* time spent on probe effect */
/*
 * Per-lap tick counts recorded by trapstat_probe_laps(): "before" is filled
 * while the real trap table is installed, "after" while the interposing trap
 * table is installed (see trapstat_probe()).
 */
static hrtime_t		tstat_probe_before[TSTAT_PROBE_NLAPS];
static hrtime_t		tstat_probe_after[TSTAT_PROBE_NLAPS];
static uint_t		tstat_pgszs;		/* # of kernel page sizes */
static uint_t		tstat_user_pgszs;	/* # of user page sizes */

/*
 * sizeof tstat_data_t + pgsz data for the kernel.  For simplicity's sake, when
 * we collect data, we do it based upon szc, but when we report data back to
 * userland, we have to do it based upon the userszc which may not match.
 * So, these two variables are for internal use and exported use respectively.
 */
static size_t		tstat_data_t_size;
static size_t		tstat_data_t_exported_size;

static size_t		tstat_data_pages;	/* number of pages of tstat data */
static size_t		tstat_data_size;	/* tstat data size in bytes */
static size_t		tstat_total_pages;	/* #data pages + #instr pages */
static size_t		tstat_total_size;	/* tstat data size + instr size */
#ifdef sun4v
static caddr_t		tstat_va;	/* VA of memory reserved for TBA */
static pfn_t		tstat_pfn;	/* PFN of memory reserved for TBA */
/*
 * When set, trapstat_enable()/trapstat_disable() use cpu_trapstat_conf() to
 * collect TSB hit statistics instead of taking over TLB miss handling.
 */
static boolean_t	tstat_fast_tlbstat = B_FALSE;
#endif

/*
 * In the above block comment, see "TLB Statistics: TLB Misses versus
 * TSB Misses" for an explanation of the tsbmiss patch points.
 */
extern uint32_t		tsbmiss_trapstat_patch_point;
extern uint32_t		tsbmiss_trapstat_patch_point_kpm;
extern uint32_t		tsbmiss_trapstat_patch_point_kpm_small;

/*
 * Trapstat tsbmiss patch table
 *
 * Each entry pairs a patch-point address with a slot that
 * trapstat_hotpatch() uses to save the original instruction (0 while
 * unpatched).  The table is terminated by an entry with a NULL address.
 */
tstat_tsbmiss_patch_entry_t tstat_tsbmiss_patch_table[] = {
	{(uint32_t *)&tsbmiss_trapstat_patch_point, 0},
	{(uint32_t *)&tsbmiss_trapstat_patch_point_kpm, 0},
	{(uint32_t *)&tsbmiss_trapstat_patch_point_kpm_small, 0},
	{(uint32_t *)NULL, 0}
};

/*
 * We define some general SPARC-specific constants to allow more readable
 * relocations.
 *
 *	HI22(v)		bits 31..10 of the 32-bit value v
 *	LO10(v)		low 10 bits of v
 *	LO12(v)		low 12 bits of v
 *	DISP22(f, t)	distance from f to t in 4-byte words, truncated
 *			to 22 bits
 *	ASI(asi)	asi shifted left by 5 bits
 */
#define	NOP	0x01000000
#define	HI22(v) ((uint32_t)(v) >> 10)
#define	LO10(v) ((uint32_t)(v) & 0x3ff)
#define	LO12(v) ((uint32_t)(v) & 0xfff)
#define	DISP22(from, to) \
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
#define	ASI(asi) ((asi) << 5)

/*
 * The interposing trap table must be locked in the I-TLB, and any data
 * referred to in the interposing trap handler must be locked in the D-TLB.
 * This function locks these pages in the appropriate TLBs by creating TTEs
 * from whole cloth, and manually loading them into the TLB.  This function is
 * called from cross call context.
553 * 554 * On sun4v platforms, we use 4M page size mappings to minimize the number 555 * of locked down entries (i.e. permanent mappings). Each CPU uses a 556 * reserved portion of that 4M page for its TBA and data. 557 */ 558 static void 559 trapstat_load_tlb(void) 560 { 561 #ifndef sun4v 562 int i; 563 #else 564 uint64_t ret; 565 #endif 566 tte_t tte; 567 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 568 caddr_t va = tcpu->tcpu_vabase; 569 570 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 571 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 572 573 #ifndef sun4v 574 for (i = 0; i < tstat_total_pages; i++, va += MMU_PAGESIZE) { 575 tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) | 576 TTE_PFN_INTHI(tcpu->tcpu_pfn[i]); 577 if (i < TSTAT_INSTR_PAGES) { 578 tte.tte_intlo = TTE_PFN_INTLO(tcpu->tcpu_pfn[i]) | 579 TTE_LCK_INT | TTE_CP_INT | TTE_PRIV_INT; 580 sfmmu_itlb_ld_kva(va, &tte); 581 } else { 582 tte.tte_intlo = TTE_PFN_INTLO(tcpu->tcpu_pfn[i]) | 583 TTE_LCK_INT | TTE_CP_INT | TTE_CV_INT | 584 TTE_PRIV_INT | TTE_HWWR_INT; 585 sfmmu_dtlb_ld_kva(va, &tte); 586 } 587 } 588 #else /* sun4v */ 589 tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(tstat_pfn); 590 tte.tte_intlo = TTE_PFN_INTLO(tstat_pfn) | TTE_CP_INT | 591 TTE_CV_INT | TTE_PRIV_INT | TTE_HWWR_INT | 592 TTE_SZ_INTLO(TTE4M); 593 ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte, 594 MAP_ITLB | MAP_DTLB); 595 596 if (ret != H_EOK) 597 cmn_err(CE_PANIC, "trapstat: cannot map new TBA " 598 "for cpu %d (error: 0x%lx)", CPU->cpu_id, ret); 599 #endif /* sun4v */ 600 } 601 602 /* 603 * As mentioned in the "TLB Statistics: TLB Misses versus TSB Misses" section 604 * of the block comment, TLB misses are differentiated from TSB misses in 605 * part by hot-patching the instructions at the tsbmiss patch points (see 606 * tstat_tsbmiss_patch_table). 
This routine is used both to initially patch
 * the instructions, and to patch them back to their original values upon
 * restoring the original trap table.
 */
static void
trapstat_hotpatch()
{
	tstat_tsbmiss_patch_entry_t *entry;
	uint32_t patched;
	uint32_t delta;

	ASSERT(MUTEX_HELD(&tstat_lock));

	/* The patch points only matter when TLB data is being gathered. */
	if (!(tstat_options & TSTAT_OPT_TLBDATA))
		return;

	if (tstat_tsbmiss_patched) {
		/*
		 * Already patched: write each saved instruction back and
		 * clear the save slot.
		 */
		for (entry = tstat_tsbmiss_patch_table;
		    entry->tpe_addr; entry++) {
			ASSERT(entry->tpe_instr == TSTAT_TSBMISS_INSTR);
			hot_patch_kernel_text((caddr_t)entry->tpe_addr,
			    entry->tpe_instr, sizeof (patched));
			entry->tpe_instr = 0;
		}

		tstat_tsbmiss_patched = 0;
		return;
	}

	/*
	 * Not yet patched.  The value OR'ed into each patch point is the
	 * distance between the TLB-hit and TSB-miss return entries, which
	 * must be the same for the kernel and user pairs.
	 */
	/*CONSTCOND*/
	ASSERT(offsetof(tstat_tlbret_t, ttlbr_ktsb) -
	    offsetof(tstat_tlbret_t, ttlbr_ktlb) ==
	    offsetof(tstat_tlbret_t, ttlbr_utsb) -
	    offsetof(tstat_tlbret_t, ttlbr_utlb));

	delta = offsetof(tstat_tlbret_t, ttlbr_ktsb) -
	    offsetof(tstat_tlbret_t, ttlbr_ktlb);

	for (entry = tstat_tsbmiss_patch_table; entry->tpe_addr; entry++) {
		ASSERT(entry->tpe_instr == 0);

		/* Save the current instruction so it can be restored. */
		patched = entry->tpe_instr = *entry->tpe_addr;

		/*
		 * The instruction about to be patched must be
		 * "add %g7, 0, %g7" (0x8e01e000).
		 */
		ASSERT(patched == TSTAT_TSBMISS_INSTR);

		patched |= delta;
		hot_patch_kernel_text((caddr_t)entry->tpe_addr,
		    patched, sizeof (patched));
	}

	tstat_tsbmiss_patched = 1;
}

/*
 * This is the routine executed to clock the performance of the trap table,
 * executed both before and after interposing on the trap table to attempt to
 * determine probe effect.  The probe effect is used to adjust the "%tim"
 * fields of trapstat's -t and -T output; we only use TLB misses to clock the
 * trap table.
We execute the inner loop (which is designed to exceed the 673 * TLB's reach) nlaps times, taking the best time as our time (thereby 674 * factoring out the effects of interrupts, cache misses or other perturbing 675 * events. 676 */ 677 static hrtime_t 678 trapstat_probe_laps(int nlaps, hrtime_t *buf) 679 { 680 int i, j = 0; 681 hrtime_t ts, best = INT64_MAX; 682 683 while (nlaps--) { 684 ts = rdtick(); 685 686 for (i = 0; i < TSTAT_PROBE_SIZE; i += MMU_PAGESIZE) 687 *((volatile char *)&tstat_probe_area[i]); 688 689 if ((ts = rdtick() - ts) < best) 690 best = ts; 691 buf[j++] = ts; 692 } 693 694 return (best); 695 } 696 697 /* 698 * This routine determines the probe effect by calling trapstat_probe_laps() 699 * both without and with the interposing trap table. Note that this is 700 * called from a cross call on the desired CPU, and that it is called on 701 * every CPU (this is necessary because the probe effect may differ from 702 * one CPU to another). 703 */ 704 static void 705 trapstat_probe() 706 { 707 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 708 hrtime_t before, after; 709 710 if (!(tcpu->tcpu_flags & TSTAT_CPU_SELECTED)) 711 return; 712 713 if (tstat_probe_area == NULL || (tstat_options & TSTAT_OPT_NOGO)) 714 return; 715 716 /* 717 * We very much expect the %tba to be KERNELBASE; this is a 718 * precautionary measure to assure that trapstat doesn't melt the 719 * machine should the %tba point unexpectedly elsewhere. 720 */ 721 if (get_tba() != (caddr_t)KERNELBASE) 722 return; 723 724 /* 725 * Preserve this CPU's data before destroying it by enabling the 726 * interposing trap table. We can safely use tstat_buffer because 727 * the caller of the trapstat_probe() cross call is holding tstat_lock. 
728 */ 729 bcopy(tcpu->tcpu_data, tstat_buffer, tstat_data_t_size); 730 731 tstat_probe_time = gethrtime(); 732 733 before = trapstat_probe_laps(TSTAT_PROBE_NLAPS, tstat_probe_before); 734 (void) set_tba(tcpu->tcpu_ibase); 735 736 after = trapstat_probe_laps(TSTAT_PROBE_NLAPS, tstat_probe_after); 737 (void) set_tba((caddr_t)KERNELBASE); 738 739 tstat_probe_time = gethrtime() - tstat_probe_time; 740 741 bcopy(tstat_buffer, tcpu->tcpu_data, tstat_data_t_size); 742 tcpu->tcpu_data->tdata_peffect = (after - before) / TSTAT_PROBE_NPAGES; 743 } 744 745 static void 746 trapstat_probe_alloc() 747 { 748 pfn_t pfn; 749 caddr_t va; 750 int i; 751 752 ASSERT(MUTEX_HELD(&tstat_lock)); 753 ASSERT(tstat_probe_area == NULL); 754 ASSERT(tstat_probe_phys == NULL); 755 756 if (!(tstat_options & TSTAT_OPT_TLBDATA)) 757 return; 758 759 /* 760 * Grab some virtual from the heap arena. 761 */ 762 tstat_probe_area = vmem_alloc(heap_arena, TSTAT_PROBE_SIZE, VM_SLEEP); 763 va = tstat_probe_area; 764 765 /* 766 * Grab a single physical page. 767 */ 768 tstat_probe_phys = vmem_alloc(tstat_arena, MMU_PAGESIZE, VM_SLEEP); 769 pfn = hat_getpfnum(kas.a_hat, tstat_probe_phys); 770 771 /* 772 * Now set the translation for every page in our virtual range 773 * to be our allocated physical page. 
774 */ 775 for (i = 0; i < TSTAT_PROBE_NPAGES; i++) { 776 hat_devload(kas.a_hat, va, MMU_PAGESIZE, pfn, PROT_READ, 777 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 778 va += MMU_PAGESIZE; 779 } 780 } 781 782 static void 783 trapstat_probe_free() 784 { 785 caddr_t va; 786 int i; 787 788 ASSERT(MUTEX_HELD(&tstat_lock)); 789 790 if ((va = tstat_probe_area) == NULL) 791 return; 792 793 for (i = 0; i < TSTAT_PROBE_NPAGES; i++) { 794 hat_unload(kas.a_hat, va, MMU_PAGESIZE, HAT_UNLOAD_UNLOCK); 795 va += MMU_PAGESIZE; 796 } 797 798 vmem_free(tstat_arena, tstat_probe_phys, MMU_PAGESIZE); 799 vmem_free(heap_arena, tstat_probe_area, TSTAT_PROBE_SIZE); 800 801 tstat_probe_phys = NULL; 802 tstat_probe_area = NULL; 803 } 804 805 /* 806 * This routine actually enables a CPU by setting its %tba to be the 807 * CPU's interposing trap table. It is called out of cross call context. 808 */ 809 static void 810 trapstat_enable() 811 { 812 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 813 814 if (!(tcpu->tcpu_flags & TSTAT_CPU_SELECTED)) 815 return; 816 817 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 818 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 819 820 if (get_tba() != (caddr_t)KERNELBASE) 821 return; 822 823 if (!(tstat_options & TSTAT_OPT_NOGO)) 824 (void) set_tba(tcpu->tcpu_ibase); 825 tcpu->tcpu_flags |= TSTAT_CPU_ENABLED; 826 #ifdef sun4v 827 if ((tstat_options & TSTAT_OPT_TLBDATA) && 828 !(tstat_options & TSTAT_OPT_NOGO)) { 829 if (tstat_fast_tlbstat) { 830 /* 831 * Invoke processor specific interface to enable 832 * collection of TSB hit statistics. 833 */ 834 cpu_trapstat_conf(CPU_TSTATCONF_ENABLE); 835 } else { 836 /* 837 * Collect TLB miss statistics by taking over 838 * TLB miss handling from the hypervisor. This 839 * is done by telling the hypervisor that there 840 * is no TSB configured. Also set TSTAT_TLB_STATS 841 * flag so that no user TSB is configured during 842 * context switch time. 
843 */ 844 cpu_t *cp = CPU; 845 846 cp->cpu_m.cpu_tstat_flags |= TSTAT_TLB_STATS; 847 (void) hv_set_ctx0(NULL, NULL); 848 (void) hv_set_ctxnon0(NULL, NULL); 849 } 850 } 851 #endif 852 } 853 854 /* 855 * This routine disables a CPU (vis a vis trapstat) by setting its %tba to be 856 * the actual, underlying trap table. It is called out of cross call context. 857 */ 858 static void 859 trapstat_disable() 860 { 861 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 862 863 if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)) 864 return; 865 866 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 867 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 868 869 if (!(tstat_options & TSTAT_OPT_NOGO)) 870 (void) set_tba((caddr_t)KERNELBASE); 871 872 tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED; 873 874 #ifdef sun4v 875 if ((tstat_options & TSTAT_OPT_TLBDATA) && 876 !(tstat_options & TSTAT_OPT_NOGO)) { 877 if (tstat_fast_tlbstat) { 878 /* 879 * Invoke processor specific interface to disable 880 * collection of TSB hit statistics on each processor. 881 */ 882 cpu_trapstat_conf(CPU_TSTATCONF_DISABLE); 883 } else { 884 /* 885 * As part of collecting TLB miss statistics, we took 886 * over TLB miss handling from the hypervisor by 887 * telling the hypervisor that NO TSB is configured. 888 * We need to restore that by communicating proper 889 * kernel/user TSB information so that TLB misses 890 * can be handled by the hypervisor or the hardware 891 * more efficiently. 892 * 893 * We restore kernel TSB information right away. 894 * However, to minimize any locking dependency, we 895 * don't restore user TSB information right away. 896 * Instead, we simply clear the TSTAT_TLB_STATS flag 897 * so that the user TSB information is automatically 898 * restored on next context switch. 899 * 900 * Note that the call to restore kernel TSB information 901 * will normally not fail, unless wrong information is 902 * passed here. 
In that scenario, system will still 903 * continue to function properly with the exception of 904 * kernel handling all the TLB misses. 905 */ 906 struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock; 907 cpu_t *cp = CPU; 908 909 cp->cpu_m.cpu_tstat_flags &= ~TSTAT_TLB_STATS; 910 (void) hv_set_ctx0(hvbp->hv_tsb_info_cnt, 911 hvbp->hv_tsb_info_pa); 912 } 913 } 914 #endif 915 } 916 917 /* 918 * We use %tick as the time base when recording the time spent executing 919 * the trap handler. %tick, however, is not necessarily kept in sync 920 * across CPUs (indeed, different CPUs may have different %tick frequencies). 921 * We therefore cross call onto a CPU to get a snapshot of its data to 922 * copy out; this is the routine executed out of that cross call. 923 */ 924 static void 925 trapstat_snapshot() 926 { 927 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 928 tstat_data_t *data = tcpu->tcpu_data; 929 930 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 931 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 932 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ENABLED); 933 934 data->tdata_snapts = gethrtime(); 935 data->tdata_snaptick = rdtick(); 936 bcopy(data, tstat_buffer, tstat_data_t_size); 937 #ifdef sun4v 938 /* 939 * Invoke processor specific interface to collect TSB hit 940 * statistics on each processor. 941 */ 942 if ((tstat_options & TSTAT_OPT_TLBDATA) && tstat_fast_tlbstat) 943 cpu_trapstat_data((void *) tstat_buffer->tdata_pgsz, 944 tstat_pgszs); 945 #endif 946 } 947 948 /* 949 * The TSTAT_RETENT_* constants define offsets in the TLB return entry. 950 * They are used only in trapstat_tlbretent() (below) and #undef'd 951 * immediately afterwards. Any change to "retent" in trapstat_tlbretent() 952 * will likely require changes to these constants. 
953 */ 954 955 #ifndef sun4v 956 #define TSTAT_RETENT_STATHI 1 957 #define TSTAT_RETENT_STATLO 2 958 #define TSTAT_RETENT_SHIFT 11 959 #define TSTAT_RETENT_COUNT_LD 13 960 #define TSTAT_RETENT_COUNT_ST 15 961 #define TSTAT_RETENT_TMPTSHI 16 962 #define TSTAT_RETENT_TMPTSLO 17 963 #define TSTAT_RETENT_TIME_LD 19 964 #define TSTAT_RETENT_TIME_ST 21 965 #else /* sun4v */ 966 #define TSTAT_RETENT_STATHI 1 967 #define TSTAT_RETENT_STATLO 2 968 #define TSTAT_RETENT_SHIFT 5 969 #define TSTAT_RETENT_COUNT_LD 7 970 #define TSTAT_RETENT_COUNT_ST 9 971 #define TSTAT_RETENT_TMPTSHI 10 972 #define TSTAT_RETENT_TMPTSLO 11 973 #define TSTAT_RETENT_TIME_LD 13 974 #define TSTAT_RETENT_TIME_ST 15 975 #endif /* sun4v */ 976 977 static void 978 trapstat_tlbretent(tstat_percpu_t *tcpu, tstat_tlbretent_t *ret, 979 tstat_missdata_t *data) 980 { 981 uint32_t *ent = ret->ttlbrent_instr, shift; 982 uintptr_t base, tmptick = TSTAT_DATA_OFFS(tcpu, tdata_tmptick); 983 984 /* 985 * This is the entry executed upon return from the TLB/TSB miss 986 * handler (i.e. the code interpositioned between the "retry" and 987 * the actual return to the TLB-missing instruction). Detail on its 988 * theory of operation can be found in the "TLB Statistics" section 989 * of the block comment. Note that we expect the TTE just loaded 990 * into the TLB to be in %g5; all other globals are available as 991 * scratch. Finally, note that the page size information in sun4v is 992 * located in the lower bits of the TTE -- requiring us to have a 993 * different return entry on sun4v. 
994 */ 995 static const uint32_t retent[TSTAT_TLBRET_NINSTR] = { 996 #ifndef sun4v 997 0x87410000, /* rd %tick, %g3 */ 998 0x03000000, /* sethi %hi(stat), %g1 */ 999 0x82106000, /* or %g1, %lo(stat), %g1 */ 1000 0x89297001, /* sllx %g5, 1, %g4 */ 1001 0x8931303e, /* srlx %g4, 62, %g4 */ 1002 0x8531702e, /* srlx %g5, 46, %g2 */ 1003 0x8408a004, /* and %g2, 4, %g2 */ 1004 0x88110002, /* or %g4, %g2, %g4 */ 1005 0x80a12005, /* cmp %g4, 5 */ 1006 0x34400002, /* bg,a,pn %icc, +8 */ 1007 0x88102004, /* mov 4, %g4 */ 1008 0x89292000, /* sll %g4, shift, %g4 */ 1009 0x82004004, /* add %g1, %g4, %g1 */ 1010 0xc4586000, /* ldx [%g1 + tmiss_count], %g2 */ 1011 0x8400a001, /* add %g2, 1, %g2 */ 1012 0xc4706000, /* stx %g2, [%g1 + tmiss_count] */ 1013 0x0d000000, /* sethi %hi(tdata_tmptick), %g6 */ 1014 0xc459a000, /* ldx [%g6 + %lo(tdata_tmptick)], %g2 */ 1015 0x8620c002, /* sub %g3, %g2, %g3 */ 1016 0xc4586000, /* ldx [%g1 + tmiss_time], %g2 */ 1017 0x84008003, /* add %g2, %g3, %g2 */ 1018 0xc4706000, /* stx %g2, [%g1 + tmiss_time] */ 1019 0x83f00000 /* retry */ 1020 #else /* sun4v */ 1021 0x87410000, /* rd %tick, %g3 */ 1022 0x03000000, /* sethi %hi(stat), %g1 */ 1023 0x82106000, /* or %g1, %lo(stat), %g1 */ 1024 0x8929703d, /* sllx %g5, 61, %g4 */ 1025 0x8931303d, /* srlx %g4, 61, %g4 */ 1026 0x89292000, /* sll %g4, shift, %g4 */ 1027 0x82004004, /* add %g1, %g4, %g1 */ 1028 0xc4586000, /* ldx [%g1 + tmiss_count], %g2 */ 1029 0x8400a001, /* add %g2, 1, %g2 */ 1030 0xc4706000, /* stx %g2, [%g1 + tmiss_count] */ 1031 0x0d000000, /* sethi %hi(tdata_tmptick), %g6 */ 1032 0xc459a000, /* ldx [%g6 + %lo(tdata_tmptick)], %g2 */ 1033 0x8620c002, /* sub %g3, %g2, %g3 */ 1034 0xc4586000, /* ldx [%g1 + tmiss_time], %g2 */ 1035 0x84008003, /* add %g2, %g3, %g2 */ 1036 0xc4706000, /* stx %g2, [%g1 + tmiss_time] */ 1037 0x83f00000 /* retry */ 1038 #endif /* sun4v */ 1039 }; 1040 1041 ASSERT(MUTEX_HELD(&tstat_lock)); 1042 /*CONSTCOND*/ 1043 ASSERT(offsetof(tstat_missdata_t, tmiss_count) <= 
LO10(-1)); 1044 /*CONSTCOND*/ 1045 ASSERT(offsetof(tstat_missdata_t, tmiss_time) <= LO10(-1)); 1046 /*CONSTCOND*/ 1047 ASSERT(!((sizeof (tstat_pgszdata_t) - 1) & sizeof (tstat_pgszdata_t))); 1048 1049 for (shift = 1; (1 << shift) != sizeof (tstat_pgszdata_t); shift++) 1050 continue; 1051 1052 base = (uintptr_t)tcpu->tcpu_dbase + 1053 ((uintptr_t)data - (uintptr_t)tcpu->tcpu_data); 1054 1055 bcopy(retent, ent, sizeof (retent)); 1056 1057 ent[TSTAT_RETENT_STATHI] |= HI22(base); 1058 ent[TSTAT_RETENT_STATLO] |= LO10(base); 1059 ent[TSTAT_RETENT_SHIFT] |= shift; 1060 /* LINTED E_EXPR_NULL_EFFECT */ 1061 ent[TSTAT_RETENT_COUNT_LD] |= offsetof(tstat_missdata_t, tmiss_count); 1062 /* LINTED E_EXPR_NULL_EFFECT */ 1063 ent[TSTAT_RETENT_COUNT_ST] |= offsetof(tstat_missdata_t, tmiss_count); 1064 ent[TSTAT_RETENT_TMPTSHI] |= HI22(tmptick); 1065 ent[TSTAT_RETENT_TMPTSLO] |= LO10(tmptick); 1066 ent[TSTAT_RETENT_TIME_LD] |= offsetof(tstat_missdata_t, tmiss_time); 1067 ent[TSTAT_RETENT_TIME_ST] |= offsetof(tstat_missdata_t, tmiss_time); 1068 } 1069 1070 #undef TSTAT_RETENT_STATHI 1071 #undef TSTAT_RETENT_STATLO 1072 #undef TSTAT_RETENT_SHIFT 1073 #undef TSTAT_RETENT_COUNT_LD 1074 #undef TSTAT_RETENT_COUNT_ST 1075 #undef TSTAT_RETENT_TMPTSHI 1076 #undef TSTAT_RETENT_TMPTSLO 1077 #undef TSTAT_RETENT_TIME_LD 1078 #undef TSTAT_RETENT_TIME_ST 1079 1080 /* 1081 * The TSTAT_TLBENT_* constants define offsets in the TLB entry. They are 1082 * used only in trapstat_tlbent() (below) and #undef'd immediately afterwards. 1083 * Any change to "tlbent" in trapstat_tlbent() will likely require changes 1084 * to these constants. 
1085 */ 1086 1087 #ifndef sun4v 1088 #define TSTAT_TLBENT_STATHI 0 1089 #define TSTAT_TLBENT_STATLO_LD 1 1090 #define TSTAT_TLBENT_STATLO_ST 3 1091 #define TSTAT_TLBENT_MMUASI 15 1092 #define TSTAT_TLBENT_TPCHI 18 1093 #define TSTAT_TLBENT_TPCLO_USER 19 1094 #define TSTAT_TLBENT_TPCLO_KERN 21 1095 #define TSTAT_TLBENT_TSHI 25 1096 #define TSTAT_TLBENT_TSLO 27 1097 #define TSTAT_TLBENT_BA 28 1098 #else /* sun4v */ 1099 #define TSTAT_TLBENT_STATHI 0 1100 #define TSTAT_TLBENT_STATLO_LD 1 1101 #define TSTAT_TLBENT_STATLO_ST 3 1102 #define TSTAT_TLBENT_TAGTARGET 19 1103 #define TSTAT_TLBENT_TPCHI 21 1104 #define TSTAT_TLBENT_TPCLO_USER 22 1105 #define TSTAT_TLBENT_TPCLO_KERN 24 1106 #define TSTAT_TLBENT_TSHI 28 1107 #define TSTAT_TLBENT_TSLO 30 1108 #define TSTAT_TLBENT_BA 31 1109 #endif /* sun4v */ 1110 1111 static void 1112 trapstat_tlbent(tstat_percpu_t *tcpu, int entno) 1113 { 1114 uint32_t *ent; 1115 uintptr_t orig, va, baoffs; 1116 #ifndef sun4v 1117 int itlb = entno == TSTAT_ENT_ITLBMISS; 1118 #else 1119 int itlb = (entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_ITLBMISS); 1120 #endif 1121 int entoffs = entno << TSTAT_ENT_SHIFT; 1122 uintptr_t tmptick, stat, tpc, utpc; 1123 tstat_pgszdata_t *data = &tcpu->tcpu_data->tdata_pgsz[0]; 1124 tstat_tlbdata_t *udata, *kdata; 1125 tstat_tlbret_t *ret; 1126 #ifndef sun4v 1127 uint32_t asi = itlb ? ASI(ASI_IMMU) : ASI(ASI_DMMU); 1128 #else 1129 uint32_t tagtarget_off = itlb ? MMFSA_I_CTX : MMFSA_D_CTX; 1130 #endif 1131 1132 /* 1133 * When trapstat is run with TLB statistics, this is the entry for 1134 * both I- and D-TLB misses; this code performs trap level pushing, 1135 * as described in the "TLB Statistics" section of the block comment. 1136 * This code is executing at TL 1; %tstate[0] contains the saved 1137 * state at the time of the TLB miss. Pushing trap level 1 (and thus 1138 * raising TL to 2) requires us to fill in %tstate[1] with our %pstate, 1139 * %cwp and %asi. 
We leave %tt unchanged, and we set %tpc and %tnpc to 1140 * the appropriate TLB return entry (based on the context of the miss). 1141 * Finally, we sample %tick, and stash it in the tdata_tmptick member 1142 * the per-CPU tstat_data structure. tdata_tmptick will be used in 1143 * the TLB return entry to determine the amount of time spent in the 1144 * TLB miss handler. 1145 * 1146 * Note that on sun4v platforms, we must obtain the context information 1147 * from the MMU fault status area. (The base address of this MMU fault 1148 * status area is kept in the scratchpad register 0.) 1149 */ 1150 static const uint32_t tlbent[] = { 1151 #ifndef sun4v 1152 0x03000000, /* sethi %hi(stat), %g1 */ 1153 0xc4586000, /* ldx [%g1 + %lo(stat)], %g2 */ 1154 0x8400a001, /* add %g2, 1, %g2 */ 1155 0xc4706000, /* stx %g2, [%g1 + %lo(stat)] */ 1156 0x85524000, /* rdpr %cwp, %g2 */ 1157 0x87518000, /* rdpr %pstate, %g3 */ 1158 0x8728f008, /* sllx %g3, 8, %g3 */ 1159 0x84108003, /* or %g2, %g3, %g2 */ 1160 0x8740c000, /* rd %asi, %g3 */ 1161 0x8728f018, /* sllx %g3, 24, %g3 */ 1162 0x84108003, /* or %g2, %g3, %g2 */ 1163 0x8350c000, /* rdpr %tt, %g1 */ 1164 0x8f902002, /* wrpr %g0, 2, %tl */ 1165 0x85908000, /* wrpr %g2, %g0, %tstate */ 1166 0x87904000, /* wrpr %g1, %g0, %tt */ 1167 0xc2d80000, /* ldxa [%g0]ASI_MMU, %g1 */ 1168 0x83307030, /* srlx %g1, CTXSHIFT, %g1 */ 1169 0x02c04004, /* brz,pn %g1, .+0x10 */ 1170 0x03000000, /* sethi %hi(new_tpc), %g1 */ 1171 0x82106000, /* or %g1, %lo(new_tpc), %g1 */ 1172 0x30800002, /* ba,a .+0x8 */ 1173 0x82106000, /* or %g1, %lo(new_tpc), %g1 */ 1174 0x81904000, /* wrpr %g1, %g0, %tpc */ 1175 0x82006004, /* add %g1, 4, %g1 */ 1176 0x83904000, /* wrpr %g1, %g0, %tnpc */ 1177 0x03000000, /* sethi %hi(tmptick), %g1 */ 1178 0x85410000, /* rd %tick, %g2 */ 1179 0xc4706000, /* stx %g2, [%g1 + %lo(tmptick)] */ 1180 0x30800000, /* ba,a addr */ 1181 NOP, NOP, NOP 1182 #else /* sun4v */ 1183 0x03000000, /* sethi %hi(stat), %g1 */ 1184 0xc4586000, /* ldx 
[%g1 + %lo(stat)], %g2 */ 1185 0x8400a001, /* add %g2, 1, %g2 */ 1186 0xc4706000, /* stx %g2, [%g1 + %lo(stat)] */ 1187 0x85524000, /* rdpr %cwp, %g2 */ 1188 0x87518000, /* rdpr %pstate, %g3 */ 1189 0x8728f008, /* sllx %g3, 8, %g3 */ 1190 0x84108003, /* or %g2, %g3, %g2 */ 1191 0x8740c000, /* rd %asi, %g3 */ 1192 0x8728f018, /* sllx %g3, 24, %g3 */ 1193 0x83540000, /* rdpr %gl, %g1 */ 1194 0x83287028, /* sllx %g1, 40, %g1 */ 1195 0x86104003, /* or %g1, %g3, %g3 */ 1196 0x84108003, /* or %g2, %g3, %g2 */ 1197 0x8350c000, /* rdpr %tt, %g1 */ 1198 0x8f902002, /* wrpr %g0, 2, %tl */ 1199 0x85908000, /* wrpr %g2, %g0, %tstate */ 1200 0x87904000, /* wrpr %g1, %g0, %tt */ 1201 0xc2d80400, /* ldxa [%g0]ASI_SCRATCHPAD, %g1 */ 1202 0xc2586000, /* ldx [%g1 + MMFSA_?_CTX], %g1 */ 1203 0x02c04004, /* brz,pn %g1, .+0x10 */ 1204 0x03000000, /* sethi %hi(new_tpc), %g1 */ 1205 0x82106000, /* or %g1, %lo(new_tpc), %g1 */ 1206 0x30800002, /* ba,a .+0x8 */ 1207 0x82106000, /* or %g1, %lo(new_tpc), %g1 */ 1208 0x81904000, /* wrpr %g1, %g0, %tpc */ 1209 0x82006004, /* add %g1, 4, %g1 */ 1210 0x83904000, /* wrpr %g1, %g0, %tnpc */ 1211 0x03000000, /* sethi %hi(tmptick), %g1 */ 1212 0x85410000, /* rd %tick, %g2 */ 1213 0xc4706000, /* stx %g2, [%g1 + %lo(tmptick)] */ 1214 0x30800000 /* ba,a addr */ 1215 #endif /* sun4v */ 1216 }; 1217 1218 ASSERT(MUTEX_HELD(&tstat_lock)); 1219 #ifndef sun4v 1220 ASSERT(entno == TSTAT_ENT_ITLBMISS || entno == TSTAT_ENT_DTLBMISS); 1221 #else 1222 ASSERT(entno == TSTAT_ENT_ITLBMISS || entno == TSTAT_ENT_DTLBMISS || 1223 entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_DMMUMISS); 1224 #endif 1225 1226 stat = TSTAT_DATA_OFFS(tcpu, tdata_traps) + entoffs; 1227 tmptick = TSTAT_DATA_OFFS(tcpu, tdata_tmptick); 1228 1229 if (itlb) { 1230 ret = &tcpu->tcpu_instr->tinst_itlbret; 1231 udata = &data->tpgsz_user.tmode_itlb; 1232 kdata = &data->tpgsz_kernel.tmode_itlb; 1233 tpc = TSTAT_INSTR_OFFS(tcpu, tinst_itlbret.ttlbr_ktlb); 1234 } else { 1235 ret = 
&tcpu->tcpu_instr->tinst_dtlbret; 1236 udata = &data->tpgsz_user.tmode_dtlb; 1237 kdata = &data->tpgsz_kernel.tmode_dtlb; 1238 tpc = TSTAT_INSTR_OFFS(tcpu, tinst_dtlbret.ttlbr_ktlb); 1239 } 1240 1241 utpc = tpc + offsetof(tstat_tlbret_t, ttlbr_utlb) - 1242 offsetof(tstat_tlbret_t, ttlbr_ktlb); 1243 1244 ASSERT(HI22(tpc) == HI22(utpc)); 1245 1246 ent = (uint32_t *)((uintptr_t)tcpu->tcpu_instr + entoffs); 1247 orig = KERNELBASE + entoffs; 1248 va = (uintptr_t)tcpu->tcpu_ibase + entoffs; 1249 baoffs = TSTAT_TLBENT_BA * sizeof (uint32_t); 1250 1251 #ifdef sun4v 1252 if (entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_DMMUMISS) { 1253 /* 1254 * Because of lack of space, interposing tlbent trap 1255 * handler for IMMU_miss and DMMU_miss traps cannot be 1256 * placed in-line. Instead, we copy it to the space set 1257 * aside for these traps in per CPU trapstat area and 1258 * invoke it by placing a branch in the trap table itself. 1259 */ 1260 static const uint32_t mmumiss[TSTAT_ENT_NINSTR] = { 1261 0x30800000, /* ba,a addr */ 1262 NOP, NOP, NOP, NOP, NOP, NOP, NOP 1263 }; 1264 uint32_t *tent = ent; /* trap vector entry */ 1265 uintptr_t tentva = va; /* trap vector entry va */ 1266 1267 if (itlb) { 1268 ent = (uint32_t *)((uintptr_t) 1269 &tcpu->tcpu_instr->tinst_immumiss); 1270 va = TSTAT_INSTR_OFFS(tcpu, tinst_immumiss); 1271 } else { 1272 ent = (uint32_t *)((uintptr_t) 1273 &tcpu->tcpu_instr->tinst_dmmumiss); 1274 va = TSTAT_INSTR_OFFS(tcpu, tinst_dmmumiss); 1275 } 1276 bcopy(mmumiss, tent, sizeof (mmumiss)); 1277 tent[0] |= DISP22(tentva, va); 1278 } 1279 #endif /* sun4v */ 1280 1281 bcopy(tlbent, ent, sizeof (tlbent)); 1282 1283 ent[TSTAT_TLBENT_STATHI] |= HI22(stat); 1284 ent[TSTAT_TLBENT_STATLO_LD] |= LO10(stat); 1285 ent[TSTAT_TLBENT_STATLO_ST] |= LO10(stat); 1286 #ifndef sun4v 1287 ent[TSTAT_TLBENT_MMUASI] |= asi; 1288 #else 1289 ent[TSTAT_TLBENT_TAGTARGET] |= tagtarget_off; 1290 #endif 1291 ent[TSTAT_TLBENT_TPCHI] |= HI22(tpc); 1292 
ent[TSTAT_TLBENT_TPCLO_USER] |= LO10(utpc); 1293 ent[TSTAT_TLBENT_TPCLO_KERN] |= LO10(tpc); 1294 ent[TSTAT_TLBENT_TSHI] |= HI22(tmptick); 1295 ent[TSTAT_TLBENT_TSLO] |= LO10(tmptick); 1296 ent[TSTAT_TLBENT_BA] |= DISP22(va + baoffs, orig); 1297 1298 /* 1299 * And now set up the TLB return entries. 1300 */ 1301 trapstat_tlbretent(tcpu, &ret->ttlbr_ktlb, &kdata->ttlb_tlb); 1302 trapstat_tlbretent(tcpu, &ret->ttlbr_ktsb, &kdata->ttlb_tsb); 1303 trapstat_tlbretent(tcpu, &ret->ttlbr_utlb, &udata->ttlb_tlb); 1304 trapstat_tlbretent(tcpu, &ret->ttlbr_utsb, &udata->ttlb_tsb); 1305 } 1306 1307 #undef TSTAT_TLBENT_STATHI 1308 #undef TSTAT_TLBENT_STATLO_LD 1309 #undef TSTAT_TLBENT_STATLO_ST 1310 #ifndef sun4v 1311 #undef TSTAT_TLBENT_MMUASI 1312 #else 1313 #undef TSTAT_TLBENT_TAGTARGET 1314 #endif 1315 #undef TSTAT_TLBENT_TPCHI 1316 #undef TSTAT_TLBENT_TPCLO_USER 1317 #undef TSTAT_TLBENT_TPCLO_KERN 1318 #undef TSTAT_TLBENT_TSHI 1319 #undef TSTAT_TLBENT_TSLO 1320 #undef TSTAT_TLBENT_BA 1321 1322 /* 1323 * The TSTAT_ENABLED_* constants define offsets in the enabled entry; the 1324 * TSTAT_DISABLED_BA constant defines an offset in the disabled entry. Both 1325 * sets of constants are used only in trapstat_make_traptab() (below) and 1326 * #undef'd immediately afterwards. Any change to "enabled" or "disabled" 1327 * in trapstat_make_traptab() will likely require changes to these constants. 1328 */ 1329 #define TSTAT_ENABLED_STATHI 0 1330 #define TSTAT_ENABLED_STATLO_LD 1 1331 #define TSTAT_ENABLED_STATLO_ST 3 1332 #define TSTAT_ENABLED_BA 4 1333 #define TSTAT_DISABLED_BA 0 1334 1335 static void 1336 trapstat_make_traptab(tstat_percpu_t *tcpu) 1337 { 1338 uint32_t *ent; 1339 uint64_t *stat; 1340 uintptr_t orig, va, en_baoffs, dis_baoffs; 1341 int nent; 1342 1343 /* 1344 * This is the entry in the interposing trap table for enabled trap 1345 * table entries. It loads a counter, increments it and stores it 1346 * back before branching to the actual trap table entry. 
1347 */ 1348 static const uint32_t enabled[TSTAT_ENT_NINSTR] = { 1349 0x03000000, /* sethi %hi(stat), %g1 */ 1350 0xc4586000, /* ldx [%g1 + %lo(stat)], %g2 */ 1351 0x8400a001, /* add %g2, 1, %g2 */ 1352 0xc4706000, /* stx %g2, [%g1 + %lo(stat)] */ 1353 0x30800000, /* ba,a addr */ 1354 NOP, NOP, NOP 1355 }; 1356 1357 /* 1358 * This is the entry in the interposing trap table for disabled trap 1359 * table entries. It simply branches to the actual, underlying trap 1360 * table entry. As explained in the "Implementation Details" section 1361 * of the block comment, all TL>0 traps _must_ use the disabled entry; 1362 * additional entries may be explicitly disabled through the use 1363 * of TSTATIOC_ENTRY/TSTATIOC_NOENTRY. 1364 */ 1365 static const uint32_t disabled[TSTAT_ENT_NINSTR] = { 1366 0x30800000, /* ba,a addr */ 1367 NOP, NOP, NOP, NOP, NOP, NOP, NOP, 1368 }; 1369 1370 ASSERT(MUTEX_HELD(&tstat_lock)); 1371 1372 ent = tcpu->tcpu_instr->tinst_traptab; 1373 stat = (uint64_t *)TSTAT_DATA_OFFS(tcpu, tdata_traps); 1374 orig = KERNELBASE; 1375 va = (uintptr_t)tcpu->tcpu_ibase; 1376 en_baoffs = TSTAT_ENABLED_BA * sizeof (uint32_t); 1377 dis_baoffs = TSTAT_DISABLED_BA * sizeof (uint32_t); 1378 1379 for (nent = 0; nent < TSTAT_TOTAL_NENT; nent++) { 1380 if (tstat_enabled[nent]) { 1381 bcopy(enabled, ent, sizeof (enabled)); 1382 ent[TSTAT_ENABLED_STATHI] |= HI22((uintptr_t)stat); 1383 ent[TSTAT_ENABLED_STATLO_LD] |= LO10((uintptr_t)stat); 1384 ent[TSTAT_ENABLED_STATLO_ST] |= LO10((uintptr_t)stat); 1385 ent[TSTAT_ENABLED_BA] |= DISP22(va + en_baoffs, orig); 1386 } else { 1387 bcopy(disabled, ent, sizeof (disabled)); 1388 ent[TSTAT_DISABLED_BA] |= DISP22(va + dis_baoffs, orig); 1389 } 1390 1391 stat++; 1392 orig += sizeof (enabled); 1393 ent += sizeof (enabled) / sizeof (*ent); 1394 va += sizeof (enabled); 1395 } 1396 } 1397 1398 #undef TSTAT_ENABLED_STATHI 1399 #undef TSTAT_ENABLED_STATLO_LD 1400 #undef TSTAT_ENABLED_STATLO_ST 1401 #undef TSTAT_ENABLED_BA 1402 #undef 
TSTAT_DISABLED_BA 1403 1404 #ifndef sun4v 1405 /* 1406 * See Section A.6 in SPARC v9 Manual. 1407 * max branch = 4*((2^21)-1) = 8388604 1408 */ 1409 #define MAX_BICC_BRANCH_DISPLACEMENT (4 * ((1 << 21) - 1)) 1410 #endif 1411 1412 static void 1413 trapstat_setup(processorid_t cpu) 1414 { 1415 tstat_percpu_t *tcpu = &tstat_percpu[cpu]; 1416 #ifndef sun4v 1417 int i; 1418 caddr_t va; 1419 pfn_t *pfn; 1420 cpu_t *cp; 1421 uint_t strand_idx; 1422 size_t tstat_offset; 1423 #endif 1424 1425 ASSERT(tcpu->tcpu_pfn == NULL); 1426 ASSERT(tcpu->tcpu_instr == NULL); 1427 ASSERT(tcpu->tcpu_data == NULL); 1428 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 1429 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED)); 1430 ASSERT(MUTEX_HELD(&cpu_lock)); 1431 ASSERT(MUTEX_HELD(&tstat_lock)); 1432 1433 /* 1434 * The lower fifteen bits of the %tba are always read as zero; we must 1435 * align our instruction base address appropriately. 1436 */ 1437 #ifndef sun4v 1438 tstat_offset = tstat_total_size; 1439 1440 cp = cpu_get(cpu); 1441 ASSERT(cp != NULL); 1442 if ((strand_idx = cpu ^ chip_plat_get_coreid(cp)) != 0) { 1443 /* 1444 * On sun4u platforms with multiple CPUs sharing the MMU 1445 * (Olympus-C has 2 strands per core), each CPU uses a 1446 * disjoint trap table. The indexing is based on the 1447 * strand id, which is obtained by XOR'ing the cpuid with 1448 * the coreid. 1449 */ 1450 tstat_offset += tstat_total_size * strand_idx; 1451 1452 /* 1453 * Offset must be less than the maximum PC-relative branch 1454 * displacement for Bicc variants. See the Implementation 1455 * Details comment. 
1456 */ 1457 ASSERT(tstat_offset <= MAX_BICC_BRANCH_DISPLACEMENT); 1458 } 1459 1460 tcpu->tcpu_ibase = (caddr_t)((KERNELBASE - tstat_offset) 1461 & TSTAT_TBA_MASK); 1462 tcpu->tcpu_dbase = tcpu->tcpu_ibase + TSTAT_INSTR_SIZE; 1463 tcpu->tcpu_vabase = tcpu->tcpu_ibase; 1464 1465 tcpu->tcpu_pfn = vmem_alloc(tstat_arena, tstat_total_pages, VM_SLEEP); 1466 bzero(tcpu->tcpu_pfn, tstat_total_pages); 1467 pfn = tcpu->tcpu_pfn; 1468 1469 tcpu->tcpu_instr = vmem_alloc(tstat_arena, TSTAT_INSTR_SIZE, VM_SLEEP); 1470 1471 va = (caddr_t)tcpu->tcpu_instr; 1472 for (i = 0; i < TSTAT_INSTR_PAGES; i++, va += MMU_PAGESIZE) 1473 *pfn++ = hat_getpfnum(kas.a_hat, va); 1474 1475 /* 1476 * We must be sure that the pages that we will use to examine the data 1477 * have the same virtual color as the pages to which the data is being 1478 * recorded, hence the alignment and phase constraints on the 1479 * allocation. 1480 */ 1481 tcpu->tcpu_data = vmem_xalloc(tstat_arena, tstat_data_size, 1482 shm_alignment, (uintptr_t)tcpu->tcpu_dbase & (shm_alignment - 1), 1483 0, 0, NULL, VM_SLEEP); 1484 bzero(tcpu->tcpu_data, tstat_data_size); 1485 tcpu->tcpu_data->tdata_cpuid = cpu; 1486 1487 va = (caddr_t)tcpu->tcpu_data; 1488 for (i = 0; i < tstat_data_pages; i++, va += MMU_PAGESIZE) 1489 *pfn++ = hat_getpfnum(kas.a_hat, va); 1490 #else /* sun4v */ 1491 ASSERT(!(tstat_total_size > (1 + ~TSTAT_TBA_MASK))); 1492 tcpu->tcpu_vabase = (caddr_t)(KERNELBASE - MMU_PAGESIZE4M); 1493 tcpu->tcpu_ibase = tcpu->tcpu_vabase + (cpu * (1 + ~TSTAT_TBA_MASK)); 1494 tcpu->tcpu_dbase = tcpu->tcpu_ibase + TSTAT_INSTR_SIZE; 1495 1496 tcpu->tcpu_pfn = &tstat_pfn; 1497 tcpu->tcpu_instr = (tstat_instr_t *)(tstat_va + (cpu * 1498 (1 + ~TSTAT_TBA_MASK))); 1499 tcpu->tcpu_data = (tstat_data_t *)(tstat_va + (cpu * 1500 (1 + ~TSTAT_TBA_MASK)) + TSTAT_INSTR_SIZE); 1501 bzero(tcpu->tcpu_data, tstat_data_size); 1502 tcpu->tcpu_data->tdata_cpuid = cpu; 1503 #endif /* sun4v */ 1504 1505 /* 1506 * Now that we have all of the instruction 
and data pages allocated, 1507 * make the trap table from scratch. 1508 */ 1509 trapstat_make_traptab(tcpu); 1510 1511 if (tstat_options & TSTAT_OPT_TLBDATA) { 1512 /* 1513 * TLB Statistics have been specified; set up the I- and D-TLB 1514 * entries and corresponding TLB return entries. 1515 */ 1516 #ifndef sun4v 1517 trapstat_tlbent(tcpu, TSTAT_ENT_ITLBMISS); 1518 trapstat_tlbent(tcpu, TSTAT_ENT_DTLBMISS); 1519 #else 1520 if (tstat_fast_tlbstat) { 1521 trapstat_tlbent(tcpu, TSTAT_ENT_IMMUMISS); 1522 trapstat_tlbent(tcpu, TSTAT_ENT_DMMUMISS); 1523 } else { 1524 trapstat_tlbent(tcpu, TSTAT_ENT_ITLBMISS); 1525 trapstat_tlbent(tcpu, TSTAT_ENT_DTLBMISS); 1526 } 1527 #endif 1528 } 1529 1530 tcpu->tcpu_flags |= TSTAT_CPU_ALLOCATED; 1531 1532 /* 1533 * Finally, get the target CPU to load the locked pages into its TLBs. 1534 */ 1535 xc_one(cpu, (xcfunc_t *)trapstat_load_tlb, 0, 0); 1536 } 1537 1538 static void 1539 trapstat_teardown(processorid_t cpu) 1540 { 1541 tstat_percpu_t *tcpu = &tstat_percpu[cpu]; 1542 #ifndef sun4v 1543 int i; 1544 #endif 1545 caddr_t va = tcpu->tcpu_vabase; 1546 1547 ASSERT(tcpu->tcpu_pfn != NULL); 1548 ASSERT(tcpu->tcpu_instr != NULL); 1549 ASSERT(tcpu->tcpu_data != NULL); 1550 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 1551 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 1552 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 1553 ASSERT(MUTEX_HELD(&cpu_lock)); 1554 ASSERT(MUTEX_HELD(&tstat_lock)); 1555 1556 #ifndef sun4v 1557 vmem_free(tstat_arena, tcpu->tcpu_pfn, tstat_total_pages); 1558 vmem_free(tstat_arena, tcpu->tcpu_instr, TSTAT_INSTR_SIZE); 1559 vmem_free(tstat_arena, tcpu->tcpu_data, tstat_data_size); 1560 1561 for (i = 0; i < tstat_total_pages; i++, va += MMU_PAGESIZE) { 1562 xt_one(cpu, vtag_flushpage_tl1, (uint64_t)va, 1563 (uint64_t)ksfmmup); 1564 } 1565 #else 1566 xt_one(cpu, vtag_unmap_perm_tl1, (uint64_t)va, KCONTEXT); 1567 #endif 1568 1569 tcpu->tcpu_pfn = NULL; 1570 tcpu->tcpu_instr = NULL; 1571 tcpu->tcpu_data = NULL; 1572 
tcpu->tcpu_flags &= ~TSTAT_CPU_ALLOCATED; 1573 } 1574 1575 static int 1576 trapstat_go() 1577 { 1578 cpu_t *cp; 1579 1580 mutex_enter(&cpu_lock); 1581 mutex_enter(&tstat_lock); 1582 1583 if (tstat_running) { 1584 mutex_exit(&tstat_lock); 1585 mutex_exit(&cpu_lock); 1586 return (EBUSY); 1587 } 1588 1589 #ifdef sun4v 1590 /* 1591 * Allocate large page to hold interposing tables. 1592 */ 1593 tstat_va = contig_mem_alloc(MMU_PAGESIZE4M); 1594 tstat_pfn = va_to_pfn(tstat_va); 1595 if (tstat_pfn == PFN_INVALID) 1596 return (EAGAIN); 1597 1598 /* 1599 * For detailed TLB statistics, invoke CPU specific interface 1600 * to see if it supports a low overhead interface to collect 1601 * TSB hit statistics. If so, make set tstat_fast_tlbstat flag 1602 * to reflect that. 1603 */ 1604 if (tstat_options & TSTAT_OPT_TLBDATA) { 1605 int error; 1606 1607 error = cpu_trapstat_conf(CPU_TSTATCONF_INIT); 1608 if (error == 0) 1609 tstat_fast_tlbstat = B_TRUE; 1610 else if (error != ENOTSUP) { 1611 contig_mem_free(tstat_va, MMU_PAGESIZE4M); 1612 return (error); 1613 } 1614 } 1615 #endif 1616 1617 /* 1618 * First, perform any necessary hot patching. 1619 */ 1620 trapstat_hotpatch(); 1621 1622 /* 1623 * Allocate the resources we'll need to measure probe effect. 1624 */ 1625 trapstat_probe_alloc(); 1626 1627 1628 cp = cpu_list; 1629 do { 1630 if (!(tstat_percpu[cp->cpu_id].tcpu_flags & TSTAT_CPU_SELECTED)) 1631 continue; 1632 1633 trapstat_setup(cp->cpu_id); 1634 1635 /* 1636 * Note that due to trapstat_probe()'s use of global data, 1637 * we determine the probe effect on each CPU serially instead 1638 * of in parallel with an xc_all(). 
1639 */ 1640 xc_one(cp->cpu_id, (xcfunc_t *)trapstat_probe, 0, 0); 1641 } while ((cp = cp->cpu_next) != cpu_list); 1642 1643 xc_all((xcfunc_t *)trapstat_enable, 0, 0); 1644 1645 trapstat_probe_free(); 1646 tstat_running = 1; 1647 mutex_exit(&tstat_lock); 1648 mutex_exit(&cpu_lock); 1649 1650 return (0); 1651 } 1652 1653 static int 1654 trapstat_stop() 1655 { 1656 int i; 1657 1658 mutex_enter(&cpu_lock); 1659 mutex_enter(&tstat_lock); 1660 if (!tstat_running) { 1661 mutex_exit(&tstat_lock); 1662 mutex_exit(&cpu_lock); 1663 return (ENXIO); 1664 } 1665 1666 xc_all((xcfunc_t *)trapstat_disable, 0, 0); 1667 1668 for (i = 0; i <= max_cpuid; i++) { 1669 if (tstat_percpu[i].tcpu_flags & TSTAT_CPU_ALLOCATED) 1670 trapstat_teardown(i); 1671 } 1672 1673 #ifdef sun4v 1674 if (tstat_options & TSTAT_OPT_TLBDATA) 1675 cpu_trapstat_conf(CPU_TSTATCONF_FINI); 1676 contig_mem_free(tstat_va, MMU_PAGESIZE4M); 1677 #endif 1678 trapstat_hotpatch(); 1679 tstat_running = 0; 1680 mutex_exit(&tstat_lock); 1681 mutex_exit(&cpu_lock); 1682 1683 return (0); 1684 } 1685 1686 /* 1687 * This is trapstat's DR CPU configuration callback. It's called (with 1688 * cpu_lock held) to unconfigure a newly powered-off CPU, or to configure a 1689 * powered-off CPU that is to be brought into the system. We need only take 1690 * action in the unconfigure case: because a powered-off CPU will have its 1691 * trap table restored to KERNELBASE if it is ever powered back on, we must 1692 * update the flags to reflect that trapstat is no longer enabled on the 1693 * powered-off CPU. Note that this means that a TSTAT_CPU_ENABLED CPU that 1694 * is unconfigured/powered off and later powered back on/reconfigured will 1695 * _not_ be re-TSTAT_CPU_ENABLED. 
1696 */ 1697 static int 1698 trapstat_cpu_setup(cpu_setup_t what, processorid_t cpu) 1699 { 1700 tstat_percpu_t *tcpu = &tstat_percpu[cpu]; 1701 1702 ASSERT(MUTEX_HELD(&cpu_lock)); 1703 mutex_enter(&tstat_lock); 1704 1705 if (!tstat_running) { 1706 mutex_exit(&tstat_lock); 1707 return (0); 1708 } 1709 1710 switch (what) { 1711 case CPU_CONFIG: 1712 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 1713 break; 1714 1715 case CPU_UNCONFIG: 1716 if (tcpu->tcpu_flags & TSTAT_CPU_ENABLED) { 1717 tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED; 1718 #ifdef sun4v 1719 /* 1720 * A power-off, causes the cpu mondo queues to be 1721 * unconfigured on sun4v. Since we can't teardown 1722 * trapstat's mappings on the cpu that is going away, 1723 * we simply mark it as not allocated. This will 1724 * prevent a teardown on a cpu with the same cpu id 1725 * that might have been added while trapstat is running. 1726 */ 1727 if (tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED) { 1728 tcpu->tcpu_pfn = NULL; 1729 tcpu->tcpu_instr = NULL; 1730 tcpu->tcpu_data = NULL; 1731 tcpu->tcpu_flags &= ~TSTAT_CPU_ALLOCATED; 1732 } 1733 #endif 1734 } 1735 break; 1736 1737 default: 1738 break; 1739 } 1740 1741 mutex_exit(&tstat_lock); 1742 return (0); 1743 } 1744 1745 /* 1746 * This is called before a CPR suspend and after a CPR resume. We don't have 1747 * anything to do before a suspend, but after a restart we must restore the 1748 * trap table to be our interposing trap table. However, we don't actually 1749 * know whether or not the CPUs have been powered off -- this routine may be 1750 * called while restoring from a failed CPR suspend. We thus run through each 1751 * TSTAT_CPU_ENABLED CPU, and explicitly destroy and reestablish its 1752 * interposing trap table. This assures that our state is correct regardless 1753 * of whether or not the CPU has been newly powered on. 
1754 */ 1755 /*ARGSUSED*/ 1756 static boolean_t 1757 trapstat_cpr(void *arg, int code) 1758 { 1759 cpu_t *cp; 1760 1761 if (code == CB_CODE_CPR_CHKPT) 1762 return (B_TRUE); 1763 1764 ASSERT(code == CB_CODE_CPR_RESUME); 1765 1766 mutex_enter(&cpu_lock); 1767 mutex_enter(&tstat_lock); 1768 1769 if (!tstat_running) { 1770 mutex_exit(&tstat_lock); 1771 mutex_exit(&cpu_lock); 1772 return (B_TRUE); 1773 } 1774 1775 cp = cpu_list; 1776 do { 1777 tstat_percpu_t *tcpu = &tstat_percpu[cp->cpu_id]; 1778 1779 if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)) 1780 continue; 1781 1782 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 1783 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 1784 1785 xc_one(cp->cpu_id, (xcfunc_t *)trapstat_disable, 0, 0); 1786 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 1787 1788 /* 1789 * Preserve this CPU's data in tstat_buffer and rip down its 1790 * interposing trap table. 1791 */ 1792 bcopy(tcpu->tcpu_data, tstat_buffer, tstat_data_t_size); 1793 trapstat_teardown(cp->cpu_id); 1794 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED)); 1795 1796 /* 1797 * Reestablish the interposing trap table and restore the old 1798 * data. 1799 */ 1800 trapstat_setup(cp->cpu_id); 1801 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 1802 bcopy(tstat_buffer, tcpu->tcpu_data, tstat_data_t_size); 1803 1804 xc_one(cp->cpu_id, (xcfunc_t *)trapstat_enable, 0, 0); 1805 } while ((cp = cp->cpu_next) != cpu_list); 1806 1807 mutex_exit(&tstat_lock); 1808 mutex_exit(&cpu_lock); 1809 1810 return (B_TRUE); 1811 } 1812 1813 /*ARGSUSED*/ 1814 static int 1815 trapstat_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 1816 { 1817 int i; 1818 1819 mutex_enter(&cpu_lock); 1820 mutex_enter(&tstat_lock); 1821 if (tstat_open != 0) { 1822 mutex_exit(&tstat_lock); 1823 mutex_exit(&cpu_lock); 1824 return (EBUSY); 1825 } 1826 1827 /* 1828 * Register this in open() rather than in attach() to prevent deadlock 1829 * with DR code. 
During attach, I/O device tree locks are grabbed 1830 * before trapstat_attach() is invoked - registering in attach 1831 * will result in the lock order: device tree lock, cpu_lock. 1832 * DR code however requires that cpu_lock be acquired before 1833 * device tree locks. 1834 */ 1835 ASSERT(!tstat_running); 1836 register_cpu_setup_func((cpu_setup_func_t *)trapstat_cpu_setup, NULL); 1837 1838 /* 1839 * Clear all options. And until specific CPUs are specified, we'll 1840 * mark all CPUs as selected. 1841 */ 1842 tstat_options = 0; 1843 1844 for (i = 0; i <= max_cpuid; i++) 1845 tstat_percpu[i].tcpu_flags |= TSTAT_CPU_SELECTED; 1846 1847 /* 1848 * By default, all traps at TL=0 are enabled. Traps at TL>0 must 1849 * be disabled. 1850 */ 1851 for (i = 0; i < TSTAT_TOTAL_NENT; i++) 1852 tstat_enabled[i] = i < TSTAT_NENT ? 1 : 0; 1853 1854 tstat_open = 1; 1855 mutex_exit(&tstat_lock); 1856 mutex_exit(&cpu_lock); 1857 1858 return (0); 1859 } 1860 1861 /*ARGSUSED*/ 1862 static int 1863 trapstat_close(dev_t dev, int flag, int otyp, cred_t *cred_p) 1864 { 1865 (void) trapstat_stop(); 1866 1867 ASSERT(!tstat_running); 1868 1869 mutex_enter(&cpu_lock); 1870 unregister_cpu_setup_func((cpu_setup_func_t *)trapstat_cpu_setup, NULL); 1871 mutex_exit(&cpu_lock); 1872 1873 tstat_open = 0; 1874 return (DDI_SUCCESS); 1875 } 1876 1877 static int 1878 trapstat_option(int option) 1879 { 1880 mutex_enter(&tstat_lock); 1881 1882 if (tstat_running) { 1883 mutex_exit(&tstat_lock); 1884 return (EBUSY); 1885 } 1886 1887 tstat_options |= option; 1888 mutex_exit(&tstat_lock); 1889 1890 return (0); 1891 } 1892 1893 /*ARGSUSED*/ 1894 static int 1895 trapstat_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *crd, int *rval) 1896 { 1897 int i, j, out; 1898 size_t dsize; 1899 1900 switch (cmd) { 1901 case TSTATIOC_GO: 1902 return (trapstat_go()); 1903 1904 case TSTATIOC_NOGO: 1905 return (trapstat_option(TSTAT_OPT_NOGO)); 1906 1907 case TSTATIOC_STOP: 1908 return (trapstat_stop()); 1909 1910 
case TSTATIOC_CPU: 1911 if (arg < 0 || arg > max_cpuid) 1912 return (EINVAL); 1913 /*FALLTHROUGH*/ 1914 1915 case TSTATIOC_NOCPU: 1916 mutex_enter(&tstat_lock); 1917 1918 if (tstat_running) { 1919 mutex_exit(&tstat_lock); 1920 return (EBUSY); 1921 } 1922 1923 /* 1924 * If this is the first CPU to be specified (or if we are 1925 * being asked to explicitly de-select CPUs), disable all CPUs. 1926 */ 1927 if (!(tstat_options & TSTAT_OPT_CPU) || cmd == TSTATIOC_NOCPU) { 1928 tstat_options |= TSTAT_OPT_CPU; 1929 1930 for (i = 0; i <= max_cpuid; i++) { 1931 tstat_percpu_t *tcpu = &tstat_percpu[i]; 1932 1933 ASSERT(cmd == TSTATIOC_NOCPU || 1934 (tcpu->tcpu_flags & TSTAT_CPU_SELECTED)); 1935 tcpu->tcpu_flags &= ~TSTAT_CPU_SELECTED; 1936 } 1937 } 1938 1939 if (cmd == TSTATIOC_CPU) 1940 tstat_percpu[arg].tcpu_flags |= TSTAT_CPU_SELECTED; 1941 1942 mutex_exit(&tstat_lock); 1943 1944 return (0); 1945 1946 case TSTATIOC_ENTRY: 1947 mutex_enter(&tstat_lock); 1948 1949 if (tstat_running) { 1950 mutex_exit(&tstat_lock); 1951 return (EBUSY); 1952 } 1953 1954 if (arg >= TSTAT_NENT || arg < 0) { 1955 mutex_exit(&tstat_lock); 1956 return (EINVAL); 1957 } 1958 1959 if (!(tstat_options & TSTAT_OPT_ENTRY)) { 1960 /* 1961 * If this is the first entry that we are explicitly 1962 * enabling, explicitly disable every TL=0 entry. 
1963 */ 1964 for (i = 0; i < TSTAT_NENT; i++) 1965 tstat_enabled[i] = 0; 1966 1967 tstat_options |= TSTAT_OPT_ENTRY; 1968 } 1969 1970 tstat_enabled[arg] = 1; 1971 mutex_exit(&tstat_lock); 1972 return (0); 1973 1974 case TSTATIOC_NOENTRY: 1975 mutex_enter(&tstat_lock); 1976 1977 if (tstat_running) { 1978 mutex_exit(&tstat_lock); 1979 return (EBUSY); 1980 } 1981 1982 for (i = 0; i < TSTAT_NENT; i++) 1983 tstat_enabled[i] = 0; 1984 1985 mutex_exit(&tstat_lock); 1986 return (0); 1987 1988 case TSTATIOC_READ: 1989 mutex_enter(&tstat_lock); 1990 1991 if (tstat_options & TSTAT_OPT_TLBDATA) { 1992 dsize = tstat_data_t_exported_size; 1993 } else { 1994 dsize = sizeof (tstat_data_t); 1995 } 1996 1997 for (i = 0, out = 0; i <= max_cpuid; i++) { 1998 tstat_percpu_t *tcpu = &tstat_percpu[i]; 1999 2000 if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)) 2001 continue; 2002 2003 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 2004 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 2005 2006 tstat_buffer->tdata_cpuid = -1; 2007 xc_one(i, (xcfunc_t *)trapstat_snapshot, 0, 0); 2008 2009 if (tstat_buffer->tdata_cpuid == -1) { 2010 /* 2011 * This CPU is not currently responding to 2012 * cross calls; we have caught it while it is 2013 * being unconfigured. We'll drop tstat_lock 2014 * and pick up and drop cpu_lock. By the 2015 * time we acquire cpu_lock, the DR operation 2016 * will appear consistent and we can assert 2017 * that trapstat_cpu_setup() has cleared 2018 * TSTAT_CPU_ENABLED. 2019 */ 2020 mutex_exit(&tstat_lock); 2021 mutex_enter(&cpu_lock); 2022 mutex_exit(&cpu_lock); 2023 mutex_enter(&tstat_lock); 2024 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 2025 continue; 2026 } 2027 2028 /* 2029 * Need to compensate for the difference between page 2030 * sizes exported to users and page sizes available 2031 * within the kernel. 
2032 */ 2033 if ((tstat_options & TSTAT_OPT_TLBDATA) && 2034 (tstat_pgszs != tstat_user_pgszs)) { 2035 tstat_pgszdata_t *tp; 2036 uint_t szc; 2037 2038 tp = &tstat_buffer->tdata_pgsz[0]; 2039 for (j = 0; j < tstat_user_pgszs; j++) { 2040 if ((szc = USERSZC_2_SZC(j)) != j) { 2041 bcopy(&tp[szc], &tp[j], 2042 sizeof (tstat_pgszdata_t)); 2043 } 2044 } 2045 } 2046 2047 if (copyout(tstat_buffer, (void *)arg, dsize) != 0) { 2048 mutex_exit(&tstat_lock); 2049 return (EFAULT); 2050 } 2051 2052 out++; 2053 arg += dsize; 2054 } 2055 2056 if (out != max_cpuid + 1) { 2057 processorid_t cpuid = -1; 2058 arg += offsetof(tstat_data_t, tdata_cpuid); 2059 2060 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) { 2061 mutex_exit(&tstat_lock); 2062 return (EFAULT); 2063 } 2064 } 2065 2066 mutex_exit(&tstat_lock); 2067 2068 return (0); 2069 2070 case TSTATIOC_TLBDATA: 2071 return (trapstat_option(TSTAT_OPT_TLBDATA)); 2072 2073 default: 2074 break; 2075 } 2076 2077 return (ENOTTY); 2078 } 2079 2080 /*ARGSUSED*/ 2081 static int 2082 trapstat_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 2083 { 2084 int error; 2085 2086 switch (infocmd) { 2087 case DDI_INFO_DEVT2DEVINFO: 2088 *result = (void *)tstat_devi; 2089 error = DDI_SUCCESS; 2090 break; 2091 case DDI_INFO_DEVT2INSTANCE: 2092 *result = (void *)0; 2093 error = DDI_SUCCESS; 2094 break; 2095 default: 2096 error = DDI_FAILURE; 2097 } 2098 return (error); 2099 } 2100 2101 static int 2102 trapstat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 2103 { 2104 switch (cmd) { 2105 case DDI_ATTACH: 2106 break; 2107 2108 case DDI_RESUME: 2109 return (DDI_SUCCESS); 2110 2111 default: 2112 return (DDI_FAILURE); 2113 } 2114 2115 if (ddi_create_minor_node(devi, "trapstat", S_IFCHR, 2116 0, DDI_PSEUDO, 0) == DDI_FAILURE) { 2117 ddi_remove_minor_node(devi, NULL); 2118 return (DDI_FAILURE); 2119 } 2120 2121 ddi_report_dev(devi); 2122 tstat_devi = devi; 2123 2124 tstat_pgszs = page_num_pagesizes(); 2125 tstat_user_pgszs = 
page_num_user_pagesizes(); 2126 tstat_data_t_size = sizeof (tstat_data_t) + 2127 (tstat_pgszs - 1) * sizeof (tstat_pgszdata_t); 2128 tstat_data_t_exported_size = sizeof (tstat_data_t) + 2129 (tstat_user_pgszs - 1) * sizeof (tstat_pgszdata_t); 2130 #ifndef sun4v 2131 tstat_data_pages = (tstat_data_t_size >> MMU_PAGESHIFT) + 1; 2132 tstat_total_pages = TSTAT_INSTR_PAGES + tstat_data_pages; 2133 tstat_data_size = tstat_data_pages * MMU_PAGESIZE; 2134 tstat_total_size = TSTAT_INSTR_SIZE + tstat_data_size; 2135 #else 2136 tstat_data_pages = 0; 2137 tstat_data_size = tstat_data_t_size; 2138 tstat_total_pages = ((TSTAT_INSTR_SIZE + tstat_data_size) >> 2139 MMU_PAGESHIFT) + 1; 2140 tstat_total_size = tstat_total_pages * MMU_PAGESIZE; 2141 #endif 2142 2143 tstat_percpu = kmem_zalloc((max_cpuid + 1) * 2144 sizeof (tstat_percpu_t), KM_SLEEP); 2145 2146 /* 2147 * Create our own arena backed by segkmem to assure a source of 2148 * MMU_PAGESIZE-aligned allocations. We allocate out of the 2149 * heap32_arena to assure that we can address the allocated memory with 2150 * a single sethi/simm13 pair in the interposing trap table entries. 2151 */ 2152 tstat_arena = vmem_create("trapstat", NULL, 0, MMU_PAGESIZE, 2153 segkmem_alloc_permanent, segkmem_free, heap32_arena, 0, VM_SLEEP); 2154 2155 tstat_enabled = kmem_alloc(TSTAT_TOTAL_NENT * sizeof (int), KM_SLEEP); 2156 tstat_buffer = kmem_alloc(tstat_data_t_size, KM_SLEEP); 2157 2158 /* 2159 * CB_CL_CPR_POST_USER is the class that executes from cpr_resume() 2160 * after user threads can be restarted. By executing in this class, 2161 * we are assured of the availability of system services needed to 2162 * resume trapstat (specifically, we are assured that all CPUs are 2163 * restarted and responding to cross calls). 
2164 */ 2165 tstat_cprcb = 2166 callb_add(trapstat_cpr, NULL, CB_CL_CPR_POST_USER, "trapstat"); 2167 2168 return (DDI_SUCCESS); 2169 } 2170 2171 static int 2172 trapstat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 2173 { 2174 int rval; 2175 2176 ASSERT(devi == tstat_devi); 2177 2178 switch (cmd) { 2179 case DDI_DETACH: 2180 break; 2181 2182 case DDI_SUSPEND: 2183 return (DDI_SUCCESS); 2184 2185 default: 2186 return (DDI_FAILURE); 2187 } 2188 2189 ASSERT(!tstat_running); 2190 2191 rval = callb_delete(tstat_cprcb); 2192 ASSERT(rval == 0); 2193 2194 kmem_free(tstat_buffer, tstat_data_t_size); 2195 kmem_free(tstat_enabled, TSTAT_TOTAL_NENT * sizeof (int)); 2196 vmem_destroy(tstat_arena); 2197 kmem_free(tstat_percpu, (max_cpuid + 1) * sizeof (tstat_percpu_t)); 2198 ddi_remove_minor_node(devi, NULL); 2199 2200 return (DDI_SUCCESS); 2201 } 2202 2203 /* 2204 * Configuration data structures 2205 */ 2206 static struct cb_ops trapstat_cb_ops = { 2207 trapstat_open, /* open */ 2208 trapstat_close, /* close */ 2209 nulldev, /* strategy */ 2210 nulldev, /* print */ 2211 nodev, /* dump */ 2212 nodev, /* read */ 2213 nodev, /* write */ 2214 trapstat_ioctl, /* ioctl */ 2215 nodev, /* devmap */ 2216 nodev, /* mmap */ 2217 nodev, /* segmap */ 2218 nochpoll, /* poll */ 2219 ddi_prop_op, /* cb_prop_op */ 2220 0, /* streamtab */ 2221 D_MP | D_NEW /* Driver compatibility flag */ 2222 }; 2223 2224 static struct dev_ops trapstat_ops = { 2225 DEVO_REV, /* devo_rev, */ 2226 0, /* refcnt */ 2227 trapstat_info, /* getinfo */ 2228 nulldev, /* identify */ 2229 nulldev, /* probe */ 2230 trapstat_attach, /* attach */ 2231 trapstat_detach, /* detach */ 2232 nulldev, /* reset */ 2233 &trapstat_cb_ops, /* cb_ops */ 2234 (struct bus_ops *)0, /* bus_ops */ 2235 }; 2236 2237 static struct modldrv modldrv = { 2238 &mod_driverops, /* Type of module. 
This one is a driver */ 2239 "Trap Statistics", /* name of module */ 2240 &trapstat_ops, /* driver ops */ 2241 }; 2242 2243 static struct modlinkage modlinkage = { 2244 MODREV_1, (void *)&modldrv, NULL 2245 }; 2246 2247 int 2248 _init(void) 2249 { 2250 return (mod_install(&modlinkage)); 2251 } 2252 2253 int 2254 _fini(void) 2255 { 2256 return (mod_remove(&modlinkage)); 2257 } 2258 2259 int 2260 _info(struct modinfo *modinfop) 2261 { 2262 return (mod_info(&modlinkage, modinfop)); 2263 } 2264