1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 #include <sys/systm.h> 28 #include <sys/conf.h> 29 #include <sys/stat.h> 30 #include <sys/ddi.h> 31 #include <sys/sunddi.h> 32 #include <sys/modctl.h> 33 #include <sys/cpu_module.h> 34 #include <vm/hat_sfmmu.h> 35 #include <vm/seg_kmem.h> 36 #include <vm/seg_kpm.h> 37 #include <vm/vm_dep.h> 38 #include <sys/machsystm.h> 39 #include <sys/machasi.h> 40 #include <sys/sysmacros.h> 41 #include <sys/callb.h> 42 #include <sys/archsystm.h> 43 #include <sys/trapstat.h> 44 #ifdef sun4v 45 #include <sys/hypervisor_api.h> 46 #endif 47 #ifndef sun4v 48 #include <sys/pghw.h> 49 #endif 50 51 /* BEGIN CSTYLED */ 52 /* 53 * trapstat: Trap Statistics through Dynamic Trap Table Interposition 54 * ------------------------------------------------------------------- 55 * 56 * Motivation and Overview 57 * 58 * Despite being a fundamental indicator of system behavior, there has 59 * historically been very little insight provided into the frequency and cost 60 * of machine-specific traps. 
The lack of insight has been especially acute 61 * on UltraSPARC microprocessors: because these microprocessors handle TLB 62 * misses as software traps, the frequency and duration of traps play a 63 * decisive role in the performance of the memory system. As applications have 64 * increasingly outstripped TLB reach, this has become increasingly true. 65 * 66 * Part of the difficulty of observing trap behavior is that the trap handlers 67 * are so frequently called (e.g. millions of times per second) that any 68 * permanently enabled instrumentation would induce an unacceptable performance 69 * degradation. Thus, it is a constraint on any trap observability 70 * infrastructure that it have no probe effect when not explicitly enabled. 71 * 72 * The basic idea, then, is to create an interposing trap table in which each 73 * entry increments a per-trap, in-memory counter and then jumps to the actual, 74 * underlying trap table entry. To enable trapstat, we atomically write to the 75 * trap base address (%tba) register to point to our interposing trap table. 76 * (Note that per-CPU statistics fall out by creating a different trap table 77 * for each CPU.) 78 * 79 * Implementation Details 80 * 81 * While the idea is straight-forward, a nuance of SPARC V9 slightly 82 * complicates the implementation. Unlike its predecessors, SPARC V9 supports 83 * the notion of nested traps. The trap level is kept in the TL register: 84 * during normal operation it is 0; when a trap is taken, the TL register is 85 * incremented by 1. To aid system software, SPARC V9 breaks the trap table 86 * into two halves: the lower half contains the trap handlers for traps taken 87 * when TL is 0; the upper half contains the trap handlers for traps taken 88 * when TL is greater than 0. 
Each half is further subdivided into two 89 * subsequent halves: the lower half contains the trap handlers for traps 90 * other than those induced by the trap instruction (Tcc variants); the upper 91 * half contains the trap handlers for traps induced by the trap instruction. 92 * This gives a total of four ranges, with each range containing 256 traps: 93 * 94 * +--------------------------------+- 3ff 95 * | | . 96 * | Trap instruction, TL>0 | . 97 * | | . 98 * |- - - - - - - - - - - - - - - - +- 300 99 * |- - - - - - - - - - - - - - - - +- 2ff 100 * | | . 101 * | Non-trap instruction, TL>0 | . 102 * | | . 103 * |- - - - - - - - - - - - - - - - +- 200 104 * |- - - - - - - - - - - - - - - - +- 1ff 105 * | | . 106 * | Trap instruction, TL=0 | . 107 * | | . 108 * |- - - - - - - - - - - - - - - - +- 100 109 * |- - - - - - - - - - - - - - - - +- 0ff 110 * | | . 111 * | Non-trap instruction, TL=0 | . 112 * | | . 113 * +--------------------------------+- 000 114 * 115 * 116 * Solaris, however, doesn't have reason to support trap instructions when 117 * TL>0 (only privileged code may execute at TL>0; not supporting this only 118 * constrains our own implementation). The trap table actually looks like: 119 * 120 * +--------------------------------+- 2ff 121 * | | . 122 * | Non-trap instruction, TL>0 | . 123 * | | . 124 * |- - - - - - - - - - - - - - - - +- 200 125 * |- - - - - - - - - - - - - - - - +- 1ff 126 * | | . 127 * | Trap instruction, TL=0 | . 128 * | | . 129 * |- - - - - - - - - - - - - - - - +- 100 130 * |- - - - - - - - - - - - - - - - +- 0ff 131 * | | . 132 * | Non-trap instruction, TL=0 | . 133 * | | . 134 * +--------------------------------+- 000 135 * 136 * Putatively to aid system software, SPARC V9 has the notion of multiple 137 * sets of global registers. 
UltraSPARC defines four sets of global 138 * registers: 139 * 140 * Normal Globals 141 * Alternate Globals (AGs) 142 * MMU Globals (MGs) 143 * Interrupt Globals (IGs) 144 * 145 * The set of globals in use is controlled by bits in PSTATE; when TL is 0 146 * (and PSTATE has not been otherwise explicitly modified), the Normal Globals 147 * are in use. When a trap is issued, PSTATE is modified to point to a set of 148 * globals corresponding to the trap type. Most traps correspond to the 149 * Alternate Globals, with a minority corresponding to the MMU Globals, and 150 * only the interrupt-vector trap (vector 0x60) corresponding to the Interrupt 151 * Globals. (The complete mapping can be found in the UltraSPARC I&II User's 152 * Manual.) 153 * 154 * Note that the sets of globals are per trap _type_, not per trap _level_. 155 * Thus, when executing a TL>0 trap handler, one may not have registers 156 * available (for example, both trap-instruction traps and spill traps execute 157 * on the alternate globals; if a trap-instruction trap induces a window spill, 158 * the window spill handler has no available globals). For trapstat, this is 159 * problematic: a register is required to transfer control from one arbitrary 160 * location (in the interposing trap table) to another (in the actual trap 161 * table). 162 * 163 * We solve this problem by exploiting the trap table's location at the bottom 164 * of valid kernel memory (i.e. at KERNELBASE). We locate the interposing trap 165 * tables just below KERNELBASE -- thereby allowing us to use a branch-always 166 * instruction (ba) instead of a jump instruction (jmp) to transfer control 167 * from the TL>0 entries in the interposing trap table to the TL>0 entries in 168 * the actual trap table. (N.B. while this allows trap table interposition to 169 * work, it necessarily limits trapstat to only recording information about 170 * TL=0 traps -- there is no way to increment a counter without using a 171 * register.) 
Diagrammatically: 172 * 173 * Actual trap table: 174 * 175 * +--------------------------------+- 2ff 176 * | | . 177 * | Non-trap instruction, TL>0 | . <-----------------------+ 178 * | | . <-----------------------|-+ 179 * |- - - - - - - - - - - - - - - - +- 200 <-----------------------|-|-+ 180 * |- - - - - - - - - - - - - - - - +- 1ff | | | 181 * | | . | | | 182 * | Trap instruction, TL=0 | . <-----------------+ | | | 183 * | | . <-----------------|-+ | | | 184 * |- - - - - - - - - - - - - - - - +- 100 <-----------------|-|-+ | | | 185 * |- - - - - - - - - - - - - - - - +- 0ff | | | | | | 186 * | | . | | | | | | 187 * | Non-trap instruction, TL=0 | . <-----------+ | | | | | | 188 * | | . <-----------|-+ | | | | | | 189 * +--------------------------------+- 000 <-----------|-|-+ | | | | | | 190 * KERNELBASE | | | | | | | | | 191 * | | | | | | | | | 192 * | | | | | | | | | 193 * Interposing trap table: | | | | | | | | | 194 * | | | | | | | | | 195 * +--------------------------------+- 2ff | | | | | | | | | 196 * | ... | . | | | | | | | | | 197 * | ... | . | | | | | | | | | 198 * | ... | . | | | | | | | | | 199 * |- - - - - - - - - - - - - - - - +- 203 | | | | | | | | | 200 * | ba,a | -------------|-|-|-|-|-|-+ | | 201 * |- - - - - - - - - - - - - - - - +- 202 | | | | | | | | 202 * | ba,a | -------------|-|-|-|-|-|---+ | 203 * |- - - - - - - - - - - - - - - - +- 201 | | | | | | | 204 * | ba,a | -------------|-|-|-|-|-|-----+ 205 * |- - - - - - - - - - - - - - - - +- 200 | | | | | | 206 * | ... | . | | | | | | 207 * | ... | . | | | | | | 208 * | ... | . 
| | | | | | 209 * |- - - - - - - - - - - - - - - - +- 103 | | | | | | 210 * | (Increment counter) | | | | | | | 211 * | ba,a | -------------------+ | | 212 * |- - - - - - - - - - - - - - - - +- 102 | | | | | 213 * | (Increment counter) | | | | | | 214 * | ba,a | ---------------------+ | 215 * |- - - - - - - - - - - - - - - - +- 101 | | | | 216 * | (Increment counter) | | | | | 217 * | ba,a | -----------------------+ 218 * |- - - - - - - - - - - - - - - - +- 100 | | | 219 * | ... | . | | | 220 * | ... | . | | | 221 * | ... | . | | | 222 * |- - - - - - - - - - - - - - - - +- 003 | | | 223 * | (Increment counter) | | | | 224 * | ba,a | -------------+ | | 225 * |- - - - - - - - - - - - - - - - +- 002 | | 226 * | (Increment counter) | | | 227 * | ba,a | ---------------+ | 228 * |- - - - - - - - - - - - - - - - +- 001 | 229 * | (Increment counter) | | 230 * | ba,a | -----------------+ 231 * +--------------------------------+- 000 232 * KERNELBASE - tstat_total_size 233 * 234 * tstat_total_size is the number of pages required for each trap table. It 235 * must be true that KERNELBASE - tstat_total_size is less than the maximum 236 * branch displacement; if each CPU were to consume a disjoint virtual range 237 * below KERNELBASE for its trap table, we could support at most 238 * (maximum_branch_displacement / tstat_total_size) CPUs. The maximum branch 239 * displacement for Bicc variants is just under eight megabytes, and (because 240 * the %tba must be 32K aligned), tstat_total_size must be at least 32K; if 241 * each CPU were to consume a disjoint virtual range, we would have an 242 * unacceptably low upper bound of 256 CPUs. 243 * 244 * While there are tricks that one could use to address this constraint (e.g., 245 * creating trampolines every maximum_branch_displacement bytes), we instead 246 * solve this by not permitting each CPU to consume a disjoint virtual range. 
247 * Rather, we have each CPU's interposing trap table use the _same_ virtual 248 * range, but we back the trap tables with disjoint physical memory. Normally, 249 * such one-to-many virtual-to-physical mappings are illegal; this is 250 * permissible here only because the pages for the interposing trap table are 251 * necessarily locked in the TLB. (The CPUs thus never have the opportunity to 252 * discover that they have conflicting translations.) 253 * 254 * On CMT architectures in which CPUs can share MMUs, the above trick will not 255 * work: two CPUs that share an MMU cannot have the same virtual address map 256 * to disjoint physical pages. On these architectures, any CPUs sharing the 257 * same MMU must consume a disjoint 32K virtual address range -- limiting the 258 * number of CPUs sharing an MMU on these architectures to 256 due to the 259 * branch displacement limitation described above. On the sun4v architecture, 260 * there is a further limitation: a guest may not have more than eight locked 261 * TLB entries per MMU. To allow operation under this restriction, the 262 * interposing trap table and the trap statistics are each accessed through 263 * a single 4M TLB entry. This limits the footprint to two locked entries 264 * (one for the I-TLB and one for the D-TLB), but further restricts the number 265 * of CPUs to 128 per MMU. However, support for more than 128 CPUs can easily 266 * be added via a hybrid scheme, where the same 4M virtual address is used 267 * on different MMUs. 268 * 269 * On sun4v architecture, we currently don't use hybrid scheme as it imposes 270 * additional restriction on live migration and transparent CPU replacement. 271 * Instead, we increase the number of supported CPUs by reducing the virtual 272 * address space requirements per CPU via shared interposing trap table as 273 * follows: 274 * 275 * Offset (within 4MB page) 276 * +------------------------------------+- 0x400000 277 * | CPU 507 trap statistics (8KB) | . 
278 * |- - - - - - - - - - - - - - - - - - +- 0x3fe000 279 * | | 280 * | ... | 281 * | | 282 * |- - - - - - - - - - - - - - - - - - +- 0x00c000 283 * | CPU 1 trap statistics (8KB) | . 284 * |- - - - - - - - - - - - - - - - - - +- 0x00a000 285 * | CPU 0 trap statistics (8KB) | . 286 * |- - - - - - - - - - - - - - - - - - +- 0x008000 287 * | Shared trap handler continuation | . 288 * |- - - - - - - - - - - - - - - - - - +- 0x006000 289 * | Non-trap instruction, TL>0 | . 290 * |- - - - - - - - - - - - - - - - - - +- 0x004000 291 * | Trap instruction, TL=0 | . 292 * |- - - - - - - - - - - - - - - - - - +- 0x002000 293 * | Non-trap instruction, TL=0 | . 294 * +------------------------------------+- 0x000000 295 * 296 * Note that each CPU has its own 8K space for its trap statistics but 297 * shares the same interposing trap handlers. Interposing trap handlers 298 * use the CPU ID to determine the location of per CPU trap statistics 299 * area dynamically. This increases the interposing trap handler overhead, 300 * but is acceptable as it allows us to support up to 508 CPUs with one 301 * 4MB page on sun4v architecture. Support for additional CPUs can be 302 * added via hybrid scheme as mentioned earlier. 303 * 304 * TLB Statistics 305 * 306 * Because TLB misses are an important component of system performance, we wish 307 * to know much more about these traps than simply the number received. 308 * Specifically, we wish to know: 309 * 310 * (a) The amount of time spent executing the TLB miss handler 311 * (b) TLB misses versus TSB misses 312 * (c) Kernel-level misses versus user-level misses 313 * (d) Misses per pagesize 314 * 315 * TLB Statistics: Time Spent Executing 316 * 317 * To accurately determine the amount of time spent executing the TLB miss 318 * handler, one must get a timestamp on trap entry and trap exit, subtract the 319 * latter from the former, and add the result to an accumulating count. 
320 * Consider flow of control during normal TLB miss processing (where "ldx 321 * [%g2], %g2" is an arbitrary TLB-missing instruction): 322 * 323 * + - - - - - - - -+ 324 * : : 325 * : ldx [%g2], %g2 :<-------------------------------------------------------+ 326 * : : Return from trap: | 327 * + - - - - - - - -+ TL <- TL - 1 (0) | 328 * | %pc <- TSTATE[TL].TPC (address of load) | 329 * | TLB miss: | 330 * | TL <- TL + 1 (1) | 331 * | %pc <- TLB-miss-trap-handler | 332 * | | 333 * v | 334 * + - - - - - - - - - - - - - - - + | 335 * : : | 336 * : Lookup VA in TSB : | 337 * : If (hit) : | 338 * : Fill TLB : | 339 * : Else : | 340 * : Lookup VA (hme hash table : | 341 * : or segkpm) : | 342 * : Fill TLB : | 343 * : Endif : | 344 * : Issue "retry" ---------------------------------------------------------+ 345 * : : 346 * + - - - - - - - - - - - - - - - + 347 * TLB-miss-trap-handler 348 * 349 * 350 * As the above diagram indicates, interposing on the trap table allows one 351 * only to determine a timestamp on trap _entry_: when the TLB miss handler 352 * has completed filling the TLB, a "retry" will be issued, and control will 353 * transfer immediately back to the missing %pc. 354 * 355 * To obtain a timestamp on trap exit, we must then somehow interpose between 356 * the "retry" and the subsequent control transfer to the TLB-missing 357 * instruction. To do this, we _push_ a trap level. The basic idea is to 358 * spoof a TLB miss by raising TL, setting the %tpc to be within text 359 * controlled by trapstat (the "TLB return entry") and branching to the 360 * underlying TLB miss handler. When the TLB miss handler issues its "retry", 361 * control will transfer not to the TLB-missing instruction, but rather to the 362 * TLB return entry. This code can then obtain a timestamp, and issue its own 363 * "retry" -- thereby correctly returning to the TLB-missing instruction. 
364 * Here is the above TLB miss flow control diagram modified to reflect 365 * trapstat's operation: 366 * 367 * + - - - - - - - -+ 368 * : : 369 * : ldx [%g2], %g2 :<-------------------------------------------------------+ 370 * : : Return from trap: | 371 * + - - - - - - - -+ TL <- TL - 1 (0) | 372 * | %pc <- TSTATE[TL].TPC (address of load) | 373 * | TLB miss: | 374 * | TL <- TL + 1 (1) | 375 * | %pc <- TLB-miss-trap-handler (trapstat) | 376 * | | 377 * v TLB-return-entry (trapstat) | 378 * + - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - + | 379 * : : : : | 380 * : Record timestamp : : Record timestamp : | 381 * : TL <- 2 : : Take timestamp difference : | 382 * : TSTATE[1].TPC <- TLB-return-entry : : Add to running total : | 383 * : ba,a TLB-miss-trap-handler -----------+ : Issue "retry" --------------+ 384 * : : | : : 385 * + - - - - - - - - - - - - - - - - - - + | + - - - - - - - - - - - - - + 386 * TLB-miss-trap-handler | ^ 387 * (trapstat) | | 388 * | | 389 * | | 390 * +-----------------------+ | 391 * | | 392 * | | 393 * v | 394 * + - - - - - - - - - - - - - - - + | 395 * : : | 396 * : Lookup VA in TSB : | 397 * : If (hit) : | 398 * : Fill TLB : | 399 * : Else : | 400 * : Lookup VA (hme hash table : | 401 * : or segkpm) : | 402 * : Fill TLB : | 403 * : Endif : | 404 * : Issue "retry" ------------------------------------------+ 405 * : : Return from trap: 406 * + - - - - - - - - - - - - - - - + TL <- TL - 1 (1) 407 * TLB-miss-trap-handler %pc <- TSTATE[TL].TPC (TLB-return-entry) 408 * 409 * 410 * A final subterfuge is required to complete our artifice: if we miss in 411 * the TLB, the TSB _and_ the subsequent hash or segkpm lookup (that is, if 412 * there is no valid translation for the TLB-missing address), common system 413 * software will need to accurately determine the %tpc as part of its page 414 * fault handling. 
We therefore modify the kernel to check the %tpc in this 415 * case: if the %tpc falls within the VA range controlled by trapstat and 416 * the TL is 2, TL is simply lowered back to 1 (this check is implemented 417 * by the TSTAT_CHECK_TL1 macro). Lowering TL to 1 has the effect of 418 * discarding the state pushed by trapstat. 419 * 420 * TLB Statistics: TLB Misses versus TSB Misses 421 * 422 * Distinguishing TLB misses from TSB misses requires further interposition 423 * on the TLB miss handler: we cannot know a priori or a posteriori if a 424 * given VA will or has hit in the TSB. 425 * 426 * We achieve this distinction by adding a second TLB return entry almost 427 * identical to the first -- differing only in the address to which it 428 * stores its results. We then modify the TLB miss handlers of the kernel 429 * such that they check the %tpc when they determine that a TLB miss has 430 * subsequently missed in the TSB: if the %tpc lies within trapstat's VA 431 * range and TL is 2 (that is, if trapstat is running), the TLB miss handler 432 * _increments_ the %tpc by the size of the TLB return entry. The ensuing 433 * "retry" will thus transfer control to the second TLB return entry, and 434 * the time spent in the handler will be accumulated in a memory location 435 * specific to TSB misses. 436 * 437 * N.B.: To minimize the amount of knowledge the kernel must have of trapstat, 438 * we do not allow the kernel to hard-code the size of the TLB return entry. 439 * Rather, the actual tsbmiss handler executes a known instruction at the 440 * corresponding tsbmiss patch points (see the tstat_tsbmiss_patch_table) with 441 * the %tpc in %g7: when trapstat is not running, these points contain the 442 * harmless TSTAT_TSBMISS_INSTR instruction ("add %g7, 0, %g7"). Before 443 * running, trapstat modifies the instructions at these patch points such 444 * that the simm13 equals the size of the TLB return entry. 
445 * 446 * TLB Statistics: Kernel-level Misses versus User-level Misses 447 * 448 * Differentiating user-level misses from kernel-level misses employs a 449 * similar technique, but is simplified by the ability to distinguish a 450 * user-level miss from a kernel-level miss a priori by reading the context 451 * register: we implement kernel-/user-level differentiation by again doubling 452 * the number of TLB return entries, and setting the %tpc to the appropriate 453 * TLB return entry in trapstat's TLB miss handler. Together with the doubling 454 * of entries required for TLB-miss/TSB-miss differentiation, this yields a 455 * total of four TLB return entries: 456 * 457 * Level TSB hit? Structure member 458 * ------------------------------------------------------------ 459 * Kernel Yes tstat_tlbret_t.ttlbr_ktlb 460 * Kernel No tstat_tlbret_t.ttlbr_ktsb 461 * User Yes tstat_tlbret_t.ttlbr_utlb 462 * User No tstat_tlbret_t.ttlbr_utsb 463 * 464 * TLB Statistics: Misses per Pagesize 465 * 466 * As with the TLB-/TSB-miss differentiation, we have no way of determining 467 * pagesize a priori. This is therefore implemented by mandating a new rule: 468 * whenever the kernel fills the TLB in its TLB miss handler, the TTE 469 * corresponding to the TLB-missing VA must be in %g5 when the handler 470 * executes its "retry". This allows the TLB return entry to determine 471 * pagesize by simply looking at the pagesize field in the TTE stored in 472 * %g5. 473 * 474 * TLB Statistics: Probe Effect 475 * 476 * As one might imagine, gathering TLB statistics by pushing a trap level 477 * induces significant probe effect. To account for this probe effect, 478 * trapstat attempts to observe it by executing a code sequence with a known 479 * number of TLB misses both before and after interposing on the trap table. 480 * This allows trapstat to determine a per-trap probe effect which can then be 481 * factored into the "%tim" fields of the trapstat command. 
482 * 483 * Note that on sun4v platforms, TLB misses are normally handled by the 484 * hypervisor or the hardware TSB walker. Thus no fast MMU miss information 485 * is reported for normal operation. However, when trapstat is invoked 486 * with -t or -T option to collect detailed TLB statistics, kernel takes 487 * over TLB miss handling. This results in significantly more overhead 488 * and TLB statistics may not be as accurate as on sun4u platforms. 489 * On some processors, hypervisor or hardware may provide a low overhead 490 * interface to collect TSB hit statistics. This support is exposed via 491 * a well defined CPU module interface (cpu_trapstat_conf to enable this 492 * interface and cpu_trapstat_data to get detailed TSB hit statistics). 493 * In this scenario, TSB miss statistics is collected by intercepting the 494 * IMMU_miss and DMMU_miss traps using above mentioned trap interposition 495 * approach. 496 * 497 * Locking 498 * 499 * The implementation uses two locks: tstat_lock (a local lock) and the global 500 * cpu_lock. tstat_lock is used to assure trapstat's consistency in the 501 * presence of multithreaded /dev/trapstat consumers (while as of this writing 502 * the only consumer of /dev/trapstat is single threaded, it is obviously 503 * necessary to correctly support multithreaded access). cpu_lock is held 504 * whenever CPUs are being manipulated directly, to prevent them from 505 * disappearing in the process. Because trapstat's DR callback 506 * (trapstat_cpu_setup()) must grab tstat_lock and is called with cpu_lock 507 * held, the lock ordering is necessarily cpu_lock before tstat_lock. 
 *
 */
/* END CSTYLED */

static dev_info_t	*tstat_devi;	/* saved in xxattach() for xxinfo() */
static int		tstat_open;	/* set if driver is open */
static kmutex_t		tstat_lock;	/* serialize access */
static vmem_t		*tstat_arena;	/* arena for TLB-locked pages */
static tstat_percpu_t	*tstat_percpu;	/* per-CPU data */
static int		tstat_running;	/* set if trapstat is running */
static tstat_data_t	*tstat_buffer;	/* staging buffer for outgoing data */
static int		tstat_options;	/* bit-wise indication of options */
static int		*tstat_enabled;	/* map of enabled trap entries */
static int		tstat_tsbmiss_patched; /* tsbmiss patch flag */
static callb_id_t	tstat_cprcb;	/* CPR callback */
static char		*tstat_probe_area; /* VA range used for probe effect */
static caddr_t		tstat_probe_phys; /* physical to back above VA */
static hrtime_t		tstat_probe_time; /* time spent on probe effect */
static hrtime_t		tstat_probe_before[TSTAT_PROBE_NLAPS]; /* laps, actual table */
static hrtime_t		tstat_probe_after[TSTAT_PROBE_NLAPS]; /* laps, interposed table */
static uint_t		tstat_pgszs;		/* # of kernel page sizes */
static uint_t		tstat_user_pgszs;	/* # of user page sizes */

/*
 * sizeof tstat_data_t + pgsz data for the kernel.  For simplicity's sake, when
 * we collect data, we do it based upon szc, but when we report data back to
 * userland, we have to do it based upon the userszc which may not match.
 * So, these two variables are for internal use and exported use respectively.
 */
static size_t		tstat_data_t_size;	/* internal (szc-based) size */
static size_t		tstat_data_t_exported_size; /* exported (userszc-based) */

#ifndef sun4v

static size_t		tstat_data_pages; /* number of pages of tstat data */
static size_t		tstat_data_size; /* tstat data size in bytes */
static size_t		tstat_total_pages; /* #data pages + #instr pages */
static size_t		tstat_total_size; /* tstat data size + instr size */

#else /* sun4v */

static caddr_t		tstat_va;	/* VA of memory reserved for TBA */
static pfn_t		tstat_pfn;	/* PFN of memory reserved for TBA */
static boolean_t	tstat_fast_tlbstat = B_FALSE; /* low-overhead TSB stats? */
static int		tstat_traptab_initialized; /* trap table init flag */

#endif /* sun4v */

/*
 * In the above block comment, see "TLB Statistics: TLB Misses versus
 * TSB Misses" for an explanation of the tsbmiss patch points.
 */
extern uint32_t tsbmiss_trapstat_patch_point;
extern uint32_t tsbmiss_trapstat_patch_point_kpm;
extern uint32_t tsbmiss_trapstat_patch_point_kpm_small;

/*
 * Trapstat tsbmiss patch table.  The second field of each entry saves the
 * original instruction at the patch point while the patch is applied (see
 * trapstat_hotpatch()); it is zero whenever the point is unpatched.
 */
tstat_tsbmiss_patch_entry_t tstat_tsbmiss_patch_table[] = {
	{(uint32_t *)&tsbmiss_trapstat_patch_point, 0},
	{(uint32_t *)&tsbmiss_trapstat_patch_point_kpm, 0},
	{(uint32_t *)&tsbmiss_trapstat_patch_point_kpm_small, 0},
	{(uint32_t *)NULL, 0}		/* NULL tpe_addr terminates the table */
};

/*
 * We define some general SPARC-specific constants to allow more readable
 * relocations.
 */
#define	NOP	0x01000000		/* SPARC nop instruction */
#define	HI22(v)	((uint32_t)(v) >> 10)	/* upper 22 bits of v */
#define	LO10(v)	((uint32_t)(v) & 0x3ff)	/* low 10 bits of v */
#define	LO12(v)	((uint32_t)(v) & 0xfff)	/* low 12 bits of v */
#define	DISP22(from, to) \
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff) /* 22-bit word disp. */
#define	ASI(asi)	((asi) << 5)	/* shift ASI into instruction position */

/*
 * The interposing trap table must be locked in the I-TLB, and any data
 * referred to in the interposing trap handler must be locked in the D-TLB.
 * This function locks these pages in the appropriate TLBs by creating TTEs
 * from whole cloth, and manually loading them into the TLB.  This function is
 * called from cross call context.
 *
 * On sun4v platforms, we use 4M page size mappings to minimize the number
 * of locked down entries (i.e. permanent mappings). Each CPU uses a
 * reserved portion of that 4M page for its TBA and data.
 */
static void
trapstat_load_tlb(void)
{
#ifndef sun4v
	int i;
#else
	uint64_t ret;
#endif
	tte_t tte;
	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];
	caddr_t va = tcpu->tcpu_vabase;

	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
	ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));

#ifndef sun4v
	/*
	 * sun4u: build a locked 8K TTE for each of this CPU's pages.  The
	 * leading TSTAT_INSTR_PAGES pages hold the interposing trap table
	 * itself and go into the I-TLB; the remaining pages hold the data
	 * and go into the D-TLB (note the additional TTE_CV_INT and
	 * TTE_HWWR_INT bits on those entries).
	 */
	for (i = 0; i < tstat_total_pages; i++, va += MMU_PAGESIZE) {
		tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) |
		    TTE_PFN_INTHI(tcpu->tcpu_pfn[i]);
		if (i < TSTAT_INSTR_PAGES) {
			tte.tte_intlo = TTE_PFN_INTLO(tcpu->tcpu_pfn[i]) |
			    TTE_LCK_INT | TTE_CP_INT | TTE_PRIV_INT;
			sfmmu_itlb_ld_kva(va, &tte);
		} else {
			tte.tte_intlo = TTE_PFN_INTLO(tcpu->tcpu_pfn[i]) |
			    TTE_LCK_INT | TTE_CP_INT | TTE_CV_INT |
			    TTE_PRIV_INT | TTE_HWWR_INT;
			sfmmu_dtlb_ld_kva(va, &tte);
		}
	}
#else /* sun4v */
	/*
	 * sun4v: a single 4M TTE covers both the interposing trap table and
	 * the data; ask the hypervisor to establish it as a permanent
	 * mapping in both the I-TLB and the D-TLB.
	 */
	tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(tstat_pfn);
	tte.tte_intlo = TTE_PFN_INTLO(tstat_pfn) | TTE_CP_INT |
	    TTE_CV_INT | TTE_PRIV_INT | TTE_HWWR_INT |
	    TTE_SZ_INTLO(TTE4M);
	ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte,
	    MAP_ITLB | MAP_DTLB);

	/*
	 * Without this mapping the interposing %tba cannot safely be used;
	 * there is no graceful recovery from cross call context, so panic.
	 */
	if (ret != H_EOK)
		cmn_err(CE_PANIC, "trapstat: cannot map new TBA "
		    "for cpu %d (error: 0x%lx)", CPU->cpu_id, ret);
#endif /* sun4v */
}

/*
 * As mentioned in the "TLB Statistics: TLB Misses versus TSB Misses" section
 * of the block comment, TLB misses are differentiated from TSB misses in
 * part by hot-patching the instructions at the tsbmiss patch
 * points (see
 * tstat_tsbmiss_patch_table). This routine is used both to initially patch
 * the instructions, and to patch them back to their original values upon
 * restoring the original trap table.
 */
static void
trapstat_hotpatch()
{
	uint32_t instr;
	uint32_t simm13;
	tstat_tsbmiss_patch_entry_t *ep;

	ASSERT(MUTEX_HELD(&tstat_lock));

	/*
	 * The tsbmiss patch points only matter when TLB statistics were
	 * requested; otherwise there is nothing to patch (or unpatch).
	 */
	if (!(tstat_options & TSTAT_OPT_TLBDATA))
		return;

	if (!tstat_tsbmiss_patched) {
		/*
		 * We haven't patched the TSB paths; do so now.  The simm13
		 * we patch in is the size of a TLB return entry; it must be
		 * the same for the kernel (ktlb->ktsb) and user (utlb->utsb)
		 * entries for a single immediate to serve both paths.
		 */
		/*CONSTCOND*/
		ASSERT(offsetof(tstat_tlbret_t, ttlbr_ktsb) -
		    offsetof(tstat_tlbret_t, ttlbr_ktlb) ==
		    offsetof(tstat_tlbret_t, ttlbr_utsb) -
		    offsetof(tstat_tlbret_t, ttlbr_utlb));

		simm13 = offsetof(tstat_tlbret_t, ttlbr_ktsb) -
		    offsetof(tstat_tlbret_t, ttlbr_ktlb);

		for (ep = tstat_tsbmiss_patch_table; ep->tpe_addr; ep++) {
			/* save the original instruction for later restore */
			ASSERT(ep->tpe_instr == 0);
			instr = ep->tpe_instr = *ep->tpe_addr;

			/*
			 * Assert that the instruction we're about to patch is
			 * "add %g7, 0, %g7" (0x8e01e000).
			 */
			ASSERT(instr == TSTAT_TSBMISS_INSTR);

			/*
			 * OR the displacement into the immediate field,
			 * turning it into "add %g7, simm13, %g7".
			 */
			instr |= simm13;
			hot_patch_kernel_text((caddr_t)ep->tpe_addr,
			    instr, sizeof (instr));
		}

		tstat_tsbmiss_patched = 1;

	} else {
		/*
		 * Remove patches from the TSB paths by restoring the
		 * original instructions saved in the patch table.
		 */
		for (ep = tstat_tsbmiss_patch_table; ep->tpe_addr; ep++) {
			ASSERT(ep->tpe_instr == TSTAT_TSBMISS_INSTR);
			hot_patch_kernel_text((caddr_t)ep->tpe_addr,
			    ep->tpe_instr, sizeof (instr));
			ep->tpe_instr = 0;
		}

		tstat_tsbmiss_patched = 0;
	}
}

/*
 * This is the routine executed to clock the performance of the trap table,
 * executed both before and after interposing on the trap table to attempt to
 * determine probe effect.  The probe effect is used to adjust the "%tim"
 * fields of trapstat's -t and -T output; we only use TLB misses to clock the
 * trap table.
 We execute the inner loop (which is designed to exceed the
 * TLB's reach) nlaps times, taking the best time as our time (thereby
 * factoring out the effects of interrupts, cache misses or other perturbing
 * events).
 */
static hrtime_t
trapstat_probe_laps(int nlaps, hrtime_t *buf)
{
	int i, j = 0;
	hrtime_t ts, best = INT64_MAX;

	while (nlaps--) {
		ts = rdtick();

		/* Touch one byte per page to force a TLB miss per page. */
		for (i = 0; i < TSTAT_PROBE_SIZE; i += MMU_PAGESIZE)
			*((volatile char *)&tstat_probe_area[i]);

		if ((ts = rdtick() - ts) < best)
			best = ts;
		buf[j++] = ts;
	}

	return (best);
}

/*
 * This routine determines the probe effect by calling trapstat_probe_laps()
 * both without and with the interposing trap table.  Note that this is
 * called from a cross call on the desired CPU, and that it is called on
 * every CPU (this is necessary because the probe effect may differ from
 * one CPU to another).
 */
static void
trapstat_probe()
{
	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];
	hrtime_t before, after;

	if (!(tcpu->tcpu_flags & TSTAT_CPU_SELECTED))
		return;

	if (tstat_probe_area == NULL || (tstat_options & TSTAT_OPT_NOGO))
		return;

	/*
	 * We very much expect the %tba to be KERNELBASE; this is a
	 * precautionary measure to assure that trapstat doesn't melt the
	 * machine should the %tba point unexpectedly elsewhere.
	 */
	if (get_tba() != (caddr_t)KERNELBASE)
		return;

	/*
	 * Preserve this CPU's data before destroying it by enabling the
	 * interposing trap table.  We can safely use tstat_buffer because
	 * the caller of the trapstat_probe() cross call is holding tstat_lock.
	 */
	bcopy(tcpu->tcpu_data, tstat_buffer, tstat_data_t_size);

	tstat_probe_time = gethrtime();

	/* Clock the probe loop with the original, then interposing, %tba. */
	before = trapstat_probe_laps(TSTAT_PROBE_NLAPS, tstat_probe_before);
	(void) set_tba(tcpu->tcpu_ibase);

	after = trapstat_probe_laps(TSTAT_PROBE_NLAPS, tstat_probe_after);
	(void) set_tba((caddr_t)KERNELBASE);

	tstat_probe_time = gethrtime() - tstat_probe_time;

	/* Restore the preserved data and record the per-miss probe effect. */
	bcopy(tstat_buffer, tcpu->tcpu_data, tstat_data_t_size);
	tcpu->tcpu_data->tdata_peffect = (after - before) / TSTAT_PROBE_NPAGES;
}

/*
 * Set up the virtual range used by trapstat_probe():  TSTAT_PROBE_NPAGES
 * of virtual, all backed by a single physical page.
 */
static void
trapstat_probe_alloc()
{
	pfn_t pfn;
	caddr_t va;
	int i;

	ASSERT(MUTEX_HELD(&tstat_lock));
	ASSERT(tstat_probe_area == NULL);
	ASSERT(tstat_probe_phys == NULL);

	if (!(tstat_options & TSTAT_OPT_TLBDATA))
		return;

	/*
	 * Grab some virtual from the heap arena.
	 */
	tstat_probe_area = vmem_alloc(heap_arena, TSTAT_PROBE_SIZE, VM_SLEEP);
	va = tstat_probe_area;

	/*
	 * Grab a single physical page.
	 */
	tstat_probe_phys = vmem_alloc(tstat_arena, MMU_PAGESIZE, VM_SLEEP);
	pfn = hat_getpfnum(kas.a_hat, tstat_probe_phys);

	/*
	 * Now set the translation for every page in our virtual range
	 * to be our allocated physical page.
	 */
	for (i = 0; i < TSTAT_PROBE_NPAGES; i++) {
		hat_devload(kas.a_hat, va, MMU_PAGESIZE, pfn, PROT_READ,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		va += MMU_PAGESIZE;
	}
}

/*
 * Tear down the translations and free the virtual and physical memory
 * set up by trapstat_probe_alloc().  A no-op if nothing was allocated.
 */
static void
trapstat_probe_free()
{
	caddr_t va;
	int i;

	ASSERT(MUTEX_HELD(&tstat_lock));

	if ((va = tstat_probe_area) == NULL)
		return;

	for (i = 0; i < TSTAT_PROBE_NPAGES; i++) {
		hat_unload(kas.a_hat, va, MMU_PAGESIZE, HAT_UNLOAD_UNLOCK);
		va += MMU_PAGESIZE;
	}

	vmem_free(tstat_arena, tstat_probe_phys, MMU_PAGESIZE);
	vmem_free(heap_arena, tstat_probe_area, TSTAT_PROBE_SIZE);

	tstat_probe_phys = NULL;
	tstat_probe_area = NULL;
}

/*
 * This routine actually enables a CPU by setting its %tba to be the
 * CPU's interposing trap table.  It is called out of cross call context.
 */
static void
trapstat_enable()
{
	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];

	if (!(tcpu->tcpu_flags & TSTAT_CPU_SELECTED))
		return;

	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
	ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));

	/* Safety check: don't interpose if %tba isn't where we expect it. */
	if (get_tba() != (caddr_t)KERNELBASE)
		return;

	if (!(tstat_options & TSTAT_OPT_NOGO))
		(void) set_tba(tcpu->tcpu_ibase);
	tcpu->tcpu_flags |= TSTAT_CPU_ENABLED;
#ifdef sun4v
	if ((tstat_options & TSTAT_OPT_TLBDATA) &&
	    !(tstat_options & TSTAT_OPT_NOGO)) {
		if (tstat_fast_tlbstat) {
			/*
			 * Invoke processor specific interface to enable
			 * collection of TSB hit statistics.
			 */
			cpu_trapstat_conf(CPU_TSTATCONF_ENABLE);
		} else {
			/*
			 * Collect TLB miss statistics by taking over
			 * TLB miss handling from the hypervisor.  This
			 * is done by telling the hypervisor that there
			 * is no TSB configured.  Also set TSTAT_TLB_STATS
			 * flag so that no user TSB is configured during
			 * context switch time.
			 */
			cpu_t *cp = CPU;

			cp->cpu_m.cpu_tstat_flags |= TSTAT_TLB_STATS;
			(void) hv_set_ctx0(NULL, NULL);
			(void) hv_set_ctxnon0(NULL, NULL);
		}
	}
#endif
}

/*
 * This routine disables a CPU (vis a vis trapstat) by setting its %tba to be
 * the actual, underlying trap table.  It is called out of cross call context.
 */
static void
trapstat_disable()
{
	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];

	if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED))
		return;

	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);

	if (!(tstat_options & TSTAT_OPT_NOGO))
		(void) set_tba((caddr_t)KERNELBASE);

	tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED;

#ifdef sun4v
	if ((tstat_options & TSTAT_OPT_TLBDATA) &&
	    !(tstat_options & TSTAT_OPT_NOGO)) {
		if (tstat_fast_tlbstat) {
			/*
			 * Invoke processor specific interface to disable
			 * collection of TSB hit statistics on each processor.
			 */
			cpu_trapstat_conf(CPU_TSTATCONF_DISABLE);
		} else {
			/*
			 * As part of collecting TLB miss statistics, we took
			 * over TLB miss handling from the hypervisor by
			 * telling the hypervisor that NO TSB is configured.
			 * We need to restore that by communicating proper
			 * kernel/user TSB information so that TLB misses
			 * can be handled by the hypervisor or the hardware
			 * more efficiently.
			 *
			 * We restore kernel TSB information right away.
			 * However, to minimize any locking dependency, we
			 * don't restore user TSB information right away.
			 * Instead, we simply clear the TSTAT_TLB_STATS flag
			 * so that the user TSB information is automatically
			 * restored on next context switch.
			 *
			 * Note that the call to restore kernel TSB information
			 * will normally not fail, unless wrong information is
			 * passed here.  In that scenario, system will still
			 * continue to function properly with the exception of
			 * kernel handling all the TLB misses.
			 */
			struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock;
			cpu_t *cp = CPU;

			cp->cpu_m.cpu_tstat_flags &= ~TSTAT_TLB_STATS;
			(void) hv_set_ctx0(hvbp->hv_tsb_info_cnt,
			    hvbp->hv_tsb_info_pa);
		}
	}
#endif
}

/*
 * We use %tick as the time base when recording the time spent executing
 * the trap handler.  %tick, however, is not necessarily kept in sync
 * across CPUs (indeed, different CPUs may have different %tick frequencies).
 * We therefore cross call onto a CPU to get a snapshot of its data to
 * copy out; this is the routine executed out of that cross call.
 */
static void
trapstat_snapshot()
{
	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];
	tstat_data_t *data = tcpu->tcpu_data;

	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ENABLED);

	/* Record both time bases at the moment of the snapshot. */
	data->tdata_snapts = gethrtime();
	data->tdata_snaptick = rdtick();
	bcopy(data, tstat_buffer, tstat_data_t_size);
#ifdef sun4v
	/*
	 * Invoke processor specific interface to collect TSB hit
	 * statistics on each processor.
	 */
	if ((tstat_options & TSTAT_OPT_TLBDATA) && tstat_fast_tlbstat)
		cpu_trapstat_data((void *) tstat_buffer->tdata_pgsz,
		    tstat_pgszs);
#endif
}

/*
 * The TSTAT_RETENT_* constants define offsets in the TLB return entry.
 * They are used only in trapstat_tlbretent() (below) and #undef'd
 * immediately afterwards.  Any change to "retent" in trapstat_tlbretent()
 * will likely require changes to these constants.
 */

#ifndef sun4v
#define	TSTAT_RETENT_STATHI	1
#define	TSTAT_RETENT_STATLO	2
#define	TSTAT_RETENT_SHIFT	11
#define	TSTAT_RETENT_COUNT_LD	13
#define	TSTAT_RETENT_COUNT_ST	15
#define	TSTAT_RETENT_TMPTSHI	16
#define	TSTAT_RETENT_TMPTSLO	17
#define	TSTAT_RETENT_TIME_LD	19
#define	TSTAT_RETENT_TIME_ST	21
#else /* sun4v */
#define	TSTAT_RETENT_TDATASHFT	2
#define	TSTAT_RETENT_STATHI	4
#define	TSTAT_RETENT_STATLO	6
#define	TSTAT_RETENT_SHIFT	9
#define	TSTAT_RETENT_COUNT_LD	11
#define	TSTAT_RETENT_COUNT_ST	13
#define	TSTAT_RETENT_TMPTSHI	14
#define	TSTAT_RETENT_TMPTSLO	16
#define	TSTAT_RETENT_TIME_LD	18
#define	TSTAT_RETENT_TIME_ST	20
#endif /* sun4v */

/*
 * Build one TLB return entry:  copy the "retent" instruction template into
 * place and patch its address/offset fields to point at the given per-CPU
 * miss data.
 */
static void
trapstat_tlbretent(tstat_percpu_t *tcpu, tstat_tlbretent_t *ret,
    tstat_missdata_t *data)
{
	uint32_t *ent = ret->ttlbrent_instr, shift;
	uintptr_t base;
#ifndef sun4v
	uintptr_t tmptick = TSTAT_DATA_OFFS(tcpu, tdata_tmptick);
#else
	uintptr_t tmptick = TSTAT_CPU0_DATA_OFFS(tcpu, tdata_tmptick);
#endif

	/*
	 * This is the entry executed upon return from the TLB/TSB miss
	 * handler (i.e. the code interpositioned between the "retry" and
	 * the actual return to the TLB-missing instruction).  Detail on its
	 * theory of operation can be found in the "TLB Statistics" section
	 * of the block comment.  Note that we expect the TTE just loaded
	 * into the TLB to be in %g5; all other globals are available as
	 * scratch.  Finally, note that the page size information in sun4v is
	 * located in the lower bits of the TTE -- requiring us to have a
	 * different return entry on sun4v.
	 */
	static const uint32_t retent[TSTAT_TLBRET_NINSTR] = {
#ifndef sun4v
	    0x87410000,		/* rd %tick, %g3			*/
	    0x03000000, 	/* sethi %hi(stat), %g1			*/
	    0x82106000,		/* or %g1, %lo(stat), %g1		*/
	    0x89297001,		/* sllx %g5, 1, %g4			*/
	    0x8931303e,		/* srlx %g4, 62, %g4			*/
	    0x8531702e,		/* srlx %g5, 46, %g2			*/
	    0x8408a004,		/* and %g2, 4, %g2			*/
	    0x88110002,		/* or %g4, %g2, %g4			*/
	    0x80a12005,		/* cmp %g4, 5				*/
	    0x34400002,		/* bg,a,pn %icc, +8			*/
	    0x88102004,		/* mov 4, %g4				*/
	    0x89292000,		/* sll %g4, shift, %g4			*/
	    0x82004004,		/* add %g1, %g4, %g1			*/
	    0xc4586000,		/* ldx [%g1 + tmiss_count], %g2		*/
	    0x8400a001,		/* add %g2, 1, %g2			*/
	    0xc4706000,		/* stx %g2, [%g1 + tmiss_count]		*/
	    0x0d000000, 	/* sethi %hi(tdata_tmptick), %g6	*/
	    0xc459a000, 	/* ldx [%g6 + %lo(tdata_tmptick)], %g2	*/
	    0x8620c002,		/* sub %g3, %g2, %g3			*/
	    0xc4586000,		/* ldx [%g1 + tmiss_time], %g2		*/
	    0x84008003,		/* add %g2, %g3, %g2			*/
	    0xc4706000,		/* stx %g2, [%g1 + tmiss_time]		*/
	    0x83f00000		/* retry				*/
#else /* sun4v */
	    0x82102008,		/* mov SCRATCHPAD_CPUID, %g1		*/
	    0xced84400,		/* ldxa [%g1]ASI_SCRATCHPAD, %g7	*/
	    0x8f29f000,		/* sllx %g7, TSTAT_DATA_SHIFT, %g7	*/
	    0x87410000,		/* rd %tick, %g3			*/
	    0x03000000, 	/* sethi %hi(stat), %g1			*/
	    0x82004007,		/* add %g1, %g7, %g1			*/
	    0x82106000,		/* or %g1, %lo(stat), %g1		*/
	    0x8929703d,		/* sllx %g5, 61, %g4			*/
	    0x8931303d,		/* srlx %g4, 61, %g4			*/
	    0x89292000,		/* sll %g4, shift, %g4			*/
	    0x82004004,		/* add %g1, %g4, %g1			*/
	    0xc4586000,		/* ldx [%g1 + tmiss_count], %g2		*/
	    0x8400a001,		/* add %g2, 1, %g2			*/
	    0xc4706000,		/* stx %g2, [%g1 + tmiss_count]		*/
	    0x0d000000, 	/* sethi %hi(tdata_tmptick), %g6	*/
	    0x8c018007,		/* add %g6, %g7, %g6			*/
	    0xc459a000, 	/* ldx [%g6 + %lo(tdata_tmptick)], %g2	*/
	    0x8620c002,		/* sub %g3, %g2, %g3			*/
	    0xc4586000,		/* ldx [%g1 + tmiss_time], %g2		*/
	    0x84008003,		/* add %g2, %g3, %g2			*/
	    0xc4706000,		/* stx %g2, [%g1 + tmiss_time]		*/
	    0x83f00000		/* retry				*/
#endif /* sun4v */
	};

	ASSERT(MUTEX_HELD(&tstat_lock));
	/*CONSTCOND*/
	ASSERT(offsetof(tstat_missdata_t, tmiss_count) <= LO10(-1));
	/*CONSTCOND*/
	ASSERT(offsetof(tstat_missdata_t, tmiss_time) <= LO10(-1));
	/*CONSTCOND*/
	ASSERT(!((sizeof (tstat_pgszdata_t) - 1) & sizeof (tstat_pgszdata_t)));

	/*
	 * tstat_pgszdata_t is asserted (above) to be a power of two; find
	 * its log2 for use as the page-size index shift.
	 */
	for (shift = 1; (1 << shift) != sizeof (tstat_pgszdata_t); shift++)
		continue;

	/*
	 * "base" is the interposing-trap-table virtual address of this
	 * CPU's miss data, derived from the data pointer's offset within
	 * the per-CPU data area.
	 */
	base = (uintptr_t)tcpu->tcpu_ibase + TSTAT_INSTR_SIZE +
	    ((uintptr_t)data - (uintptr_t)tcpu->tcpu_data);

	bcopy(retent, ent, sizeof (retent));

#if defined(sun4v)
	ent[TSTAT_RETENT_TDATASHFT] |= LO10((uintptr_t)TSTAT_DATA_SHIFT);
#endif
	ent[TSTAT_RETENT_STATHI] |= HI22(base);
	ent[TSTAT_RETENT_STATLO] |= LO10(base);
	ent[TSTAT_RETENT_SHIFT] |= shift;
	/* LINTED E_EXPR_NULL_EFFECT */
	ent[TSTAT_RETENT_COUNT_LD] |= offsetof(tstat_missdata_t, tmiss_count);
	/* LINTED E_EXPR_NULL_EFFECT */
	ent[TSTAT_RETENT_COUNT_ST] |= offsetof(tstat_missdata_t, tmiss_count);
	ent[TSTAT_RETENT_TMPTSHI] |= HI22(tmptick);
	ent[TSTAT_RETENT_TMPTSLO] |= LO10(tmptick);
	ent[TSTAT_RETENT_TIME_LD] |= offsetof(tstat_missdata_t, tmiss_time);
	ent[TSTAT_RETENT_TIME_ST] |= offsetof(tstat_missdata_t, tmiss_time);
}

#if defined(sun4v)
#undef TSTAT_RETENT_TDATASHFT
#endif
#undef TSTAT_RETENT_STATHI
#undef TSTAT_RETENT_STATLO
#undef TSTAT_RETENT_SHIFT
#undef TSTAT_RETENT_COUNT_LD
#undef TSTAT_RETENT_COUNT_ST
#undef TSTAT_RETENT_TMPTSHI
#undef TSTAT_RETENT_TMPTSLO
#undef TSTAT_RETENT_TIME_LD
#undef TSTAT_RETENT_TIME_ST

/*
 * The TSTAT_TLBENT_* constants define offsets in the TLB entry.  They are
 * used only in trapstat_tlbent() (below) and #undef'd immediately afterwards.
 * Any change to "tlbent" in trapstat_tlbent() will likely require changes
 * to these constants.
 */

#ifndef sun4v
#define	TSTAT_TLBENT_STATHI	0
#define	TSTAT_TLBENT_STATLO_LD	1
#define	TSTAT_TLBENT_STATLO_ST	3
#define	TSTAT_TLBENT_MMUASI	15
#define	TSTAT_TLBENT_TPCHI	18
#define	TSTAT_TLBENT_TPCLO_USER	19
#define	TSTAT_TLBENT_TPCLO_KERN	21
#define	TSTAT_TLBENT_TSHI	25
#define	TSTAT_TLBENT_TSLO	27
#define	TSTAT_TLBENT_BA		28
#else /* sun4v */
#define	TSTAT_TLBENT_TDATASHFT	2
#define	TSTAT_TLBENT_STATHI	3
#define	TSTAT_TLBENT_STATLO_LD	5
#define	TSTAT_TLBENT_STATLO_ST	7
#define	TSTAT_TLBENT_TAGTARGET	23
#define	TSTAT_TLBENT_TPCHI	25
#define	TSTAT_TLBENT_TPCLO_USER	26
#define	TSTAT_TLBENT_TPCLO_KERN	28
#define	TSTAT_TLBENT_TSHI	32
#define	TSTAT_TLBENT_TSLO	35
#define	TSTAT_TLBENT_BA		36
#endif /* sun4v */

/*
 * Build the interposing trap table entry for the given I- or D-TLB/MMU miss
 * trap, plus its four TLB return entries (kernel/user x TLB/TSB).
 */
static void
trapstat_tlbent(tstat_percpu_t *tcpu, int entno)
{
	uint32_t *ent;
	uintptr_t orig, va, baoffs;
#ifndef sun4v
	int itlb = entno == TSTAT_ENT_ITLBMISS;
	uint32_t asi = itlb ? ASI(ASI_IMMU) : ASI(ASI_DMMU);
#else
	int itlb = (entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_ITLBMISS);
	uint32_t tagtarget_off = itlb ? MMFSA_I_CTX : MMFSA_D_CTX;
	uint32_t *tent;			/* MMU trap vector entry */
	uintptr_t tentva;		/* MMU trap vector entry va */
	static const uint32_t mmumiss[TSTAT_ENT_NINSTR] = {
	    0x30800000,			/* ba,a addr */
	    NOP, NOP, NOP, NOP, NOP, NOP, NOP
	};
#endif
	int entoffs = entno << TSTAT_ENT_SHIFT;
	uintptr_t tmptick, stat, tpc, utpc;
	tstat_pgszdata_t *data = &tcpu->tcpu_data->tdata_pgsz[0];
	tstat_tlbdata_t *udata, *kdata;
	tstat_tlbret_t *ret;

	/*
	 * When trapstat is run with TLB statistics, this is the entry for
	 * both I- and D-TLB misses; this code performs trap level pushing,
	 * as described in the "TLB Statistics" section of the block comment.
	 * This code is executing at TL 1; %tstate[0] contains the saved
	 * state at the time of the TLB miss.  Pushing trap level 1 (and thus
	 * raising TL to 2) requires us to fill in %tstate[1] with our %pstate,
	 * %cwp and %asi.  We leave %tt unchanged, and we set %tpc and %tnpc to
	 * the appropriate TLB return entry (based on the context of the miss).
	 * Finally, we sample %tick, and stash it in the tdata_tmptick member
	 * the per-CPU tstat_data structure.  tdata_tmptick will be used in
	 * the TLB return entry to determine the amount of time spent in the
	 * TLB miss handler.
	 *
	 * Note that on sun4v platforms, we must obtain the context information
	 * from the MMU fault status area.  (The base address of this MMU fault
	 * status area is kept in the scratchpad register 0.)
	 */
	static const uint32_t tlbent[] = {
#ifndef sun4v
	    0x03000000,			/* sethi %hi(stat), %g1		*/
	    0xc4586000,			/* ldx [%g1 + %lo(stat)], %g2	*/
	    0x8400a001,			/* add %g2, 1, %g2		*/
	    0xc4706000,			/* stx %g2, [%g1 + %lo(stat)]	*/
	    0x85524000,			/* rdpr %cwp, %g2		*/
	    0x87518000,			/* rdpr %pstate, %g3		*/
	    0x8728f008,			/* sllx %g3, 8, %g3		*/
	    0x84108003,			/* or %g2, %g3, %g2		*/
	    0x8740c000,			/* rd %asi, %g3			*/
	    0x8728f018,			/* sllx %g3, 24, %g3		*/
	    0x84108003,			/* or %g2, %g3, %g2		*/
	    0x8350c000,			/* rdpr %tt, %g1		*/
	    0x8f902002,			/* wrpr %g0, 2, %tl		*/
	    0x85908000,			/* wrpr %g2, %g0, %tstate	*/
	    0x87904000,			/* wrpr %g1, %g0, %tt		*/
	    0xc2d80000,			/* ldxa [%g0]ASI_MMU, %g1	*/
	    0x83307030,			/* srlx %g1, CTXSHIFT, %g1	*/
	    0x02c04004,			/* brz,pn %g1, .+0x10		*/
	    0x03000000,			/* sethi %hi(new_tpc), %g1	*/
	    0x82106000,			/* or %g1, %lo(new_tpc), %g1	*/
	    0x30800002,			/* ba,a .+0x8			*/
	    0x82106000,			/* or %g1, %lo(new_tpc), %g1	*/
	    0x81904000,			/* wrpr %g1, %g0, %tpc		*/
	    0x82006004,			/* add %g1, 4, %g1		*/
	    0x83904000,			/* wrpr %g1, %g0, %tnpc		*/
	    0x03000000,			/* sethi %hi(tmptick), %g1	*/
	    0x85410000,			/* rd %tick, %g2		*/
	    0xc4706000,			/* stx %g2, [%g1 + %lo(tmptick)] */
	    0x30800000,			/* ba,a addr			*/
	    NOP, NOP, NOP
#else /* sun4v */
	    0x82102008,			/* mov SCRATCHPAD_CPUID, %g1	*/
	    0xc8d84400,			/* ldxa [%g1]ASI_SCRATCHPAD, %g4 */
	    0x89293000,			/* sllx %g4, TSTAT_DATA_SHIFT, %g4 */
	    0x03000000,			/* sethi %hi(stat), %g1		*/
	    0x82004004,			/* add %g1, %g4, %g1		*/
	    0xc4586000,			/* ldx [%g1 + %lo(stat)], %g2	*/
	    0x8400a001,			/* add %g2, 1, %g2		*/
	    0xc4706000,			/* stx %g2, [%g1 + %lo(stat)]	*/
	    0x85524000,			/* rdpr %cwp, %g2		*/
	    0x87518000,			/* rdpr %pstate, %g3		*/
	    0x8728f008,			/* sllx %g3, 8, %g3		*/
	    0x84108003,			/* or %g2, %g3, %g2		*/
	    0x8740c000,			/* rd %asi, %g3			*/
	    0x8728f018,			/* sllx %g3, 24, %g3		*/
	    0x83540000,			/* rdpr %gl, %g1		*/
	    0x83287028,			/* sllx %g1, 40, %g1		*/
	    0x86104003,			/* or %g1, %g3, %g3		*/
	    0x84108003,			/* or %g2, %g3, %g2		*/
	    0x8350c000,			/* rdpr %tt, %g1		*/
	    0x8f902002,			/* wrpr %g0, 2, %tl		*/
	    0x85908000,			/* wrpr %g2, %g0, %tstate	*/
	    0x87904000,			/* wrpr %g1, %g0, %tt		*/
	    0xc2d80400,			/* ldxa [%g0]ASI_SCRATCHPAD, %g1 */
	    0xc2586000,			/* ldx [%g1 + MMFSA_?_CTX], %g1	*/
	    0x02c04004,			/* brz,pn %g1, .+0x10		*/
	    0x03000000,			/* sethi %hi(new_tpc), %g1	*/
	    0x82106000,			/* or %g1, %lo(new_tpc), %g1	*/
	    0x30800002,			/* ba,a .+0x8			*/
	    0x82106000,			/* or %g1, %lo(new_tpc), %g1	*/
	    0x81904000,			/* wrpr %g1, %g0, %tpc		*/
	    0x82006004,			/* add %g1, 4, %g1		*/
	    0x83904000,			/* wrpr %g1, %g0, %tnpc		*/
	    0x03000000,			/* sethi %hi(tmptick), %g1	*/
	    0x82004004,			/* add %g1, %g4, %g1		*/
	    0x85410000,			/* rd %tick, %g2		*/
	    0xc4706000,			/* stx %g2, [%g1 + %lo(tmptick)] */
	    0x30800000			/* ba,a addr			*/
#endif /* sun4v */
	};

	ASSERT(MUTEX_HELD(&tstat_lock));
#ifndef sun4v
	ASSERT(entno == TSTAT_ENT_ITLBMISS || entno == TSTAT_ENT_DTLBMISS);

	stat = TSTAT_DATA_OFFS(tcpu, tdata_traps) + entoffs;
	tmptick = TSTAT_DATA_OFFS(tcpu, tdata_tmptick);
#else /* sun4v */
	ASSERT(entno == TSTAT_ENT_ITLBMISS || entno == TSTAT_ENT_DTLBMISS ||
	    entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_DMMUMISS);

	stat = TSTAT_CPU0_DATA_OFFS(tcpu, tdata_traps) + entoffs;
	tmptick = TSTAT_CPU0_DATA_OFFS(tcpu, tdata_tmptick);
#endif /* sun4v */

	if (itlb) {
		ret = &tcpu->tcpu_instr->tinst_itlbret;
		udata = &data->tpgsz_user.tmode_itlb;
		kdata = &data->tpgsz_kernel.tmode_itlb;
		tpc = TSTAT_INSTR_OFFS(tcpu, tinst_itlbret.ttlbr_ktlb);
	} else {
		ret = &tcpu->tcpu_instr->tinst_dtlbret;
		udata = &data->tpgsz_user.tmode_dtlb;
		kdata = &data->tpgsz_kernel.tmode_dtlb;
		tpc = TSTAT_INSTR_OFFS(tcpu, tinst_dtlbret.ttlbr_ktlb);
	}

	utpc = tpc + offsetof(tstat_tlbret_t, ttlbr_utlb) -
	    offsetof(tstat_tlbret_t, ttlbr_ktlb);

	/* Both return entries must share one sethi %hi() value. */
	ASSERT(HI22(tpc) == HI22(utpc));

	ent = (uint32_t *)((uintptr_t)tcpu->tcpu_instr + entoffs);
	orig = KERNELBASE + entoffs;
	va = (uintptr_t)tcpu->tcpu_ibase + entoffs;
	baoffs = TSTAT_TLBENT_BA * sizeof (uint32_t);

#ifdef sun4v
	/*
	 * Because of lack of space, interposing tlbent trap handler
	 * for TLB and MMU miss traps cannot be placed in-line.  Instead,
	 * we copy it to the space set aside for shared trap handlers
	 * continuation in the interposing trap table and invoke it by
	 * placing a branch in the trap table itself.
	 */
	tent = ent;		/* trap vector entry */
	tentva = va;		/* trap vector entry va */

	if (itlb) {
		ent = (uint32_t *)((uintptr_t)
		    &tcpu->tcpu_instr->tinst_immumiss);
		va = TSTAT_INSTR_OFFS(tcpu, tinst_immumiss);
	} else {
		ent = (uint32_t *)((uintptr_t)
		    &tcpu->tcpu_instr->tinst_dmmumiss);
		va = TSTAT_INSTR_OFFS(tcpu, tinst_dmmumiss);
	}
	bcopy(mmumiss, tent, sizeof (mmumiss));
	tent[0] |= DISP22(tentva, va);
#endif /* sun4v */

	bcopy(tlbent, ent, sizeof (tlbent));

	/* Patch the template's address/offset fields in place. */
#if defined(sun4v)
	ent[TSTAT_TLBENT_TDATASHFT] |= LO10((uintptr_t)TSTAT_DATA_SHIFT);
#endif
	ent[TSTAT_TLBENT_STATHI] |= HI22(stat);
	ent[TSTAT_TLBENT_STATLO_LD] |= LO10(stat);
	ent[TSTAT_TLBENT_STATLO_ST] |= LO10(stat);
#ifndef sun4v
	ent[TSTAT_TLBENT_MMUASI] |= asi;
#else
	ent[TSTAT_TLBENT_TAGTARGET] |= tagtarget_off;
#endif
	ent[TSTAT_TLBENT_TPCHI] |= HI22(tpc);
	ent[TSTAT_TLBENT_TPCLO_USER] |= LO10(utpc);
	ent[TSTAT_TLBENT_TPCLO_KERN] |= LO10(tpc);
	ent[TSTAT_TLBENT_TSHI] |= HI22(tmptick);
	ent[TSTAT_TLBENT_TSLO] |= LO10(tmptick);
	ent[TSTAT_TLBENT_BA] |= DISP22(va + baoffs, orig);

	/*
	 * And now set up the TLB return entries.
	 */
	trapstat_tlbretent(tcpu, &ret->ttlbr_ktlb, &kdata->ttlb_tlb);
	trapstat_tlbretent(tcpu, &ret->ttlbr_ktsb, &kdata->ttlb_tsb);
	trapstat_tlbretent(tcpu, &ret->ttlbr_utlb, &udata->ttlb_tlb);
	trapstat_tlbretent(tcpu, &ret->ttlbr_utsb, &udata->ttlb_tsb);
}

#if defined(sun4v)
#undef TSTAT_TLBENT_TDATASHFT
#endif
#undef TSTAT_TLBENT_STATHI
#undef TSTAT_TLBENT_STATLO_LD
#undef TSTAT_TLBENT_STATLO_ST
#ifndef sun4v
#undef TSTAT_TLBENT_MMUASI
#else
#undef TSTAT_TLBENT_TAGTARGET
#endif
#undef TSTAT_TLBENT_TPCHI
#undef TSTAT_TLBENT_TPCLO_USER
#undef TSTAT_TLBENT_TPCLO_KERN
#undef TSTAT_TLBENT_TSHI
#undef TSTAT_TLBENT_TSLO
#undef TSTAT_TLBENT_BA

/*
 * The TSTAT_ENABLED_* constants define offsets in the enabled entry; the
 * TSTAT_DISABLED_BA constant defines an offset in the disabled entry.  Both
 * sets of constants are used only in trapstat_make_traptab() (below) and
 * #undef'd immediately afterwards.  Any change to "enabled" or "disabled"
 * in trapstat_make_traptab() will likely require changes to these constants.
 */
#ifndef sun4v
#define	TSTAT_ENABLED_STATHI	0
#define	TSTAT_ENABLED_STATLO_LD	1
#define	TSTAT_ENABLED_STATLO_ST	3
#define	TSTAT_ENABLED_BA	4
#define	TSTAT_DISABLED_BA	0

/*
 * Populate this CPU's interposing trap table:  one counting ("enabled") or
 * pass-through ("disabled") entry per underlying trap table entry.
 */
static void
trapstat_make_traptab(tstat_percpu_t *tcpu)
{
	uint32_t *ent;
	uint64_t *stat;
	uintptr_t orig, va, en_baoffs, dis_baoffs;
	int nent;

	/*
	 * This is the entry in the interposing trap table for enabled trap
	 * table entries.  It loads a counter, increments it and stores it
	 * back before branching to the actual trap table entry.
	 */
	static const uint32_t enabled[TSTAT_ENT_NINSTR] = {
	    0x03000000,			/* sethi %hi(stat), %g1		*/
	    0xc4586000,			/* ldx [%g1 + %lo(stat)], %g2	*/
	    0x8400a001,			/* add %g2, 1, %g2		*/
	    0xc4706000,			/* stx %g2, [%g1 + %lo(stat)]	*/
	    0x30800000,			/* ba,a addr			*/
	    NOP, NOP, NOP
	};

	/*
	 * This is the entry in the interposing trap table for disabled trap
	 * table entries.  It simply branches to the actual, underlying trap
	 * table entry.  As explained in the "Implementation Details" section
	 * of the block comment, all TL>0 traps _must_ use the disabled entry;
	 * additional entries may be explicitly disabled through the use
	 * of TSTATIOC_ENTRY/TSTATIOC_NOENTRY.
	 */
	static const uint32_t disabled[TSTAT_ENT_NINSTR] = {
	    0x30800000,			/* ba,a addr			*/
	    NOP, NOP, NOP, NOP, NOP, NOP, NOP,
	};

	ASSERT(MUTEX_HELD(&tstat_lock));

	ent = tcpu->tcpu_instr->tinst_traptab;
	stat = (uint64_t *)TSTAT_DATA_OFFS(tcpu, tdata_traps);
	orig = KERNELBASE;
	va = (uintptr_t)tcpu->tcpu_ibase;
	en_baoffs = TSTAT_ENABLED_BA * sizeof (uint32_t);
	dis_baoffs = TSTAT_DISABLED_BA * sizeof (uint32_t);

	for (nent = 0; nent < TSTAT_TOTAL_NENT; nent++) {
		if (tstat_enabled[nent]) {
			bcopy(enabled, ent, sizeof (enabled));
			ent[TSTAT_ENABLED_STATHI] |= HI22((uintptr_t)stat);
			ent[TSTAT_ENABLED_STATLO_LD] |= LO10((uintptr_t)stat);
			ent[TSTAT_ENABLED_STATLO_ST] |= LO10((uintptr_t)stat);
			ent[TSTAT_ENABLED_BA] |= DISP22(va + en_baoffs, orig);
		} else {
			bcopy(disabled, ent, sizeof (disabled));
			ent[TSTAT_DISABLED_BA] |= DISP22(va + dis_baoffs, orig);
		}

		stat++;
		orig += sizeof (enabled);
		ent += sizeof (enabled) / sizeof (*ent);
		va += sizeof (enabled);
	}
}

#undef TSTAT_ENABLED_STATHI
#undef TSTAT_ENABLED_STATLO_LD
#undef TSTAT_ENABLED_STATLO_ST
#undef TSTAT_ENABLED_BA
#undef TSTAT_DISABLED_BA

#else /* sun4v */

#define	TSTAT_ENABLED_STATHI	0
#define	TSTAT_ENABLED_STATLO	1
#define	TSTAT_ENABLED_ADDRHI	2
#define	TSTAT_ENABLED_ADDRLO	3
#define	TSTAT_ENABLED_CONTBA	6
#define	TSTAT_ENABLED_TDATASHFT	7
#define	TSTAT_DISABLED_BA	0

static void
trapstat_make_traptab(tstat_percpu_t *tcpu)
{
	uint32_t *ent;
	uint64_t *stat;
	uintptr_t orig, va, en_baoffs, dis_baoffs;
	uintptr_t tstat_cont_va;
	int nent;

	/*
	 * This is the entry in the interposing trap table for enabled trap
	 * table entries.  It loads a counter, increments it and stores it
	 * back before branching to the actual trap table entry.
	 *
	 * All CPUs share the same interposing trap entry to count the
	 * number of traps.  Note that the trap counter is kept in per CPU
	 * trap statistics area.  Its address is obtained dynamically by
	 * adding the offset of that CPU's trap statistics area from CPU 0
	 * (i.e. cpu_id * TSTAT_DATA_SIZE) to the address of the CPU 0
	 * trap counter already coded in the interposing trap entry itself.
	 *
	 * Since this interposing code sequence to count traps takes more
	 * than 8 instructions, it's split in two parts as follows:
	 *
	 *	tstat_trapcnt:
	 *		sethi %hi(stat), %g1
	 *		or %g1, %lo[stat), %g1	! %g1 = CPU0 trap counter addr
	 *		sethi %hi(addr), %g2
	 *		or %g2, %lo(addr), %g2	! %g2 = real trap handler addr
	 *		mov ASI_SCRATCHPAD_CPUID, %g3
	 *		ldxa [%g3]ASI_SCRATCHPAD, %g3	! %g3 = CPU ID
	 *		ba tstat_trapcnt_cont	! branch to tstat_trapcnt_cont
	 *		sllx %g3, TSTAT_DATA_SHIFT, %g3	! %g3 = CPU trapstat
	 *						! data offset
	 *
	 *	tstat_trapcnt_cont:
	 *		ldx [%g1 + %g3], %g4	! get counter value
	 *		add %g4, 1, %g4		! increment value
	 *		jmp %g2			! jump to original trap handler
	 *		stx %g4, [%g1 + %g3]	! store counter value
	 *
	 * First part, i.e. tstat_trapcnt, is per trap and is kept in-line in
	 * the interposing trap table.  However, the tstat_trapcnt_cont code
	 * sequence is shared by all traps and is kept right after the
	 * the interposing trap table.
	 */
	static const uint32_t enabled[TSTAT_ENT_NINSTR] = {
	    0x03000000,			/* sethi %hi(stat), %g1		*/
	    0x82106000,			/* or %g1, %lo[stat), %g1	*/
	    0x05000000,			/* sethi %hi(addr), %g2		*/
	    0x8410a000,			/* or %g2, %lo(addr), %g2	*/
	    0x86102008,			/* mov ASI_SCRATCHPAD_CPUID, %g3 */
	    0xc6d8c400,			/* ldxa [%g3]ASI_SCRATCHPAD, %g3 */
	    0x10800000,			/* ba enabled_cont		*/
	    0x8728f000			/* sllx %g3, TSTAT_DATA_SHIFT, %g3 */
	};

	static const uint32_t enabled_cont[TSTAT_ENT_NINSTR] = {
	    0xc8584003,			/* ldx [%g1 + %g3], %g4		*/
	    0x88012001,			/* add %g4, 1, %g4		*/
	    0x81c08000,			/* jmp %g2			*/
	    0xc8704003,			/* stx %g4, [%g1 + %g3]		*/
	    NOP, NOP, NOP, NOP
	};

	/*
	 * This is the entry in the interposing trap table for disabled trap
	 * table entries.  It simply branches to the actual, underlying trap
	 * table entry.  As explained in the "Implementation Details" section
	 * of the block comment, all TL>0 traps _must_ use the disabled entry;
	 * additional entries may be explicitly disabled through the use
	 * of TSTATIOC_ENTRY/TSTATIOC_NOENTRY.
	 */
	static const uint32_t disabled[TSTAT_ENT_NINSTR] = {
	    0x30800000,			/* ba,a addr			*/
	    NOP, NOP, NOP, NOP, NOP, NOP, NOP,
	};

	ASSERT(MUTEX_HELD(&tstat_lock));
	ent = tcpu->tcpu_instr->tinst_traptab;
	stat = (uint64_t *)TSTAT_CPU0_DATA_OFFS(tcpu, tdata_traps);
	orig = KERNELBASE;
	va = (uintptr_t)tcpu->tcpu_ibase;
	en_baoffs = TSTAT_ENABLED_CONTBA * sizeof (uint32_t);
	dis_baoffs = TSTAT_DISABLED_BA * sizeof (uint32_t);
	tstat_cont_va = TSTAT_INSTR_OFFS(tcpu, tinst_trapcnt);

	for (nent = 0; nent < TSTAT_TOTAL_NENT; nent++) {
		if (tstat_enabled[nent]) {
			bcopy(enabled, ent, sizeof (enabled));
			ent[TSTAT_ENABLED_STATHI] |= HI22((uintptr_t)stat);
			ent[TSTAT_ENABLED_STATLO] |= LO10((uintptr_t)stat);
			ent[TSTAT_ENABLED_ADDRHI] |= HI22((uintptr_t)orig);
			ent[TSTAT_ENABLED_ADDRLO] |= LO10((uintptr_t)orig);
			ent[TSTAT_ENABLED_CONTBA] |=
			    DISP22(va + en_baoffs, tstat_cont_va);
			ent[TSTAT_ENABLED_TDATASHFT] |=
			    LO10((uintptr_t)TSTAT_DATA_SHIFT);
		} else {
			bcopy(disabled, ent, sizeof (disabled));
			ent[TSTAT_DISABLED_BA] |= DISP22(va + dis_baoffs, orig);
		}

		stat++;
		orig += sizeof (enabled);
		ent += sizeof (enabled) / sizeof (*ent);
		va += sizeof (enabled);
	}
	/* Install the shared continuation sequence after the trap table. */
	bcopy(enabled_cont, (uint32_t *)tcpu->tcpu_instr->tinst_trapcnt,
	    sizeof (enabled_cont));
}

#undef TSTAT_ENABLED_TDATASHFT
#undef TSTAT_ENABLED_STATHI
#undef TSTAT_ENABLED_STATLO
#undef TSTAT_ENABLED_ADDRHI
#undef TSTAT_ENABLED_ADDRLO
#undef TSTAT_ENABLED_CONTBA
#undef TSTAT_DISABLED_BA

#endif /* sun4v */

#ifndef sun4v
/*
 * See Section A.6 in SPARC v9 Manual.
 * max branch = 4*((2^21)-1) = 8388604
 */
#define	MAX_BICC_BRANCH_DISPLACEMENT (4 * ((1 << 21) - 1))
#endif

/*
 * Allocate and construct the interposing trap table, data area and TLB
 * return entries for the given CPU, then cross call it to lock the pages
 * into its TLB.  Called with both cpu_lock and tstat_lock held.
 */
static void
trapstat_setup(processorid_t cpu)
{
	tstat_percpu_t *tcpu = &tstat_percpu[cpu];
#ifndef sun4v
	int i;
	caddr_t va;
	pfn_t *pfn;
	cpu_t *cp;
	uint_t strand_idx;
	size_t tstat_offset;
#endif

	ASSERT(tcpu->tcpu_pfn == NULL);
	ASSERT(tcpu->tcpu_instr == NULL);
	ASSERT(tcpu->tcpu_data == NULL);
	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
	ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED));
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&tstat_lock));

#ifndef sun4v
	/*
	 * The lower fifteen bits of the %tba are always read as zero; we must
	 * align our instruction base address appropriately.
	 */
	tstat_offset = tstat_total_size;

	cp = cpu_get(cpu);
	ASSERT(cp != NULL);
	if ((strand_idx = cpu ^ pg_plat_hw_instance_id(cp, PGHW_IPIPE)) != 0) {
		/*
		 * On sun4u platforms with multiple CPUs sharing the MMU
		 * (Olympus-C has 2 strands per core), each CPU uses a
		 * disjoint trap table.  The indexing is based on the
		 * strand id, which is obtained by XOR'ing the cpuid with
		 * the coreid.
		 */
		tstat_offset += tstat_total_size * strand_idx;

		/*
		 * Offset must be less than the maximum PC-relative branch
		 * displacement for Bicc variants.  See the Implementation
		 * Details comment.
		 */
		ASSERT(tstat_offset <= MAX_BICC_BRANCH_DISPLACEMENT);
	}

	tcpu->tcpu_ibase = (caddr_t)((KERNELBASE - tstat_offset)
	    & TSTAT_TBA_MASK);
	tcpu->tcpu_dbase = tcpu->tcpu_ibase + TSTAT_INSTR_SIZE;
	tcpu->tcpu_vabase = tcpu->tcpu_ibase;

	tcpu->tcpu_pfn = vmem_alloc(tstat_arena, tstat_total_pages, VM_SLEEP);
	bzero(tcpu->tcpu_pfn, tstat_total_pages);
	pfn = tcpu->tcpu_pfn;

	tcpu->tcpu_instr = vmem_alloc(tstat_arena, TSTAT_INSTR_SIZE, VM_SLEEP);

	/* Record the backing pfn of each instruction page. */
	va = (caddr_t)tcpu->tcpu_instr;
	for (i = 0; i < TSTAT_INSTR_PAGES; i++, va += MMU_PAGESIZE)
		*pfn++ = hat_getpfnum(kas.a_hat, va);

	/*
	 * We must be sure that the pages that we will use to examine the data
	 * have the same virtual color as the pages to which the data is being
	 * recorded, hence the alignment and phase constraints on the
	 * allocation.
	 */
	tcpu->tcpu_data = vmem_xalloc(tstat_arena, tstat_data_size,
	    shm_alignment, (uintptr_t)tcpu->tcpu_dbase & (shm_alignment - 1),
	    0, 0, NULL, VM_SLEEP);
	bzero(tcpu->tcpu_data, tstat_data_size);
	tcpu->tcpu_data->tdata_cpuid = cpu;

	va = (caddr_t)tcpu->tcpu_data;
	for (i = 0; i < tstat_data_pages; i++, va += MMU_PAGESIZE)
		*pfn++ = hat_getpfnum(kas.a_hat, va);

	/*
	 * Now that we have all of the instruction and data pages allocated,
	 * make the trap table from scratch.
	 */
	trapstat_make_traptab(tcpu);

	if (tstat_options & TSTAT_OPT_TLBDATA) {
		/*
		 * TLB Statistics have been specified; set up the I- and D-TLB
		 * entries and corresponding TLB return entries.
		 */
		trapstat_tlbent(tcpu, TSTAT_ENT_ITLBMISS);
		trapstat_tlbent(tcpu, TSTAT_ENT_DTLBMISS);
	}

#else /* sun4v */

	/*
	 * The lower fifteen bits of the %tba are always read as zero; hence
	 * it must be aligned at least on 512K boundary.
	 */
	tcpu->tcpu_vabase = (caddr_t)(KERNELBASE - MMU_PAGESIZE4M);
	tcpu->tcpu_ibase = tcpu->tcpu_vabase;
	tcpu->tcpu_dbase = tcpu->tcpu_ibase + TSTAT_INSTR_SIZE +
	    cpu * TSTAT_DATA_SIZE;

	tcpu->tcpu_pfn = &tstat_pfn;
	tcpu->tcpu_instr = (tstat_instr_t *)tstat_va;
	tcpu->tcpu_data = (tstat_data_t *)(tstat_va + TSTAT_INSTR_SIZE +
	    cpu * TSTAT_DATA_SIZE);
	bzero(tcpu->tcpu_data, TSTAT_DATA_SIZE);
	tcpu->tcpu_data->tdata_cpuid = cpu;

	/*
	 * Now that we have all of the instruction and data pages allocated,
	 * make the trap table from scratch.  It should be done only once
	 * as it is shared by all CPUs.
	 */
	if (!tstat_traptab_initialized)
		trapstat_make_traptab(tcpu);

	if (tstat_options & TSTAT_OPT_TLBDATA) {
		/*
		 * TLB Statistics have been specified; set up the I- and D-TLB
		 * entries and corresponding TLB return entries.
		 */
		if (!tstat_traptab_initialized) {
			if (tstat_fast_tlbstat) {
				trapstat_tlbent(tcpu, TSTAT_ENT_IMMUMISS);
				trapstat_tlbent(tcpu, TSTAT_ENT_DMMUMISS);
			} else {
				trapstat_tlbent(tcpu, TSTAT_ENT_ITLBMISS);
				trapstat_tlbent(tcpu, TSTAT_ENT_DTLBMISS);
			}
		}
	}
	tstat_traptab_initialized = 1;
#endif /* sun4v */

	tcpu->tcpu_flags |= TSTAT_CPU_ALLOCATED;

	/*
	 * Finally, get the target CPU to load the locked pages into its TLBs.
	 */
	xc_one(cpu, (xcfunc_t *)trapstat_load_tlb, 0, 0);
}

/*
 * Undo trapstat_setup() for the given CPU:  flush the interposing pages
 * from its TLB and release the per-CPU resources.  Called with both
 * cpu_lock and tstat_lock held; the CPU must already be disabled.
 */
static void
trapstat_teardown(processorid_t cpu)
{
	tstat_percpu_t *tcpu = &tstat_percpu[cpu];
#ifndef sun4v
	int i;
#endif
	caddr_t va = tcpu->tcpu_vabase;

	ASSERT(tcpu->tcpu_pfn != NULL);
	ASSERT(tcpu->tcpu_instr != NULL);
	ASSERT(tcpu->tcpu_data != NULL);
	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
	ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&tstat_lock));

#ifndef sun4v
	vmem_free(tstat_arena, tcpu->tcpu_pfn, tstat_total_pages);
	vmem_free(tstat_arena, tcpu->tcpu_instr, TSTAT_INSTR_SIZE);
	vmem_free(tstat_arena, tcpu->tcpu_data, tstat_data_size);

	/* Cross-trap the target CPU to flush each interposing page. */
	for (i = 0; i < tstat_total_pages; i++, va += MMU_PAGESIZE) {
		xt_one(cpu, vtag_flushpage_tl1, (uint64_t)va,
		    (uint64_t)ksfmmup);
	}
#else
	xt_one(cpu, vtag_unmap_perm_tl1, (uint64_t)va, KCONTEXT);
#endif

	tcpu->tcpu_pfn = NULL;
	tcpu->tcpu_instr = NULL;
	tcpu->tcpu_data = NULL;
	tcpu->tcpu_flags &= ~TSTAT_CPU_ALLOCATED;
}

static int
trapstat_go()
{
	cpu_t *cp;

	mutex_enter(&cpu_lock);
	mutex_enter(&tstat_lock);

	if (tstat_running) {
		mutex_exit(&tstat_lock);
		mutex_exit(&cpu_lock);
		return (EBUSY);
	}

#ifdef sun4v
	/*
	 * Allocate large page to hold interposing tables.
	 */
	tstat_va = contig_mem_alloc(MMU_PAGESIZE4M);
	tstat_pfn = va_to_pfn(tstat_va);
	if (tstat_pfn == PFN_INVALID) {
		mutex_exit(&tstat_lock);
		mutex_exit(&cpu_lock);
		return (EAGAIN);
	}

	/*
	 * For detailed TLB statistics, invoke CPU specific interface
	 * to see if it supports a low overhead interface to collect
	 * TSB hit statistics.
If so, make set tstat_fast_tlbstat flag 1825 * to reflect that. 1826 */ 1827 if (tstat_options & TSTAT_OPT_TLBDATA) { 1828 int error; 1829 1830 tstat_fast_tlbstat = B_FALSE; 1831 error = cpu_trapstat_conf(CPU_TSTATCONF_INIT); 1832 if (error == 0) 1833 tstat_fast_tlbstat = B_TRUE; 1834 else if (error != ENOTSUP) { 1835 contig_mem_free(tstat_va, MMU_PAGESIZE4M); 1836 mutex_exit(&tstat_lock); 1837 mutex_exit(&cpu_lock); 1838 return (error); 1839 } 1840 } 1841 #endif /* sun4v */ 1842 1843 /* 1844 * First, perform any necessary hot patching. 1845 */ 1846 trapstat_hotpatch(); 1847 1848 /* 1849 * Allocate the resources we'll need to measure probe effect. 1850 */ 1851 trapstat_probe_alloc(); 1852 1853 1854 cp = cpu_list; 1855 do { 1856 if (!(tstat_percpu[cp->cpu_id].tcpu_flags & TSTAT_CPU_SELECTED)) 1857 continue; 1858 1859 trapstat_setup(cp->cpu_id); 1860 1861 /* 1862 * Note that due to trapstat_probe()'s use of global data, 1863 * we determine the probe effect on each CPU serially instead 1864 * of in parallel with an xc_all(). 
1865 */ 1866 xc_one(cp->cpu_id, (xcfunc_t *)trapstat_probe, 0, 0); 1867 } while ((cp = cp->cpu_next) != cpu_list); 1868 1869 xc_all((xcfunc_t *)trapstat_enable, 0, 0); 1870 1871 trapstat_probe_free(); 1872 tstat_running = 1; 1873 mutex_exit(&tstat_lock); 1874 mutex_exit(&cpu_lock); 1875 1876 return (0); 1877 } 1878 1879 static int 1880 trapstat_stop() 1881 { 1882 int i; 1883 1884 mutex_enter(&cpu_lock); 1885 mutex_enter(&tstat_lock); 1886 if (!tstat_running) { 1887 mutex_exit(&tstat_lock); 1888 mutex_exit(&cpu_lock); 1889 return (ENXIO); 1890 } 1891 1892 xc_all((xcfunc_t *)trapstat_disable, 0, 0); 1893 1894 for (i = 0; i <= max_cpuid; i++) { 1895 if (tstat_percpu[i].tcpu_flags & TSTAT_CPU_ALLOCATED) 1896 trapstat_teardown(i); 1897 } 1898 1899 #ifdef sun4v 1900 tstat_traptab_initialized = 0; 1901 if (tstat_options & TSTAT_OPT_TLBDATA) 1902 cpu_trapstat_conf(CPU_TSTATCONF_FINI); 1903 contig_mem_free(tstat_va, MMU_PAGESIZE4M); 1904 #endif 1905 trapstat_hotpatch(); 1906 tstat_running = 0; 1907 mutex_exit(&tstat_lock); 1908 mutex_exit(&cpu_lock); 1909 1910 return (0); 1911 } 1912 1913 /* 1914 * This is trapstat's DR CPU configuration callback. It's called (with 1915 * cpu_lock held) to unconfigure a newly powered-off CPU, or to configure a 1916 * powered-off CPU that is to be brought into the system. We need only take 1917 * action in the unconfigure case: because a powered-off CPU will have its 1918 * trap table restored to KERNELBASE if it is ever powered back on, we must 1919 * update the flags to reflect that trapstat is no longer enabled on the 1920 * powered-off CPU. Note that this means that a TSTAT_CPU_ENABLED CPU that 1921 * is unconfigured/powered off and later powered back on/reconfigured will 1922 * _not_ be re-TSTAT_CPU_ENABLED. 
 */
static int
trapstat_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
	tstat_percpu_t *tcpu = &tstat_percpu[cpu];

	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&tstat_lock);

	/* If trapstat isn't running, there is no per-CPU state to adjust. */
	if (!tstat_running) {
		mutex_exit(&tstat_lock);
		return (0);
	}

	switch (what) {
	case CPU_CONFIG:
		ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
		break;

	case CPU_UNCONFIG:
		if (tcpu->tcpu_flags & TSTAT_CPU_ENABLED) {
			tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED;
#ifdef sun4v
			/*
			 * A power-off causes the cpu mondo queues to be
			 * unconfigured on sun4v.  Since we can't teardown
			 * trapstat's mappings on the cpu that is going away,
			 * we simply mark it as not allocated.  This will
			 * prevent a teardown on a cpu with the same cpu id
			 * that might have been added while trapstat is
			 * running.
			 */
			if (tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED) {
				tcpu->tcpu_pfn = NULL;
				tcpu->tcpu_instr = NULL;
				tcpu->tcpu_data = NULL;
				tcpu->tcpu_flags &= ~TSTAT_CPU_ALLOCATED;
			}
#endif
		}
		break;

	default:
		break;
	}

	mutex_exit(&tstat_lock);
	return (0);
}

/*
 * This is called before a CPR suspend and after a CPR resume.  We don't have
 * anything to do before a suspend, but after a restart we must restore the
 * trap table to be our interposing trap table.  However, we don't actually
 * know whether or not the CPUs have been powered off -- this routine may be
 * called while restoring from a failed CPR suspend.  We thus run through each
 * TSTAT_CPU_ENABLED CPU, and explicitly destroy and reestablish its
 * interposing trap table.  This assures that our state is correct regardless
 * of whether or not the CPU has been newly powered on.
 */
/*ARGSUSED*/
static boolean_t
trapstat_cpr(void *arg, int code)
{
	cpu_t *cp;

	/* Nothing to do on the way into a checkpoint. */
	if (code == CB_CODE_CPR_CHKPT)
		return (B_TRUE);

	ASSERT(code == CB_CODE_CPR_RESUME);

	mutex_enter(&cpu_lock);
	mutex_enter(&tstat_lock);

	if (!tstat_running) {
		mutex_exit(&tstat_lock);
		mutex_exit(&cpu_lock);
		return (B_TRUE);
	}

	cp = cpu_list;
	do {
		tstat_percpu_t *tcpu = &tstat_percpu[cp->cpu_id];

		if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED))
			continue;

		ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
		ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);

		xc_one(cp->cpu_id, (xcfunc_t *)trapstat_disable, 0, 0);
		ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));

		/*
		 * Preserve this CPU's data in tstat_buffer and rip down its
		 * interposing trap table.
		 */
		bcopy(tcpu->tcpu_data, tstat_buffer, tstat_data_t_size);
		trapstat_teardown(cp->cpu_id);
		ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED));

		/*
		 * Reestablish the interposing trap table and restore the old
		 * data.
		 */
		trapstat_setup(cp->cpu_id);
		ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
		bcopy(tstat_buffer, tcpu->tcpu_data, tstat_data_t_size);

		xc_one(cp->cpu_id, (xcfunc_t *)trapstat_enable, 0, 0);
	} while ((cp = cp->cpu_next) != cpu_list);

	mutex_exit(&tstat_lock);
	mutex_exit(&cpu_lock);

	return (B_TRUE);
}

/*
 * open(9E) entry point:  trapstat is a single-open device.  Opening it
 * resets all options, selects every CPU and enables all TL=0 entries.
 */
/*ARGSUSED*/
static int
trapstat_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	int i;

	mutex_enter(&cpu_lock);
	mutex_enter(&tstat_lock);
	if (tstat_open != 0) {
		mutex_exit(&tstat_lock);
		mutex_exit(&cpu_lock);
		return (EBUSY);
	}

	/*
	 * Register this in open() rather than in attach() to prevent deadlock
	 * with DR code.  During attach, I/O device tree locks are grabbed
	 * before trapstat_attach() is invoked - registering in attach
	 * will result in the lock order:  device tree lock, cpu_lock.
	 * DR code however requires that cpu_lock be acquired before
	 * device tree locks.
	 */
	ASSERT(!tstat_running);
	register_cpu_setup_func((cpu_setup_func_t *)trapstat_cpu_setup, NULL);

	/*
	 * Clear all options.  And until specific CPUs are specified, we'll
	 * mark all CPUs as selected.
	 */
	tstat_options = 0;

	for (i = 0; i <= max_cpuid; i++)
		tstat_percpu[i].tcpu_flags |= TSTAT_CPU_SELECTED;

	/*
	 * By default, all traps at TL=0 are enabled.  Traps at TL>0 must
	 * be disabled.
	 */
	for (i = 0; i < TSTAT_TOTAL_NENT; i++)
		tstat_enabled[i] = i < TSTAT_NENT ? 1 : 0;

	tstat_open = 1;
	mutex_exit(&tstat_lock);
	mutex_exit(&cpu_lock);

	return (0);
}

/*
 * close(9E) entry point:  stops trapstat (if running) and unregisters
 * the DR CPU setup callback registered in trapstat_open().
 */
/*ARGSUSED*/
static int
trapstat_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	(void) trapstat_stop();

	ASSERT(!tstat_running);

	mutex_enter(&cpu_lock);
	unregister_cpu_setup_func((cpu_setup_func_t *)trapstat_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	tstat_open = 0;
	return (DDI_SUCCESS);
}

/*
 * Record the given option bit in tstat_options.  Fails with EBUSY if
 * trapstat is currently running (options may not change mid-run).
 */
static int
trapstat_option(int option)
{
	mutex_enter(&tstat_lock);

	if (tstat_running) {
		mutex_exit(&tstat_lock);
		return (EBUSY);
	}

	tstat_options |= option;
	mutex_exit(&tstat_lock);

	return (0);
}

/*
 * ioctl(9E) entry point:  implements the TSTATIOC_* command set used by
 * the trapstat(1M) utility.
 */
/*ARGSUSED*/
static int
trapstat_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *crd, int *rval)
{
	int i, j, out;
	size_t dsize;

	switch (cmd) {
	case TSTATIOC_GO:
		return (trapstat_go());

	case TSTATIOC_NOGO:
		return (trapstat_option(TSTAT_OPT_NOGO));

	case TSTATIOC_STOP:
		return (trapstat_stop());

	case TSTATIOC_CPU:
		if (arg < 0 || arg > max_cpuid)
			return (EINVAL);
		/*FALLTHROUGH*/

	case TSTATIOC_NOCPU:
		mutex_enter(&tstat_lock);

		if (tstat_running) {
			mutex_exit(&tstat_lock);
			return (EBUSY);
		}

		/*
		 * If this is the first CPU to be specified (or if we are
		 * being asked to explicitly de-select CPUs), disable all CPUs.
		 */
		if (!(tstat_options & TSTAT_OPT_CPU) || cmd == TSTATIOC_NOCPU) {
			tstat_options |= TSTAT_OPT_CPU;

			for (i = 0; i <= max_cpuid; i++) {
				tstat_percpu_t *tcpu = &tstat_percpu[i];

				ASSERT(cmd == TSTATIOC_NOCPU ||
				    (tcpu->tcpu_flags & TSTAT_CPU_SELECTED));
				tcpu->tcpu_flags &= ~TSTAT_CPU_SELECTED;
			}
		}

		if (cmd == TSTATIOC_CPU)
			tstat_percpu[arg].tcpu_flags |= TSTAT_CPU_SELECTED;

		mutex_exit(&tstat_lock);

		return (0);

	case TSTATIOC_ENTRY:
		mutex_enter(&tstat_lock);

		if (tstat_running) {
			mutex_exit(&tstat_lock);
			return (EBUSY);
		}

		if (arg >= TSTAT_NENT || arg < 0) {
			mutex_exit(&tstat_lock);
			return (EINVAL);
		}

		if (!(tstat_options & TSTAT_OPT_ENTRY)) {
			/*
			 * If this is the first entry that we are explicitly
			 * enabling, explicitly disable every TL=0 entry.
			 */
			for (i = 0; i < TSTAT_NENT; i++)
				tstat_enabled[i] = 0;

			tstat_options |= TSTAT_OPT_ENTRY;
		}

		tstat_enabled[arg] = 1;
		mutex_exit(&tstat_lock);
		return (0);

	case TSTATIOC_NOENTRY:
		mutex_enter(&tstat_lock);

		if (tstat_running) {
			mutex_exit(&tstat_lock);
			return (EBUSY);
		}

		for (i = 0; i < TSTAT_NENT; i++)
			tstat_enabled[i] = 0;

		mutex_exit(&tstat_lock);
		return (0);

	case TSTATIOC_READ:
		mutex_enter(&tstat_lock);

		/* TLB data is exported using the user-visible page sizes. */
		if (tstat_options & TSTAT_OPT_TLBDATA) {
			dsize = tstat_data_t_exported_size;
		} else {
			dsize = sizeof (tstat_data_t);
		}

		for (i = 0, out = 0; i <= max_cpuid; i++) {
			tstat_percpu_t *tcpu = &tstat_percpu[i];

			if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED))
				continue;

			ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
			ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);

			/*
			 * tdata_cpuid doubles as a did-the-xcall-run flag:
			 * trapstat_snapshot() overwrites the -1 sentinel.
			 */
			tstat_buffer->tdata_cpuid = -1;
			xc_one(i, (xcfunc_t *)trapstat_snapshot, 0, 0);

			if (tstat_buffer->tdata_cpuid == -1) {
				/*
				 * This CPU is not currently responding to
				 * cross calls; we have caught it while it is
				 * being unconfigured.  We'll drop tstat_lock
				 * and pick up and drop cpu_lock.  By the
				 * time we acquire cpu_lock, the DR operation
				 * will appear consistent and we can assert
				 * that trapstat_cpu_setup() has cleared
				 * TSTAT_CPU_ENABLED.
				 */
				mutex_exit(&tstat_lock);
				mutex_enter(&cpu_lock);
				mutex_exit(&cpu_lock);
				mutex_enter(&tstat_lock);
				ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
				continue;
			}

			/*
			 * Need to compensate for the difference between page
			 * sizes exported to users and page sizes available
			 * within the kernel.
			 */
			if ((tstat_options & TSTAT_OPT_TLBDATA) &&
			    (tstat_pgszs != tstat_user_pgszs)) {
				tstat_pgszdata_t *tp;
				uint_t szc;

				tp = &tstat_buffer->tdata_pgsz[0];
				for (j = 0; j < tstat_user_pgszs; j++) {
					if ((szc = USERSZC_2_SZC(j)) != j) {
						bcopy(&tp[szc], &tp[j],
						    sizeof (tstat_pgszdata_t));
					}
				}
			}

			if (copyout(tstat_buffer, (void *)arg, dsize) != 0) {
				mutex_exit(&tstat_lock);
				return (EFAULT);
			}

			out++;
			arg += dsize;
		}

		/*
		 * If fewer than max_cpuid + 1 records were copied out,
		 * mark the record past the last one copied by setting its
		 * tdata_cpuid to -1.
		 */
		if (out != max_cpuid + 1) {
			processorid_t cpuid = -1;
			arg += offsetof(tstat_data_t, tdata_cpuid);

			if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) {
				mutex_exit(&tstat_lock);
				return (EFAULT);
			}
		}

		mutex_exit(&tstat_lock);

		return (0);

	case TSTATIOC_TLBDATA:
		return (trapstat_option(TSTAT_OPT_TLBDATA));

	default:
		break;
	}

	return (ENOTTY);
}

/*
 * getinfo(9E) entry point.
 */
/*ARGSUSED*/
static int
trapstat_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)tstat_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*
 * attach(9E) entry point:  create the minor node, size the per-CPU data
 * structures from the kernel and user page-size counts, create the
 * trapstat vmem arena and register the CPR callback.
 */
static int
trapstat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "trapstat", S_IFCHR,
	    0, DDI_PSEUDO, 0) == DDI_FAILURE) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	tstat_devi = devi;

	tstat_pgszs = page_num_pagesizes();
	tstat_user_pgszs = page_num_user_pagesizes(0);
	tstat_data_t_size = sizeof (tstat_data_t) +
	    (tstat_pgszs - 1) * sizeof (tstat_pgszdata_t);
	tstat_data_t_exported_size = sizeof (tstat_data_t) +
	    (tstat_user_pgszs - 1) * sizeof (tstat_pgszdata_t);
#ifndef sun4v
	tstat_data_pages = (tstat_data_t_size >> MMU_PAGESHIFT) + 1;
	tstat_total_pages = TSTAT_INSTR_PAGES + tstat_data_pages;
	tstat_data_size = tstat_data_pages * MMU_PAGESIZE;
	tstat_total_size = TSTAT_INSTR_SIZE + tstat_data_size;
#else
	ASSERT(tstat_data_t_size <= TSTAT_DATA_SIZE);
#endif

	tstat_percpu = kmem_zalloc((max_cpuid + 1) *
	    sizeof (tstat_percpu_t), KM_SLEEP);

	/*
	 * Create our own arena backed by segkmem to assure a source of
	 * MMU_PAGESIZE-aligned allocations.  We allocate out of the
	 * heap32_arena to assure that we can address the allocated memory with
	 * a single sethi/simm13 pair in the interposing trap table entries.
	 */
	tstat_arena = vmem_create("trapstat", NULL, 0, MMU_PAGESIZE,
	    segkmem_alloc_permanent, segkmem_free, heap32_arena, 0, VM_SLEEP);

	tstat_enabled = kmem_alloc(TSTAT_TOTAL_NENT * sizeof (int), KM_SLEEP);
	tstat_buffer = kmem_alloc(tstat_data_t_size, KM_SLEEP);

	/*
	 * CB_CL_CPR_POST_USER is the class that executes from cpr_resume()
	 * after user threads can be restarted.  By executing in this class,
	 * we are assured of the availability of system services needed to
	 * resume trapstat (specifically, we are assured that all CPUs are
	 * restarted and responding to cross calls).
	 */
	tstat_cprcb =
	    callb_add(trapstat_cpr, NULL, CB_CL_CPR_POST_USER, "trapstat");

	return (DDI_SUCCESS);
}

/*
 * detach(9E) entry point:  releases everything trapstat_attach() set up.
 * Trapstat must not be running.
 */
static int
trapstat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	int rval;

	ASSERT(devi == tstat_devi);

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	ASSERT(!tstat_running);

	rval = callb_delete(tstat_cprcb);
	ASSERT(rval == 0);

	kmem_free(tstat_buffer, tstat_data_t_size);
	kmem_free(tstat_enabled, TSTAT_TOTAL_NENT * sizeof (int));
	vmem_destroy(tstat_arena);
	kmem_free(tstat_percpu, (max_cpuid + 1) * sizeof (tstat_percpu_t));
	ddi_remove_minor_node(devi, NULL);

	return (DDI_SUCCESS);
}

/*
 * Configuration data structures
 */
static struct cb_ops trapstat_cb_ops = {
	trapstat_open,		/* open */
	trapstat_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	trapstat_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_MP | D_NEW		/* Driver compatibility flag */
};

static struct dev_ops trapstat_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	trapstat_info,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	trapstat_attach,	/* attach */
	trapstat_detach,	/* detach */
	nulldev,		/* reset */
	&trapstat_cb_ops,	/* cb_ops */
	(struct bus_ops *)0,	/* bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"Trap Statistics",	/* name of module */
	&trapstat_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

/*
 * Loadable module entry points (_init(9E), _fini(9E), _info(9E)).
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}