1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/systm.h> 30 #include <sys/conf.h> 31 #include <sys/stat.h> 32 #include <sys/ddi.h> 33 #include <sys/sunddi.h> 34 #include <sys/modctl.h> 35 #include <sys/cpu_module.h> 36 #include <vm/hat_sfmmu.h> 37 #include <vm/seg_kmem.h> 38 #include <vm/seg_kpm.h> 39 #include <vm/vm_dep.h> 40 #include <sys/machsystm.h> 41 #include <sys/machasi.h> 42 #include <sys/sysmacros.h> 43 #include <sys/callb.h> 44 #include <sys/archsystm.h> 45 #include <sys/trapstat.h> 46 #ifdef sun4v 47 #include <sys/hypervisor_api.h> 48 #endif 49 50 /* BEGIN CSTYLED */ 51 /* 52 * trapstat: Trap Statistics through Dynamic Trap Table Interposition 53 * ------------------------------------------------------------------- 54 * 55 * Motivation and Overview 56 * 57 * Despite being a fundamental indicator of system behavior, there has 58 * historically been very little insight provided into the frequency and cost 59 * of machine-specific traps. The lack of insight has been especially acute 60 * on UltraSPARC microprocessors: because these microprocessors handle TLB 61 * misses as software traps, the frequency and duration of traps play a 62 * decisive role in the performance of the memory system. As applications have 63 * increasingly outstripped TLB reach, this has become increasingly true. 64 * 65 * Part of the difficulty of observing trap behavior is that the trap handlers 66 * are so frequently called (e.g. millions of times per second) that any 67 * permanently enabled instrumentation would induce an unacceptable performance 68 * degradation. Thus, it is a constraint on any trap observability 69 * infrastructure that it have no probe effect when not explicitly enabled. 70 * 71 * The basic idea, then, is to create an interposing trap table in which each 72 * entry increments a per-trap, in-memory counter and then jumps to the actual, 73 * underlying trap table entry. To enable trapstat, we atomically write to the 74 * trap base address (%tba) register to point to our interposing trap table. 75 * (Note that per-CPU statistics fall out by creating a different trap table 76 * for each CPU.) 77 * 78 * Implementation Details 79 * 80 * While the idea is straight-forward, a nuance of SPARC V9 slightly 81 * complicates the implementation. Unlike its predecessors, SPARC V9 supports 82 * the notion of nested traps. The trap level is kept in the TL register: 83 * during normal operation it is 0; when a trap is taken, the TL register is 84 * incremented by 1. To aid system software, SPARC V9 breaks the trap table 85 * into two halves: the lower half contains the trap handlers for traps taken 86 * when TL is 0; the upper half contains the trap handlers for traps taken 87 * when TL is greater than 0. Each half is further subdivided into two 88 * subsequent halves: the lower half contains the trap handlers for traps 89 * other than those induced by the trap instruction (Tcc variants); the upper 90 * half contains the trap handlers for traps induced by the trap instruction. 91 * This gives a total of four ranges, with each range containing 256 traps: 92 * 93 * +--------------------------------+- 3ff 94 * | | . 95 * | Trap instruction, TL>0 | . 96 * | | . 97 * |- - - - - - - - - - - - - - - - +- 300 98 * |- - - - - - - - - - - - - - - - +- 2ff 99 * | | . 100 * | Non-trap instruction, TL>0 | . 101 * | | . 102 * |- - - - - - - - - - - - - - - - +- 200 103 * |- - - - - - - - - - - - - - - - +- 1ff 104 * | | . 105 * | Trap instruction, TL=0 | . 106 * | | . 107 * |- - - - - - - - - - - - - - - - +- 100 108 * |- - - - - - - - - - - - - - - - +- 0ff 109 * | | . 110 * | Non-trap instruction, TL=0 | . 111 * | | . 112 * +--------------------------------+- 000 113 * 114 * 115 * Solaris, however, doesn't have reason to support trap instructions when 116 * TL>0 (only privileged code may execute at TL>0; not supporting this only 117 * constrains our own implementation). The trap table actually looks like: 118 * 119 * +--------------------------------+- 2ff 120 * | | . 121 * | Non-trap instruction, TL>0 | . 122 * | | . 123 * |- - - - - - - - - - - - - - - - +- 200 124 * |- - - - - - - - - - - - - - - - +- 1ff 125 * | | . 126 * | Trap instruction, TL=0 | . 127 * | | . 128 * |- - - - - - - - - - - - - - - - +- 100 129 * |- - - - - - - - - - - - - - - - +- 0ff 130 * | | . 131 * | Non-trap instruction, TL=0 | . 132 * | | . 133 * +--------------------------------+- 000 134 * 135 * Putatively to aid system software, SPARC V9 has the notion of multiple 136 * sets of global registers. UltraSPARC defines four sets of global 137 * registers: 138 * 139 * Normal Globals 140 * Alternate Globals (AGs) 141 * MMU Globals (MGs) 142 * Interrupt Globals (IGs) 143 * 144 * The set of globals in use is controlled by bits in PSTATE; when TL is 0 145 * (and PSTATE has not been otherwise explicitly modified), the Normal Globals 146 * are in use. When a trap is issued, PSTATE is modified to point to a set of 147 * globals corresponding to the trap type. Most traps correspond to the 148 * Alternate Globals, with a minority corresponding to the MMU Globals, and 149 * only the interrupt-vector trap (vector 0x60) corresponding to the Interrupt 150 * Globals. (The complete mapping can be found in the UltraSPARC I&II User's 151 * Manual.) 152 * 153 * Note that the sets of globals are per trap _type_, not per trap _level_. 154 * Thus, when executing a TL>0 trap handler, one may not have registers 155 * available (for example, both trap-instruction traps and spill traps execute 156 * on the alternate globals; if a trap-instruction trap induces a window spill, 157 * the window spill handler has no available globals). For trapstat, this is 158 * problematic: a register is required to transfer control from one arbitrary 159 * location (in the interposing trap table) to another (in the actual trap 160 * table). 161 * 162 * We solve this problem by exploiting the trap table's location at the bottom 163 * of valid kernel memory (i.e. at KERNELBASE). We locate the interposing trap 164 * tables just below KERNELBASE -- thereby allowing us to use a branch-always 165 * instruction (ba) instead of a jump instruction (jmp) to transfer control 166 * from the TL>0 entries in the interposing trap table to the TL>0 entries in 167 * the actual trap table. (N.B. while this allows trap table interposition to 168 * work, it necessarily limits trapstat to only recording information about 169 * TL=0 traps -- there is no way to increment a counter without using a 170 * register.) Diagrammatically: 171 * 172 * Actual trap table: 173 * 174 * +--------------------------------+- 2ff 175 * | | . 176 * | Non-trap instruction, TL>0 | . <-----------------------+ 177 * | | . <-----------------------|-+ 178 * |- - - - - - - - - - - - - - - - +- 200 <-----------------------|-|-+ 179 * |- - - - - - - - - - - - - - - - +- 1ff | | | 180 * | | . | | | 181 * | Trap instruction, TL=0 | . <-----------------+ | | | 182 * | | . <-----------------|-+ | | | 183 * |- - - - - - - - - - - - - - - - +- 100 <-----------------|-|-+ | | | 184 * |- - - - - - - - - - - - - - - - +- 0ff | | | | | | 185 * | | . | | | | | | 186 * | Non-trap instruction, TL=0 | . <-----------+ | | | | | | 187 * | | . <-----------|-+ | | | | | | 188 * +--------------------------------+- 000 <-----------|-|-+ | | | | | | 189 * KERNELBASE | | | | | | | | | 190 * | | | | | | | | | 191 * | | | | | | | | | 192 * Interposing trap table: | | | | | | | | | 193 * | | | | | | | | | 194 * +--------------------------------+- 2ff | | | | | | | | | 195 * | ... | . | | | | | | | | | 196 * | ... | . | | | | | | | | | 197 * | ... | . | | | | | | | | | 198 * |- - - - - - - - - - - - - - - - +- 203 | | | | | | | | | 199 * | ba,a | -------------|-|-|-|-|-|-+ | | 200 * |- - - - - - - - - - - - - - - - +- 202 | | | | | | | | 201 * | ba,a | -------------|-|-|-|-|-|---+ | 202 * |- - - - - - - - - - - - - - - - +- 201 | | | | | | | 203 * | ba,a | -------------|-|-|-|-|-|-----+ 204 * |- - - - - - - - - - - - - - - - +- 200 | | | | | | 205 * | ... | . | | | | | | 206 * | ... | . | | | | | | 207 * | ... | . | | | | | | 208 * |- - - - - - - - - - - - - - - - +- 103 | | | | | | 209 * | (Increment counter) | | | | | | | 210 * | ba,a | -------------------+ | | 211 * |- - - - - - - - - - - - - - - - +- 102 | | | | | 212 * | (Increment counter) | | | | | | 213 * | ba,a | ---------------------+ | 214 * |- - - - - - - - - - - - - - - - +- 101 | | | | 215 * | (Increment counter) | | | | | 216 * | ba,a | -----------------------+ 217 * |- - - - - - - - - - - - - - - - +- 100 | | | 218 * | ... | . | | | 219 * | ... | . | | | 220 * | ... | . | | | 221 * |- - - - - - - - - - - - - - - - +- 003 | | | 222 * | (Increment counter) | | | | 223 * | ba,a | -------------+ | | 224 * |- - - - - - - - - - - - - - - - +- 002 | | 225 * | (Increment counter) | | | 226 * | ba,a | ---------------+ | 227 * |- - - - - - - - - - - - - - - - +- 001 | 228 * | (Increment counter) | | 229 * | ba,a | -----------------+ 230 * +--------------------------------+- 000 231 * KERNELBASE - tstat_total_size 232 * 233 * tstat_total_size is the number of pages required for each trap table. It 234 * must be true that KERNELBASE - tstat_total_size is less than the maximum 235 * branch displacement; if each CPU were to consume a disjoint virtual range 236 * below KERNELBASE for its trap table, we could support at most 237 * (maximum_branch_displacement / tstat_total_size) CPUs. The maximum branch 238 * displacement for Bicc variants is just under eight megabytes, and (because 239 * the %tba must be 32K aligned), tstat_total_size must be at least 32K; if 240 * each CPU were to consume a disjoint virtual range, we would have an 241 * unacceptably low upper bound of 256 CPUs. 242 * 243 * While there are tricks that one could use to address this constraint (e.g., 244 * creating trampolines every maximum_branch_displacement bytes), we instead 245 * solve this by not permitting each CPU to consume a disjoint virtual range. 246 * Rather, we have each CPU's interposing trap table use the _same_ virtual 247 * range, but we back the trap tables with disjoint physical memory. Normally, 248 * such one-to-many virtual-to-physical mappings are illegal; this is 249 * permissible here only because the pages for the interposing trap table are 250 * necessarily locked in the TLB. (The CPUs thus never have the opportunity to 251 * discover that they have conflicting translations.) 252 * 253 * On CMT architectures in which CPUs can share MMUs, the above trick will not 254 * work: two CPUs that share an MMU cannot have the same virtual address map 255 * to disjoint physical pages. On these architectures, any CPUs sharing the 256 * same MMU must consume a disjoint 32K virtual address range -- limiting the 257 * number of CPUs sharing an MMU on these architectures to 256 due to the 258 * branch displacement limitation described above. On the sun4v architecture, 259 * there is a further limitation: a guest may not have more than eight locked 260 * TLB entries per MMU. To allow operation under this restriction, the 261 * interposing trap table and the trap statistics are each accessed through 262 * a single 4M TLB entry. This limits the footprint to two locked entries 263 * (one for the I-TLB and one for the D-TLB), but further restricts the number 264 * of CPUs to 128 per MMU. However, support for more than 128 CPUs can easily 265 * be added via a hybrid scheme, where the same 4M virtual address is used 266 * on different MMUs. 267 * 268 * 269 * TLB Statistics 270 * 271 * Because TLB misses are an important component of system performance, we wish 272 * to know much more about these traps than simply the number received. 273 * Specifically, we wish to know: 274 * 275 * (a) The amount of time spent executing the TLB miss handler 276 * (b) TLB misses versus TSB misses 277 * (c) Kernel-level misses versus user-level misses 278 * (d) Misses per pagesize 279 * 280 * TLB Statistics: Time Spent Executing 281 * 282 * To accurately determine the amount of time spent executing the TLB miss 283 * handler, one must get a timestamp on trap entry and trap exit, subtract the 284 * latter from the former, and add the result to an accumulating count. 285 * Consider flow of control during normal TLB miss processing (where "ldx 286 * [%g2], %g2" is an arbitrary TLB-missing instruction): 287 * 288 * + - - - - - - - -+ 289 * : : 290 * : ldx [%g2], %g2 :<-------------------------------------------------------+ 291 * : : Return from trap: | 292 * + - - - - - - - -+ TL <- TL - 1 (0) | 293 * | %pc <- TSTATE[TL].TPC (address of load) | 294 * | TLB miss: | 295 * | TL <- TL + 1 (1) | 296 * | %pc <- TLB-miss-trap-handler | 297 * | | 298 * v | 299 * + - - - - - - - - - - - - - - - + | 300 * : : | 301 * : Lookup VA in TSB : | 302 * : If (hit) : | 303 * : Fill TLB : | 304 * : Else : | 305 * : Lookup VA (hme hash table : | 306 * : or segkpm) : | 307 * : Fill TLB : | 308 * : Endif : | 309 * : Issue "retry" ---------------------------------------------------------+ 310 * : : 311 * + - - - - - - - - - - - - - - - + 312 * TLB-miss-trap-handler 313 * 314 * 315 * As the above diagram indicates, interposing on the trap table allows one 316 * only to determine a timestamp on trap _entry_: when the TLB miss handler 317 * has completed filling the TLB, a "retry" will be issued, and control will 318 * transfer immediately back to the missing %pc. 319 * 320 * To obtain a timestamp on trap exit, we must then somehow interpose between 321 * the "retry" and the subsequent control transfer to the TLB-missing 322 * instruction. To do this, we _push_ a trap level. The basic idea is to 323 * spoof a TLB miss by raising TL, setting the %tpc to be within text 324 * controlled by trapstat (the "TLB return entry") and branching to the 325 * underlying TLB miss handler. When the TLB miss handler issues its "retry", 326 * control will transfer not to the TLB-missing instruction, but rather to the 327 * TLB return entry. This code can then obtain a timestamp, and issue its own 328 * "retry" -- thereby correctly returning to the TLB-missing instruction. 329 * Here is the above TLB miss flow control diagram modified to reflect 330 * trapstat's operation: 331 * 332 * + - - - - - - - -+ 333 * : : 334 * : ldx [%g2], %g2 :<-------------------------------------------------------+ 335 * : : Return from trap: | 336 * + - - - - - - - -+ TL <- TL - 1 (0) | 337 * | %pc <- TSTATE[TL].TPC (address of load) | 338 * | TLB miss: | 339 * | TL <- TL + 1 (1) | 340 * | %pc <- TLB-miss-trap-handler (trapstat) | 341 * | | 342 * v TLB-return-entry (trapstat) | 343 * + - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - + | 344 * : : : : | 345 * : Record timestamp : : Record timestamp : | 346 * : TL <- 2 : : Take timestamp difference : | 347 * : TSTATE[1].TPC <- TLB-return-entry : : Add to running total : | 348 * : ba,a TLB-miss-trap-handler -----------+ : Issue "retry" --------------+ 349 * : : | : : 350 * + - - - - - - - - - - - - - - - - - - + | + - - - - - - - - - - - - - + 351 * TLB-miss-trap-handler | ^ 352 * (trapstat) | | 353 * | | 354 * | | 355 * +-----------------------+ | 356 * | | 357 * | | 358 * v | 359 * + - - - - - - - - - - - - - - - + | 360 * : : | 361 * : Lookup VA in TSB : | 362 * : If (hit) : | 363 * : Fill TLB : | 364 * : Else : | 365 * : Lookup VA (hme hash table : | 366 * : or segkpm) : | 367 * : Fill TLB : | 368 * : Endif : | 369 * : Issue "retry" ------------------------------------------+ 370 * : : Return from trap: 371 * + - - - - - - - - - - - - - - - + TL <- TL - 1 (1) 372 * TLB-miss-trap-handler %pc <- TSTATE[TL].TPC (TLB-return-entry) 373 * 374 * 375 * A final subterfuge is required to complete our artifice: if we miss in 376 * the TLB, the TSB _and_ the subsequent hash or segkpm lookup (that is, if 377 * there is no valid translation for the TLB-missing address), common system 378 * software will need to accurately determine the %tpc as part of its page 379 * fault handling. We therefore modify the kernel to check the %tpc in this 380 * case: if the %tpc falls within the VA range controlled by trapstat and 381 * the TL is 2, TL is simply lowered back to 1 (this check is implemented 382 * by the TSTAT_CHECK_TL1 macro). Lowering TL to 1 has the effect of 383 * discarding the state pushed by trapstat. 384 * 385 * TLB Statistics: TLB Misses versus TSB Misses 386 * 387 * Distinguishing TLB misses from TSB misses requires further interposition 388 * on the TLB miss handler: we cannot know a priori or a posteriori if a 389 * given VA will or has hit in the TSB. 390 * 391 * We achieve this distinction by adding a second TLB return entry almost 392 * identical to the first -- differing only in the address to which it 393 * stores its results. We then modify the TLB miss handlers of the kernel 394 * such that they check the %tpc when they determine that a TLB miss has 395 * subsequently missed in the TSB: if the %tpc lies within trapstat's VA 396 * range and TL is 2 (that is, if trapstat is running), the TLB miss handler 397 * _increments_ the %tpc by the size of the TLB return entry. The ensuing 398 * "retry" will thus transfer control to the second TLB return entry, and 399 * the time spent in the handler will be accumulated in a memory location 400 * specific to TSB misses. 401 * 402 * N.B.: To minimize the amount of knowledge the kernel must have of trapstat, 403 * we do not allow the kernel to hard-code the size of the TLB return entry. 404 * Rather, the actual tsbmiss handler executes a known instruction at the 405 * corresponding tsbmiss patch points (see the tstat_tsbmiss_patch_table) with 406 * the %tpc in %g7: when trapstat is not running, these points contain the 407 * harmless TSTAT_TSBMISS_INSTR instruction ("add %g7, 0, %g7"). Before 408 * running, trapstat modifies the instructions at these patch points such 409 * that the simm13 equals the size of the TLB return entry. 410 * 411 * TLB Statistics: Kernel-level Misses versus User-level Misses 412 * 413 * Differentiating user-level misses from kernel-level misses employs a 414 * similar technique, but is simplified by the ability to distinguish a 415 * user-level miss from a kernel-level miss a priori by reading the context 416 * register: we implement kernel-/user-level differentiation by again doubling 417 * the number of TLB return entries, and setting the %tpc to the appropriate 418 * TLB return entry in trapstat's TLB miss handler. Together with the doubling 419 * of entries required for TLB-miss/TSB-miss differentiation, this yields a 420 * total of four TLB return entries: 421 * 422 * Level TSB hit? Structure member 423 * ------------------------------------------------------------ 424 * Kernel Yes tstat_tlbret_t.ttlbr_ktlb 425 * Kernel No tstat_tlbret_t.ttlbr_ktsb 426 * User Yes tstat_tlbret_t.ttlbr_utlb 427 * User No tstat_tlbret_t.ttlbr_utsb 428 * 429 * TLB Statistics: Misses per Pagesize 430 * 431 * As with the TLB-/TSB-miss differentiation, we have no way of determining 432 * pagesize a priori. This is therefore implemented by mandating a new rule: 433 * whenever the kernel fills the TLB in its TLB miss handler, the TTE 434 * corresponding to the TLB-missing VA must be in %g5 when the handler 435 * executes its "retry". This allows the TLB return entry to determine 436 * pagesize by simply looking at the pagesize field in the TTE stored in 437 * %g5. 438 * 439 * TLB Statistics: Probe Effect 440 * 441 * As one might imagine, gathering TLB statistics by pushing a trap level 442 * induces significant probe effect. To account for this probe effect, 443 * trapstat attempts to observe it by executing a code sequence with a known 444 * number of TLB misses both before and after interposing on the trap table. 445 * This allows trapstat to determine a per-trap probe effect which can then be 446 * factored into the "%tim" fields of the trapstat command. 447 * 448 * Note that on sun4v platforms, TLB misses are normally handled by the 449 * hypervisor or the hardware TSB walker. Thus no fast MMU miss information 450 * is reported for normal operation. However, when trapstat is invoked 451 * with -t or -T option to collect detailed TLB statistics, kernel takes 452 * over TLB miss handling. This results in significantly more overhead 453 * and TLB statistics may not be as accurate as on sun4u platforms. 454 * On some processors, hypervisor or hardware may provide a low overhead 455 * interface to collect TSB hit statistics. This support is exposed via 456 * a well defined CPU module interface (cpu_trapstat_conf to enable this 457 * interface and cpu_trapstat_data to get detailed TSB hit statistics). 458 * In this scenario, TSB miss statistics is collected by intercepting the 459 * IMMU_miss and DMMU_miss traps using above mentioned trap interposition 460 * approach. 461 * 462 * Locking 463 * 464 * The implementation uses two locks: tstat_lock (a local lock) and the global 465 * cpu_lock. tstat_lock is used to assure trapstat's consistency in the 466 * presence of multithreaded /dev/trapstat consumers (while as of this writing 467 * the only consumer of /dev/trapstat is single threaded, it is obviously 468 * necessary to correctly support multithreaded access). cpu_lock is held 469 * whenever CPUs are being manipulated directly, to prevent them from 470 * disappearing in the process. Because trapstat's DR callback 471 * (trapstat_cpu_setup()) must grab tstat_lock and is called with cpu_lock 472 * held, the lock ordering is necessarily cpu_lock before tstat_lock. 473 * 474 */ 475 /* END CSTYLED */ 476 477 static dev_info_t *tstat_devi; /* saved in xxattach() for xxinfo() */ 478 static int tstat_open; /* set if driver is open */ 479 static kmutex_t tstat_lock; /* serialize access */ 480 static vmem_t *tstat_arena; /* arena for TLB-locked pages */ 481 static tstat_percpu_t *tstat_percpu; /* per-CPU data */ 482 static int tstat_running; /* set if trapstat is running */ 483 static tstat_data_t *tstat_buffer; /* staging buffer for outgoing data */ 484 static int tstat_options; /* bit-wise indication of options */ 485 static int *tstat_enabled; /* map of enabled trap entries */ 486 static int tstat_tsbmiss_patched; /* tsbmiss patch flag */ 487 static callb_id_t tstat_cprcb; /* CPR callback */ 488 static char *tstat_probe_area; /* VA range used for probe effect */ 489 static caddr_t tstat_probe_phys; /* physical to back above VA */ 490 static hrtime_t tstat_probe_time; /* time spent on probe effect */ 491 static hrtime_t tstat_probe_before[TSTAT_PROBE_NLAPS]; 492 static hrtime_t tstat_probe_after[TSTAT_PROBE_NLAPS]; 493 static uint_t tstat_pgszs; /* # of kernel page sizes */ 494 static uint_t tstat_user_pgszs; /* # of user page sizes */ 495 496 /* 497 * sizeof tstat_data_t + pgsz data for the kernel. For simplicity's sake, when 498 * we collect data, we do it based upon szc, but when we report data back to 499 * userland, we have to do it based upon the userszc which may not match. 500 * So, these two variables are for internal use and exported use respectively. 501 */ 502 static size_t tstat_data_t_size; 503 static size_t tstat_data_t_exported_size; 504 505 static size_t tstat_data_pages; /* number of pages of tstat data */ 506 static size_t tstat_data_size; /* tstat data size in bytes */ 507 static size_t tstat_total_pages; /* #data pages + #instr pages */ 508 static size_t tstat_total_size; /* tstat data size + instr size */ 509 #ifdef sun4v 510 static caddr_t tstat_va; /* VA of memory reserved for TBA */ 511 static pfn_t tstat_pfn; /* PFN of memory reserved for TBA */ 512 static boolean_t tstat_fast_tlbstat = B_FALSE; 513 #endif 514 515 /* 516 * In the above block comment, see "TLB Statistics: TLB Misses versus 517 * TSB Misses" for an explanation of the tsbmiss patch points. 518 */ 519 extern uint32_t tsbmiss_trapstat_patch_point; 520 extern uint32_t tsbmiss_trapstat_patch_point_kpm; 521 extern uint32_t tsbmiss_trapstat_patch_point_kpm_small; 522 523 /* 524 * Trapstat tsbmiss patch table 525 */ 526 tstat_tsbmiss_patch_entry_t tstat_tsbmiss_patch_table[] = { 527 {(uint32_t *)&tsbmiss_trapstat_patch_point, 0}, 528 {(uint32_t *)&tsbmiss_trapstat_patch_point_kpm, 0}, 529 {(uint32_t *)&tsbmiss_trapstat_patch_point_kpm_small, 0}, 530 {(uint32_t *)NULL, 0} 531 }; 532 533 /* 534 * We define some general SPARC-specific constants to allow more readable 535 * relocations. 536 */ 537 #define NOP 0x01000000 538 #define HI22(v) ((uint32_t)(v) >> 10) 539 #define LO10(v) ((uint32_t)(v) & 0x3ff) 540 #define LO12(v) ((uint32_t)(v) & 0xfff) 541 #define DISP22(from, to) \ 542 ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff) 543 #define ASI(asi) ((asi) << 5) 544 545 /* 546 * The interposing trap table must be locked in the I-TLB, and any data 547 * referred to in the interposing trap handler must be locked in the D-TLB. 548 * This function locks these pages in the appropriate TLBs by creating TTEs 549 * from whole cloth, and manually loading them into the TLB. This function is 550 * called from cross call context. 551 * 552 * On sun4v platforms, we use 4M page size mappings to minimize the number 553 * of locked down entries (i.e. permanent mappings). Each CPU uses a 554 * reserved portion of that 4M page for its TBA and data. 555 */ 556 static void 557 trapstat_load_tlb(void) 558 { 559 #ifndef sun4v 560 int i; 561 #else 562 uint64_t ret; 563 #endif 564 tte_t tte; 565 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 566 caddr_t va = tcpu->tcpu_vabase; 567 568 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 569 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 570 571 #ifndef sun4v 572 for (i = 0; i < tstat_total_pages; i++, va += MMU_PAGESIZE) { 573 tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) | 574 TTE_PFN_INTHI(tcpu->tcpu_pfn[i]); 575 if (i < TSTAT_INSTR_PAGES) { 576 tte.tte_intlo = TTE_PFN_INTLO(tcpu->tcpu_pfn[i]) | 577 TTE_LCK_INT | TTE_CP_INT | TTE_PRIV_INT; 578 sfmmu_itlb_ld(va, KCONTEXT, &tte); 579 } else { 580 tte.tte_intlo = TTE_PFN_INTLO(tcpu->tcpu_pfn[i]) | 581 TTE_LCK_INT | TTE_CP_INT | TTE_CV_INT | 582 TTE_PRIV_INT | TTE_HWWR_INT; 583 sfmmu_dtlb_ld(va, KCONTEXT, &tte); 584 } 585 } 586 #else /* sun4v */ 587 tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(tstat_pfn); 588 tte.tte_intlo = TTE_PFN_INTLO(tstat_pfn) | TTE_CP_INT | 589 TTE_CV_INT | TTE_PRIV_INT | TTE_HWWR_INT | 590 TTE_SZ_INTLO(TTE4M); 591 ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte, 592 MAP_ITLB | MAP_DTLB); 593 594 if (ret != H_EOK) 595 cmn_err(CE_PANIC, "trapstat: cannot map new TBA " 596 "for cpu %d (error: 0x%lx)", CPU->cpu_id, ret); 597 #endif /* sun4v */ 598 } 599 600 /* 601 * As mentioned in the "TLB Statistics: TLB Misses versus TSB Misses" section 602 * of the block comment, TLB misses are differentiated from TSB misses in 603 * part by hot-patching the instructions at the tsbmiss patch points (see 604 * tstat_tsbmiss_patch_table). This routine is used both to initially patch 605 * the instructions, and to patch them back to their original values upon 606 * restoring the original trap table. 607 */ 608 static void 609 trapstat_hotpatch() 610 { 611 uint32_t instr; 612 uint32_t simm13; 613 tstat_tsbmiss_patch_entry_t *ep; 614 615 ASSERT(MUTEX_HELD(&tstat_lock)); 616 617 if (!(tstat_options & TSTAT_OPT_TLBDATA)) 618 return; 619 620 if (!tstat_tsbmiss_patched) { 621 /* 622 * We haven't patched the TSB paths; do so now. 623 */ 624 /*CONSTCOND*/ 625 ASSERT(offsetof(tstat_tlbret_t, ttlbr_ktsb) - 626 offsetof(tstat_tlbret_t, ttlbr_ktlb) == 627 offsetof(tstat_tlbret_t, ttlbr_utsb) - 628 offsetof(tstat_tlbret_t, ttlbr_utlb)); 629 630 simm13 = offsetof(tstat_tlbret_t, ttlbr_ktsb) - 631 offsetof(tstat_tlbret_t, ttlbr_ktlb); 632 633 for (ep = tstat_tsbmiss_patch_table; ep->tpe_addr; ep++) { 634 ASSERT(ep->tpe_instr == 0); 635 instr = ep->tpe_instr = *ep->tpe_addr; 636 637 /* 638 * Assert that the instruction we're about to patch is 639 * "add %g7, 0, %g7" (0x8e01e000). 640 */ 641 ASSERT(instr == TSTAT_TSBMISS_INSTR); 642 643 instr |= simm13; 644 hot_patch_kernel_text((caddr_t)ep->tpe_addr, 645 instr, sizeof (instr)); 646 } 647 648 tstat_tsbmiss_patched = 1; 649 650 } else { 651 /* 652 * Remove patches from the TSB paths. 653 */ 654 for (ep = tstat_tsbmiss_patch_table; ep->tpe_addr; ep++) { 655 ASSERT(ep->tpe_instr == TSTAT_TSBMISS_INSTR); 656 hot_patch_kernel_text((caddr_t)ep->tpe_addr, 657 ep->tpe_instr, sizeof (instr)); 658 ep->tpe_instr = 0; 659 } 660 661 tstat_tsbmiss_patched = 0; 662 } 663 } 664 665 /* 666 * This is the routine executed to clock the performance of the trap table, 667 * executed both before and after interposing on the trap table to attempt to 668 * determine probe effect. The probe effect is used to adjust the "%tim" 669 * fields of trapstat's -t and -T output; we only use TLB misses to clock the 670 * trap table. We execute the inner loop (which is designed to exceed the 671 * TLB's reach) nlaps times, taking the best time as our time (thereby 672 * factoring out the effects of interrupts, cache misses or other perturbing 673 * events. 674 */ 675 static hrtime_t 676 trapstat_probe_laps(int nlaps, hrtime_t *buf) 677 { 678 int i, j = 0; 679 hrtime_t ts, best = INT64_MAX; 680 681 while (nlaps--) { 682 ts = rdtick(); 683 684 for (i = 0; i < TSTAT_PROBE_SIZE; i += MMU_PAGESIZE) 685 *((volatile char *)&tstat_probe_area[i]); 686 687 if ((ts = rdtick() - ts) < best) 688 best = ts; 689 buf[j++] = ts; 690 } 691 692 return (best); 693 } 694 695 /* 696 * This routine determines the probe effect by calling trapstat_probe_laps() 697 * both without and with the interposing trap table. Note that this is 698 * called from a cross call on the desired CPU, and that it is called on 699 * every CPU (this is necessary because the probe effect may differ from 700 * one CPU to another). 701 */ 702 static void 703 trapstat_probe() 704 { 705 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 706 hrtime_t before, after; 707 708 if (!(tcpu->tcpu_flags & TSTAT_CPU_SELECTED)) 709 return; 710 711 if (tstat_probe_area == NULL || (tstat_options & TSTAT_OPT_NOGO)) 712 return; 713 714 /* 715 * We very much expect the %tba to be KERNELBASE; this is a 716 * precautionary measure to assure that trapstat doesn't melt the 717 * machine should the %tba point unexpectedly elsewhere. 718 */ 719 if (get_tba() != (caddr_t)KERNELBASE) 720 return; 721 722 /* 723 * Preserve this CPU's data before destroying it by enabling the 724 * interposing trap table. We can safely use tstat_buffer because 725 * the caller of the trapstat_probe() cross call is holding tstat_lock. 726 */ 727 bcopy(tcpu->tcpu_data, tstat_buffer, tstat_data_t_size); 728 729 tstat_probe_time = gethrtime(); 730 731 before = trapstat_probe_laps(TSTAT_PROBE_NLAPS, tstat_probe_before); 732 (void) set_tba(tcpu->tcpu_ibase); 733 734 after = trapstat_probe_laps(TSTAT_PROBE_NLAPS, tstat_probe_after); 735 (void) set_tba((caddr_t)KERNELBASE); 736 737 tstat_probe_time = gethrtime() - tstat_probe_time; 738 739 bcopy(tstat_buffer, tcpu->tcpu_data, tstat_data_t_size); 740 tcpu->tcpu_data->tdata_peffect = (after - before) / TSTAT_PROBE_NPAGES; 741 } 742 743 static void 744 trapstat_probe_alloc() 745 { 746 pfn_t pfn; 747 caddr_t va; 748 int i; 749 750 ASSERT(MUTEX_HELD(&tstat_lock)); 751 ASSERT(tstat_probe_area == NULL); 752 ASSERT(tstat_probe_phys == NULL); 753 754 if (!(tstat_options & TSTAT_OPT_TLBDATA)) 755 return; 756 757 /* 758 * Grab some virtual from the heap arena. 759 */ 760 tstat_probe_area = vmem_alloc(heap_arena, TSTAT_PROBE_SIZE, VM_SLEEP); 761 va = tstat_probe_area; 762 763 /* 764 * Grab a single physical page. 765 */ 766 tstat_probe_phys = vmem_alloc(tstat_arena, MMU_PAGESIZE, VM_SLEEP); 767 pfn = hat_getpfnum(kas.a_hat, tstat_probe_phys); 768 769 /* 770 * Now set the translation for every page in our virtual range 771 * to be our allocated physical page. 772 */ 773 for (i = 0; i < TSTAT_PROBE_NPAGES; i++) { 774 hat_devload(kas.a_hat, va, MMU_PAGESIZE, pfn, PROT_READ, 775 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 776 va += MMU_PAGESIZE; 777 } 778 } 779 780 static void 781 trapstat_probe_free() 782 { 783 caddr_t va; 784 int i; 785 786 ASSERT(MUTEX_HELD(&tstat_lock)); 787 788 if ((va = tstat_probe_area) == NULL) 789 return; 790 791 for (i = 0; i < TSTAT_PROBE_NPAGES; i++) { 792 hat_unload(kas.a_hat, va, MMU_PAGESIZE, HAT_UNLOAD_UNLOCK); 793 va += MMU_PAGESIZE; 794 } 795 796 vmem_free(tstat_arena, tstat_probe_phys, MMU_PAGESIZE); 797 vmem_free(heap_arena, tstat_probe_area, TSTAT_PROBE_SIZE); 798 799 tstat_probe_phys = NULL; 800 tstat_probe_area = NULL; 801 } 802 803 /* 804 * This routine actually enables a CPU by setting its %tba to be the 805 * CPU's interposing trap table. It is called out of cross call context. 806 */ 807 static void 808 trapstat_enable() 809 { 810 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 811 812 if (!(tcpu->tcpu_flags & TSTAT_CPU_SELECTED)) 813 return; 814 815 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 816 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 817 818 if (get_tba() != (caddr_t)KERNELBASE) 819 return; 820 821 if (!(tstat_options & TSTAT_OPT_NOGO)) 822 (void) set_tba(tcpu->tcpu_ibase); 823 tcpu->tcpu_flags |= TSTAT_CPU_ENABLED; 824 #ifdef sun4v 825 if ((tstat_options & TSTAT_OPT_TLBDATA) && 826 !(tstat_options & TSTAT_OPT_NOGO)) { 827 if (tstat_fast_tlbstat) { 828 /* 829 * Invoke processor specific interface to enable 830 * collection of TSB hit statistics. 831 */ 832 cpu_trapstat_conf(CPU_TSTATCONF_ENABLE); 833 } else { 834 /* 835 * Collect TLB miss statistics by taking over 836 * TLB miss handling from the hypervisor. This 837 * is done by telling the hypervisor that there 838 * is no TSB configured. Also set TSTAT_TLB_STATS 839 * flag so that no user TSB is configured during 840 * context switch time. 841 */ 842 cpu_t *cp = CPU; 843 844 cp->cpu_m.cpu_tstat_flags |= TSTAT_TLB_STATS; 845 (void) hv_set_ctx0(NULL, NULL); 846 (void) hv_set_ctxnon0(NULL, NULL); 847 } 848 } 849 #endif 850 } 851 852 /* 853 * This routine disables a CPU (vis a vis trapstat) by setting its %tba to be 854 * the actual, underlying trap table. It is called out of cross call context. 855 */ 856 static void 857 trapstat_disable() 858 { 859 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 860 861 if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)) 862 return; 863 864 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 865 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 866 867 if (!(tstat_options & TSTAT_OPT_NOGO)) 868 (void) set_tba((caddr_t)KERNELBASE); 869 870 tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED; 871 872 #ifdef sun4v 873 if ((tstat_options & TSTAT_OPT_TLBDATA) && 874 !(tstat_options & TSTAT_OPT_NOGO)) { 875 if (tstat_fast_tlbstat) { 876 /* 877 * Invoke processor specific interface to disable 878 * collection of TSB hit statistics on each processor. 879 */ 880 cpu_trapstat_conf(CPU_TSTATCONF_DISABLE); 881 } else { 882 /* 883 * As part of collecting TLB miss statistics, we took 884 * over TLB miss handling from the hypervisor by 885 * telling the hypervisor that NO TSB is configured. 886 * We need to restore that by communicating proper 887 * kernel/user TSB information so that TLB misses 888 * can be handled by the hypervisor or the hardware 889 * more efficiently. 890 * 891 * We restore kernel TSB information right away. 892 * However, to minimize any locking dependency, we 893 * don't restore user TSB information right away. 894 * Instead, we simply clear the TSTAT_TLB_STATS flag 895 * so that the user TSB information is automatically 896 * restored on next context switch. 897 * 898 * Note that the call to restore kernel TSB information 899 * will normally not fail, unless wrong information is 900 * passed here. In that scenario, system will still 901 * continue to function properly with the exception of 902 * kernel handling all the TLB misses. 903 */ 904 struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock; 905 cpu_t *cp = CPU; 906 907 cp->cpu_m.cpu_tstat_flags &= ~TSTAT_TLB_STATS; 908 (void) hv_set_ctx0(hvbp->hv_tsb_info_cnt, 909 hvbp->hv_tsb_info_pa); 910 } 911 } 912 #endif 913 } 914 915 /* 916 * We use %tick as the time base when recording the time spent executing 917 * the trap handler. %tick, however, is not necessarily kept in sync 918 * across CPUs (indeed, different CPUs may have different %tick frequencies). 919 * We therefore cross call onto a CPU to get a snapshot of its data to 920 * copy out; this is the routine executed out of that cross call. 921 */ 922 static void 923 trapstat_snapshot() 924 { 925 tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id]; 926 tstat_data_t *data = tcpu->tcpu_data; 927 928 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 929 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 930 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ENABLED); 931 932 data->tdata_snapts = gethrtime(); 933 data->tdata_snaptick = rdtick(); 934 bcopy(data, tstat_buffer, tstat_data_t_size); 935 #ifdef sun4v 936 /* 937 * Invoke processor specific interface to collect TSB hit 938 * statistics on each processor. 939 */ 940 if ((tstat_options & TSTAT_OPT_TLBDATA) && tstat_fast_tlbstat) 941 cpu_trapstat_data((void *) tstat_buffer->tdata_pgsz, 942 tstat_pgszs); 943 #endif 944 } 945 946 /* 947 * The TSTAT_RETENT_* constants define offsets in the TLB return entry. 948 * They are used only in trapstat_tlbretent() (below) and #undef'd 949 * immediately afterwards. Any change to "retent" in trapstat_tlbretent() 950 * will likely require changes to these constants. 951 */ 952 953 #ifndef sun4v 954 #define TSTAT_RETENT_STATHI 1 955 #define TSTAT_RETENT_STATLO 2 956 #define TSTAT_RETENT_SHIFT 11 957 #define TSTAT_RETENT_COUNT_LD 13 958 #define TSTAT_RETENT_COUNT_ST 15 959 #define TSTAT_RETENT_TMPTSHI 16 960 #define TSTAT_RETENT_TMPTSLO 17 961 #define TSTAT_RETENT_TIME_LD 19 962 #define TSTAT_RETENT_TIME_ST 21 963 #else /* sun4v */ 964 #define TSTAT_RETENT_STATHI 1 965 #define TSTAT_RETENT_STATLO 2 966 #define TSTAT_RETENT_SHIFT 5 967 #define TSTAT_RETENT_COUNT_LD 7 968 #define TSTAT_RETENT_COUNT_ST 9 969 #define TSTAT_RETENT_TMPTSHI 10 970 #define TSTAT_RETENT_TMPTSLO 11 971 #define TSTAT_RETENT_TIME_LD 13 972 #define TSTAT_RETENT_TIME_ST 15 973 #endif /* sun4v */ 974 975 static void 976 trapstat_tlbretent(tstat_percpu_t *tcpu, tstat_tlbretent_t *ret, 977 tstat_missdata_t *data) 978 { 979 uint32_t *ent = ret->ttlbrent_instr, shift; 980 uintptr_t base, tmptick = TSTAT_DATA_OFFS(tcpu, tdata_tmptick); 981 982 /* 983 * This is the entry executed upon return from the TLB/TSB miss 984 * handler (i.e. the code interpositioned between the "retry" and 985 * the actual return to the TLB-missing instruction). Detail on its 986 * theory of operation can be found in the "TLB Statistics" section 987 * of the block comment. Note that we expect the TTE just loaded 988 * into the TLB to be in %g5; all other globals are available as 989 * scratch. Finally, note that the page size information in sun4v is 990 * located in the lower bits of the TTE -- requiring us to have a 991 * different return entry on sun4v. 992 */ 993 static const uint32_t retent[TSTAT_TLBRET_NINSTR] = { 994 #ifndef sun4v 995 0x87410000, /* rd %tick, %g3 */ 996 0x03000000, /* sethi %hi(stat), %g1 */ 997 0x82106000, /* or %g1, %lo(stat), %g1 */ 998 0x89297001, /* sllx %g5, 1, %g4 */ 999 0x8931303e, /* srlx %g4, 62, %g4 */ 1000 0x8531702e, /* srlx %g5, 46, %g2 */ 1001 0x8408a004, /* and %g2, 4, %g2 */ 1002 0x88110002, /* or %g4, %g2, %g4 */ 1003 0x80a12005, /* cmp %g4, 5 */ 1004 0x34400002, /* bg,a,pn %icc, +8 */ 1005 0x88102004, /* mov 4, %g4 */ 1006 0x89292000, /* sll %g4, shift, %g4 */ 1007 0x82004004, /* add %g1, %g4, %g1 */ 1008 0xc4586000, /* ldx [%g1 + tmiss_count], %g2 */ 1009 0x8400a001, /* add %g2, 1, %g2 */ 1010 0xc4706000, /* stx %g2, [%g1 + tmiss_count] */ 1011 0x0d000000, /* sethi %hi(tdata_tmptick), %g6 */ 1012 0xc459a000, /* ldx [%g6 + %lo(tdata_tmptick)], %g2 */ 1013 0x8620c002, /* sub %g3, %g2, %g3 */ 1014 0xc4586000, /* ldx [%g1 + tmiss_time], %g2 */ 1015 0x84008003, /* add %g2, %g3, %g2 */ 1016 0xc4706000, /* stx %g2, [%g1 + tmiss_time] */ 1017 0x83f00000 /* retry */ 1018 #else /* sun4v */ 1019 0x87410000, /* rd %tick, %g3 */ 1020 0x03000000, /* sethi %hi(stat), %g1 */ 1021 0x82106000, /* or %g1, %lo(stat), %g1 */ 1022 0x8929703d, /* sllx %g5, 61, %g4 */ 1023 0x8931303d, /* srlx %g4, 61, %g4 */ 1024 0x89292000, /* sll %g4, shift, %g4 */ 1025 0x82004004, /* add %g1, %g4, %g1 */ 1026 0xc4586000, /* ldx [%g1 + tmiss_count], %g2 */ 1027 0x8400a001, /* add %g2, 1, %g2 */ 1028 0xc4706000, /* stx %g2, [%g1 + tmiss_count] */ 1029 0x0d000000, /* sethi %hi(tdata_tmptick), %g6 */ 1030 0xc459a000, /* ldx [%g6 + %lo(tdata_tmptick)], %g2 */ 1031 0x8620c002, /* sub %g3, %g2, %g3 */ 1032 0xc4586000, /* ldx [%g1 + tmiss_time], %g2 */ 1033 0x84008003, /* add %g2, %g3, %g2 */ 1034 0xc4706000, /* stx %g2, [%g1 + tmiss_time] */ 1035 0x83f00000 /* retry */ 1036 #endif /* sun4v */ 1037 }; 1038 1039 ASSERT(MUTEX_HELD(&tstat_lock)); 1040 /*CONSTCOND*/ 1041 ASSERT(offsetof(tstat_missdata_t, tmiss_count) <= LO10(-1)); 1042 /*CONSTCOND*/ 1043 ASSERT(offsetof(tstat_missdata_t, tmiss_time) <= LO10(-1)); 1044 /*CONSTCOND*/ 1045 ASSERT(!((sizeof (tstat_pgszdata_t) - 1) & sizeof (tstat_pgszdata_t))); 1046 1047 for (shift = 1; (1 << shift) != sizeof (tstat_pgszdata_t); shift++) 1048 continue; 1049 1050 base = (uintptr_t)tcpu->tcpu_dbase + 1051 ((uintptr_t)data - (uintptr_t)tcpu->tcpu_data); 1052 1053 bcopy(retent, ent, sizeof (retent)); 1054 1055 ent[TSTAT_RETENT_STATHI] |= HI22(base); 1056 ent[TSTAT_RETENT_STATLO] |= LO10(base); 1057 ent[TSTAT_RETENT_SHIFT] |= shift; 1058 /* LINTED E_EXPR_NULL_EFFECT */ 1059 ent[TSTAT_RETENT_COUNT_LD] |= offsetof(tstat_missdata_t, tmiss_count); 1060 /* LINTED E_EXPR_NULL_EFFECT */ 1061 ent[TSTAT_RETENT_COUNT_ST] |= offsetof(tstat_missdata_t, tmiss_count); 1062 ent[TSTAT_RETENT_TMPTSHI] |= HI22(tmptick); 1063 ent[TSTAT_RETENT_TMPTSLO] |= LO10(tmptick); 1064 ent[TSTAT_RETENT_TIME_LD] |= offsetof(tstat_missdata_t, tmiss_time); 1065 ent[TSTAT_RETENT_TIME_ST] |= offsetof(tstat_missdata_t, tmiss_time); 1066 } 1067 1068 #undef TSTAT_RETENT_STATHI 1069 #undef TSTAT_RETENT_STATLO 1070 #undef TSTAT_RETENT_SHIFT 1071 #undef TSTAT_RETENT_COUNT_LD 1072 #undef TSTAT_RETENT_COUNT_ST 1073 #undef TSTAT_RETENT_TMPTSHI 1074 #undef TSTAT_RETENT_TMPTSLO 1075 #undef TSTAT_RETENT_TIME_LD 1076 #undef TSTAT_RETENT_TIME_ST 1077 1078 /* 1079 * The TSTAT_TLBENT_* constants define offsets in the TLB entry. They are 1080 * used only in trapstat_tlbent() (below) and #undef'd immediately afterwards. 1081 * Any change to "tlbent" in trapstat_tlbent() will likely require changes 1082 * to these constants. 1083 */ 1084 1085 #ifndef sun4v 1086 #define TSTAT_TLBENT_STATHI 0 1087 #define TSTAT_TLBENT_STATLO_LD 1 1088 #define TSTAT_TLBENT_STATLO_ST 3 1089 #define TSTAT_TLBENT_MMUASI 15 1090 #define TSTAT_TLBENT_TPCHI 18 1091 #define TSTAT_TLBENT_TPCLO_USER 19 1092 #define TSTAT_TLBENT_TPCLO_KERN 21 1093 #define TSTAT_TLBENT_TSHI 25 1094 #define TSTAT_TLBENT_TSLO 27 1095 #define TSTAT_TLBENT_BA 28 1096 #else /* sun4v */ 1097 #define TSTAT_TLBENT_STATHI 0 1098 #define TSTAT_TLBENT_STATLO_LD 1 1099 #define TSTAT_TLBENT_STATLO_ST 3 1100 #define TSTAT_TLBENT_TAGTARGET 19 1101 #define TSTAT_TLBENT_TPCHI 21 1102 #define TSTAT_TLBENT_TPCLO_USER 22 1103 #define TSTAT_TLBENT_TPCLO_KERN 24 1104 #define TSTAT_TLBENT_TSHI 28 1105 #define TSTAT_TLBENT_TSLO 30 1106 #define TSTAT_TLBENT_BA 31 1107 #endif /* sun4v */ 1108 1109 static void 1110 trapstat_tlbent(tstat_percpu_t *tcpu, int entno) 1111 { 1112 uint32_t *ent; 1113 uintptr_t orig, va, baoffs; 1114 #ifndef sun4v 1115 int itlb = entno == TSTAT_ENT_ITLBMISS; 1116 #else 1117 int itlb = (entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_ITLBMISS); 1118 #endif 1119 int entoffs = entno << TSTAT_ENT_SHIFT; 1120 uintptr_t tmptick, stat, tpc, utpc; 1121 tstat_pgszdata_t *data = &tcpu->tcpu_data->tdata_pgsz[0]; 1122 tstat_tlbdata_t *udata, *kdata; 1123 tstat_tlbret_t *ret; 1124 #ifndef sun4v 1125 uint32_t asi = itlb ? ASI(ASI_IMMU) : ASI(ASI_DMMU); 1126 #else 1127 uint32_t tagtarget_off = itlb ? MMFSA_I_CTX : MMFSA_D_CTX; 1128 #endif 1129 1130 /* 1131 * When trapstat is run with TLB statistics, this is the entry for 1132 * both I- and D-TLB misses; this code performs trap level pushing, 1133 * as described in the "TLB Statistics" section of the block comment. 1134 * This code is executing at TL 1; %tstate[0] contains the saved 1135 * state at the time of the TLB miss. Pushing trap level 1 (and thus 1136 * raising TL to 2) requires us to fill in %tstate[1] with our %pstate, 1137 * %cwp and %asi. We leave %tt unchanged, and we set %tpc and %tnpc to 1138 * the appropriate TLB return entry (based on the context of the miss). 1139 * Finally, we sample %tick, and stash it in the tdata_tmptick member 1140 * the per-CPU tstat_data structure. tdata_tmptick will be used in 1141 * the TLB return entry to determine the amount of time spent in the 1142 * TLB miss handler. 1143 * 1144 * Note that on sun4v platforms, we must obtain the context information 1145 * from the MMU fault status area. (The base address of this MMU fault 1146 * status area is kept in the scratchpad register 0.) 1147 */ 1148 static const uint32_t tlbent[] = { 1149 #ifndef sun4v 1150 0x03000000, /* sethi %hi(stat), %g1 */ 1151 0xc4586000, /* ldx [%g1 + %lo(stat)], %g2 */ 1152 0x8400a001, /* add %g2, 1, %g2 */ 1153 0xc4706000, /* stx %g2, [%g1 + %lo(stat)] */ 1154 0x85524000, /* rdpr %cwp, %g2 */ 1155 0x87518000, /* rdpr %pstate, %g3 */ 1156 0x8728f008, /* sllx %g3, 8, %g3 */ 1157 0x84108003, /* or %g2, %g3, %g2 */ 1158 0x8740c000, /* rd %asi, %g3 */ 1159 0x8728f018, /* sllx %g3, 24, %g3 */ 1160 0x84108003, /* or %g2, %g3, %g2 */ 1161 0x8350c000, /* rdpr %tt, %g1 */ 1162 0x8f902002, /* wrpr %g0, 2, %tl */ 1163 0x85908000, /* wrpr %g2, %g0, %tstate */ 1164 0x87904000, /* wrpr %g1, %g0, %tt */ 1165 0xc2d80000, /* ldxa [%g0]ASI_MMU, %g1 */ 1166 0x83307030, /* srlx %g1, CTXSHIFT, %g1 */ 1167 0x02c04004, /* brz,pn %g1, .+0x10 */ 1168 0x03000000, /* sethi %hi(new_tpc), %g1 */ 1169 0x82106000, /* or %g1, %lo(new_tpc), %g1 */ 1170 0x30800002, /* ba,a .+0x8 */ 1171 0x82106000, /* or %g1, %lo(new_tpc), %g1 */ 1172 0x81904000, /* wrpr %g1, %g0, %tpc */ 1173 0x82006004, /* add %g1, 4, %g1 */ 1174 0x83904000, /* wrpr %g1, %g0, %tnpc */ 1175 0x03000000, /* sethi %hi(tmptick), %g1 */ 1176 0x85410000, /* rd %tick, %g2 */ 1177 0xc4706000, /* stx %g2, [%g1 + %lo(tmptick)] */ 1178 0x30800000, /* ba,a addr */ 1179 NOP, NOP, NOP 1180 #else /* sun4v */ 1181 0x03000000, /* sethi %hi(stat), %g1 */ 1182 0xc4586000, /* ldx [%g1 + %lo(stat)], %g2 */ 1183 0x8400a001, /* add %g2, 1, %g2 */ 1184 0xc4706000, /* stx %g2, [%g1 + %lo(stat)] */ 1185 0x85524000, /* rdpr %cwp, %g2 */ 1186 0x87518000, /* rdpr %pstate, %g3 */ 1187 0x8728f008, /* sllx %g3, 8, %g3 */ 1188 0x84108003, /* or %g2, %g3, %g2 */ 1189 0x8740c000, /* rd %asi, %g3 */ 1190 0x8728f018, /* sllx %g3, 24, %g3 */ 1191 0x83540000, /* rdpr %gl, %g1 */ 1192 0x83287028, /* sllx %g1, 40, %g1 */ 1193 0x86104003, /* or %g1, %g3, %g3 */ 1194 0x84108003, /* or %g2, %g3, %g2 */ 1195 0x8350c000, /* rdpr %tt, %g1 */ 1196 0x8f902002, /* wrpr %g0, 2, %tl */ 1197 0x85908000, /* wrpr %g2, %g0, %tstate */ 1198 0x87904000, /* wrpr %g1, %g0, %tt */ 1199 0xc2d80400, /* ldxa [%g0]ASI_SCRATCHPAD, %g1 */ 1200 0xc2586000, /* ldx [%g1 + MMFSA_?_CTX], %g1 */ 1201 0x02c04004, /* brz,pn %g1, .+0x10 */ 1202 0x03000000, /* sethi %hi(new_tpc), %g1 */ 1203 0x82106000, /* or %g1, %lo(new_tpc), %g1 */ 1204 0x30800002, /* ba,a .+0x8 */ 1205 0x82106000, /* or %g1, %lo(new_tpc), %g1 */ 1206 0x81904000, /* wrpr %g1, %g0, %tpc */ 1207 0x82006004, /* add %g1, 4, %g1 */ 1208 0x83904000, /* wrpr %g1, %g0, %tnpc */ 1209 0x03000000, /* sethi %hi(tmptick), %g1 */ 1210 0x85410000, /* rd %tick, %g2 */ 1211 0xc4706000, /* stx %g2, [%g1 + %lo(tmptick)] */ 1212 0x30800000 /* ba,a addr */ 1213 #endif /* sun4v */ 1214 }; 1215 1216 ASSERT(MUTEX_HELD(&tstat_lock)); 1217 #ifndef sun4v 1218 ASSERT(entno == TSTAT_ENT_ITLBMISS || entno == TSTAT_ENT_DTLBMISS); 1219 #else 1220 ASSERT(entno == TSTAT_ENT_ITLBMISS || entno == TSTAT_ENT_DTLBMISS || 1221 entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_DMMUMISS); 1222 #endif 1223 1224 stat = TSTAT_DATA_OFFS(tcpu, tdata_traps) + entoffs; 1225 tmptick = TSTAT_DATA_OFFS(tcpu, tdata_tmptick); 1226 1227 if (itlb) { 1228 ret = &tcpu->tcpu_instr->tinst_itlbret; 1229 udata = &data->tpgsz_user.tmode_itlb; 1230 kdata = &data->tpgsz_kernel.tmode_itlb; 1231 tpc = TSTAT_INSTR_OFFS(tcpu, tinst_itlbret.ttlbr_ktlb); 1232 } else { 1233 ret = &tcpu->tcpu_instr->tinst_dtlbret; 1234 udata = &data->tpgsz_user.tmode_dtlb; 1235 kdata = &data->tpgsz_kernel.tmode_dtlb; 1236 tpc = TSTAT_INSTR_OFFS(tcpu, tinst_dtlbret.ttlbr_ktlb); 1237 } 1238 1239 utpc = tpc + offsetof(tstat_tlbret_t, ttlbr_utlb) - 1240 offsetof(tstat_tlbret_t, ttlbr_ktlb); 1241 1242 ASSERT(HI22(tpc) == HI22(utpc)); 1243 1244 ent = (uint32_t *)((uintptr_t)tcpu->tcpu_instr + entoffs); 1245 orig = KERNELBASE + entoffs; 1246 va = (uintptr_t)tcpu->tcpu_ibase + entoffs; 1247 baoffs = TSTAT_TLBENT_BA * sizeof (uint32_t); 1248 1249 #ifdef sun4v 1250 if (entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_DMMUMISS) { 1251 /* 1252 * Because of lack of space, interposing tlbent trap 1253 * handler for IMMU_miss and DMMU_miss traps cannot be 1254 * placed in-line. Instead, we copy it to the space set 1255 * aside for these traps in per CPU trapstat area and 1256 * invoke it by placing a branch in the trap table itself. 1257 */ 1258 static const uint32_t mmumiss[TSTAT_ENT_NINSTR] = { 1259 0x30800000, /* ba,a addr */ 1260 NOP, NOP, NOP, NOP, NOP, NOP, NOP 1261 }; 1262 uint32_t *tent = ent; /* trap vector entry */ 1263 uintptr_t tentva = va; /* trap vector entry va */ 1264 1265 if (itlb) { 1266 ent = (uint32_t *)((uintptr_t) 1267 &tcpu->tcpu_instr->tinst_immumiss); 1268 va = TSTAT_INSTR_OFFS(tcpu, tinst_immumiss); 1269 } else { 1270 ent = (uint32_t *)((uintptr_t) 1271 &tcpu->tcpu_instr->tinst_dmmumiss); 1272 va = TSTAT_INSTR_OFFS(tcpu, tinst_dmmumiss); 1273 } 1274 bcopy(mmumiss, tent, sizeof (mmumiss)); 1275 tent[0] |= DISP22(tentva, va); 1276 } 1277 #endif /* sun4v */ 1278 1279 bcopy(tlbent, ent, sizeof (tlbent)); 1280 1281 ent[TSTAT_TLBENT_STATHI] |= HI22(stat); 1282 ent[TSTAT_TLBENT_STATLO_LD] |= LO10(stat); 1283 ent[TSTAT_TLBENT_STATLO_ST] |= LO10(stat); 1284 #ifndef sun4v 1285 ent[TSTAT_TLBENT_MMUASI] |= asi; 1286 #else 1287 ent[TSTAT_TLBENT_TAGTARGET] |= tagtarget_off; 1288 #endif 1289 ent[TSTAT_TLBENT_TPCHI] |= HI22(tpc); 1290 ent[TSTAT_TLBENT_TPCLO_USER] |= LO10(utpc); 1291 ent[TSTAT_TLBENT_TPCLO_KERN] |= LO10(tpc); 1292 ent[TSTAT_TLBENT_TSHI] |= HI22(tmptick); 1293 ent[TSTAT_TLBENT_TSLO] |= LO10(tmptick); 1294 ent[TSTAT_TLBENT_BA] |= DISP22(va + baoffs, orig); 1295 1296 /* 1297 * And now set up the TLB return entries. 1298 */ 1299 trapstat_tlbretent(tcpu, &ret->ttlbr_ktlb, &kdata->ttlb_tlb); 1300 trapstat_tlbretent(tcpu, &ret->ttlbr_ktsb, &kdata->ttlb_tsb); 1301 trapstat_tlbretent(tcpu, &ret->ttlbr_utlb, &udata->ttlb_tlb); 1302 trapstat_tlbretent(tcpu, &ret->ttlbr_utsb, &udata->ttlb_tsb); 1303 } 1304 1305 #undef TSTAT_TLBENT_STATHI 1306 #undef TSTAT_TLBENT_STATLO_LD 1307 #undef TSTAT_TLBENT_STATLO_ST 1308 #ifndef sun4v 1309 #undef TSTAT_TLBENT_MMUASI 1310 #else 1311 #undef TSTAT_TLBENT_TAGTARGET 1312 #endif 1313 #undef TSTAT_TLBENT_TPCHI 1314 #undef TSTAT_TLBENT_TPCLO_USER 1315 #undef TSTAT_TLBENT_TPCLO_KERN 1316 #undef TSTAT_TLBENT_TSHI 1317 #undef TSTAT_TLBENT_TSLO 1318 #undef TSTAT_TLBENT_BA 1319 1320 /* 1321 * The TSTAT_ENABLED_* constants define offsets in the enabled entry; the 1322 * TSTAT_DISABLED_BA constant defines an offset in the disabled entry. Both 1323 * sets of constants are used only in trapstat_make_traptab() (below) and 1324 * #undef'd immediately afterwards. Any change to "enabled" or "disabled" 1325 * in trapstat_make_traptab() will likely require changes to these constants. 1326 */ 1327 #define TSTAT_ENABLED_STATHI 0 1328 #define TSTAT_ENABLED_STATLO_LD 1 1329 #define TSTAT_ENABLED_STATLO_ST 3 1330 #define TSTAT_ENABLED_BA 4 1331 #define TSTAT_DISABLED_BA 0 1332 1333 static void 1334 trapstat_make_traptab(tstat_percpu_t *tcpu) 1335 { 1336 uint32_t *ent; 1337 uint64_t *stat; 1338 uintptr_t orig, va, en_baoffs, dis_baoffs; 1339 int nent; 1340 1341 /* 1342 * This is the entry in the interposing trap table for enabled trap 1343 * table entries. It loads a counter, increments it and stores it 1344 * back before branching to the actual trap table entry. 1345 */ 1346 static const uint32_t enabled[TSTAT_ENT_NINSTR] = { 1347 0x03000000, /* sethi %hi(stat), %g1 */ 1348 0xc4586000, /* ldx [%g1 + %lo(stat)], %g2 */ 1349 0x8400a001, /* add %g2, 1, %g2 */ 1350 0xc4706000, /* stx %g2, [%g1 + %lo(stat)] */ 1351 0x30800000, /* ba,a addr */ 1352 NOP, NOP, NOP 1353 }; 1354 1355 /* 1356 * This is the entry in the interposing trap table for disabled trap 1357 * table entries. It simply branches to the actual, underlying trap 1358 * table entry. As explained in the "Implementation Details" section 1359 * of the block comment, all TL>0 traps _must_ use the disabled entry; 1360 * additional entries may be explicitly disabled through the use 1361 * of TSTATIOC_ENTRY/TSTATIOC_NOENTRY. 1362 */ 1363 static const uint32_t disabled[TSTAT_ENT_NINSTR] = { 1364 0x30800000, /* ba,a addr */ 1365 NOP, NOP, NOP, NOP, NOP, NOP, NOP, 1366 }; 1367 1368 ASSERT(MUTEX_HELD(&tstat_lock)); 1369 1370 ent = tcpu->tcpu_instr->tinst_traptab; 1371 stat = (uint64_t *)TSTAT_DATA_OFFS(tcpu, tdata_traps); 1372 orig = KERNELBASE; 1373 va = (uintptr_t)tcpu->tcpu_ibase; 1374 en_baoffs = TSTAT_ENABLED_BA * sizeof (uint32_t); 1375 dis_baoffs = TSTAT_DISABLED_BA * sizeof (uint32_t); 1376 1377 for (nent = 0; nent < TSTAT_TOTAL_NENT; nent++) { 1378 if (tstat_enabled[nent]) { 1379 bcopy(enabled, ent, sizeof (enabled)); 1380 ent[TSTAT_ENABLED_STATHI] |= HI22((uintptr_t)stat); 1381 ent[TSTAT_ENABLED_STATLO_LD] |= LO10((uintptr_t)stat); 1382 ent[TSTAT_ENABLED_STATLO_ST] |= LO10((uintptr_t)stat); 1383 ent[TSTAT_ENABLED_BA] |= DISP22(va + en_baoffs, orig); 1384 } else { 1385 bcopy(disabled, ent, sizeof (disabled)); 1386 ent[TSTAT_DISABLED_BA] |= DISP22(va + dis_baoffs, orig); 1387 } 1388 1389 stat++; 1390 orig += sizeof (enabled); 1391 ent += sizeof (enabled) / sizeof (*ent); 1392 va += sizeof (enabled); 1393 } 1394 } 1395 1396 #undef TSTAT_ENABLED_STATHI 1397 #undef TSTAT_ENABLED_STATLO_LD 1398 #undef TSTAT_ENABLED_STATLO_ST 1399 #undef TSTAT_ENABLED_BA 1400 #undef TSTAT_DISABLED_BA 1401 1402 static void 1403 trapstat_setup(processorid_t cpu) 1404 { 1405 tstat_percpu_t *tcpu = &tstat_percpu[cpu]; 1406 #ifndef sun4v 1407 int i; 1408 caddr_t va; 1409 pfn_t *pfn; 1410 #endif 1411 1412 ASSERT(tcpu->tcpu_pfn == NULL); 1413 ASSERT(tcpu->tcpu_instr == NULL); 1414 ASSERT(tcpu->tcpu_data == NULL); 1415 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 1416 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED)); 1417 ASSERT(MUTEX_HELD(&cpu_lock)); 1418 ASSERT(MUTEX_HELD(&tstat_lock)); 1419 1420 /* 1421 * The lower fifteen bits of the %tba are always read as zero; we must 1422 * align our instruction base address appropriately. 1423 */ 1424 #ifndef sun4v 1425 tcpu->tcpu_ibase = (caddr_t)((KERNELBASE - tstat_total_size) 1426 & TSTAT_TBA_MASK); 1427 tcpu->tcpu_dbase = tcpu->tcpu_ibase + TSTAT_INSTR_SIZE; 1428 tcpu->tcpu_vabase = tcpu->tcpu_ibase; 1429 1430 tcpu->tcpu_pfn = vmem_alloc(tstat_arena, tstat_total_pages, VM_SLEEP); 1431 bzero(tcpu->tcpu_pfn, tstat_total_pages); 1432 pfn = tcpu->tcpu_pfn; 1433 1434 tcpu->tcpu_instr = vmem_alloc(tstat_arena, TSTAT_INSTR_SIZE, VM_SLEEP); 1435 1436 va = (caddr_t)tcpu->tcpu_instr; 1437 for (i = 0; i < TSTAT_INSTR_PAGES; i++, va += MMU_PAGESIZE) 1438 *pfn++ = hat_getpfnum(kas.a_hat, va); 1439 1440 /* 1441 * We must be sure that the pages that we will use to examine the data 1442 * have the same virtual color as the pages to which the data is being 1443 * recorded, hence the alignment and phase constraints on the 1444 * allocation. 1445 */ 1446 tcpu->tcpu_data = vmem_xalloc(tstat_arena, tstat_data_size, 1447 shm_alignment, (uintptr_t)tcpu->tcpu_dbase & (shm_alignment - 1), 1448 0, 0, NULL, VM_SLEEP); 1449 bzero(tcpu->tcpu_data, tstat_data_size); 1450 tcpu->tcpu_data->tdata_cpuid = cpu; 1451 1452 va = (caddr_t)tcpu->tcpu_data; 1453 for (i = 0; i < tstat_data_pages; i++, va += MMU_PAGESIZE) 1454 *pfn++ = hat_getpfnum(kas.a_hat, va); 1455 #else /* sun4v */ 1456 ASSERT(!(tstat_total_size > (1 + ~TSTAT_TBA_MASK))); 1457 tcpu->tcpu_vabase = (caddr_t)(KERNELBASE - MMU_PAGESIZE4M); 1458 tcpu->tcpu_ibase = tcpu->tcpu_vabase + (cpu * (1 + ~TSTAT_TBA_MASK)); 1459 tcpu->tcpu_dbase = tcpu->tcpu_ibase + TSTAT_INSTR_SIZE; 1460 1461 tcpu->tcpu_pfn = &tstat_pfn; 1462 tcpu->tcpu_instr = (tstat_instr_t *)(tstat_va + (cpu * 1463 (1 + ~TSTAT_TBA_MASK))); 1464 tcpu->tcpu_data = (tstat_data_t *)(tstat_va + (cpu * 1465 (1 + ~TSTAT_TBA_MASK)) + TSTAT_INSTR_SIZE); 1466 bzero(tcpu->tcpu_data, tstat_data_size); 1467 tcpu->tcpu_data->tdata_cpuid = cpu; 1468 #endif /* sun4v */ 1469 1470 /* 1471 * Now that we have all of the instruction and data pages allocated, 1472 * make the trap table from scratch. 1473 */ 1474 trapstat_make_traptab(tcpu); 1475 1476 if (tstat_options & TSTAT_OPT_TLBDATA) { 1477 /* 1478 * TLB Statistics have been specified; set up the I- and D-TLB 1479 * entries and corresponding TLB return entries. 1480 */ 1481 #ifndef sun4v 1482 trapstat_tlbent(tcpu, TSTAT_ENT_ITLBMISS); 1483 trapstat_tlbent(tcpu, TSTAT_ENT_DTLBMISS); 1484 #else 1485 if (tstat_fast_tlbstat) { 1486 trapstat_tlbent(tcpu, TSTAT_ENT_IMMUMISS); 1487 trapstat_tlbent(tcpu, TSTAT_ENT_DMMUMISS); 1488 } else { 1489 trapstat_tlbent(tcpu, TSTAT_ENT_ITLBMISS); 1490 trapstat_tlbent(tcpu, TSTAT_ENT_DTLBMISS); 1491 } 1492 #endif 1493 } 1494 1495 tcpu->tcpu_flags |= TSTAT_CPU_ALLOCATED; 1496 1497 /* 1498 * Finally, get the target CPU to load the locked pages into its TLBs. 1499 */ 1500 xc_one(cpu, (xcfunc_t *)trapstat_load_tlb, 0, 0); 1501 } 1502 1503 static void 1504 trapstat_teardown(processorid_t cpu) 1505 { 1506 tstat_percpu_t *tcpu = &tstat_percpu[cpu]; 1507 #ifndef sun4v 1508 int i; 1509 #endif 1510 caddr_t va = tcpu->tcpu_vabase; 1511 1512 ASSERT(tcpu->tcpu_pfn != NULL); 1513 ASSERT(tcpu->tcpu_instr != NULL); 1514 ASSERT(tcpu->tcpu_data != NULL); 1515 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 1516 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 1517 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 1518 ASSERT(MUTEX_HELD(&cpu_lock)); 1519 ASSERT(MUTEX_HELD(&tstat_lock)); 1520 1521 #ifndef sun4v 1522 vmem_free(tstat_arena, tcpu->tcpu_pfn, tstat_total_pages); 1523 vmem_free(tstat_arena, tcpu->tcpu_instr, TSTAT_INSTR_SIZE); 1524 vmem_free(tstat_arena, tcpu->tcpu_data, tstat_data_size); 1525 1526 for (i = 0; i < tstat_total_pages; i++, va += MMU_PAGESIZE) { 1527 xt_one(cpu, vtag_flushpage_tl1, (uint64_t)va, KCONTEXT); 1528 } 1529 #else 1530 xt_one(cpu, vtag_unmap_perm_tl1, (uint64_t)va, KCONTEXT); 1531 #endif 1532 1533 tcpu->tcpu_pfn = NULL; 1534 tcpu->tcpu_instr = NULL; 1535 tcpu->tcpu_data = NULL; 1536 tcpu->tcpu_flags &= ~TSTAT_CPU_ALLOCATED; 1537 } 1538 1539 static int 1540 trapstat_go() 1541 { 1542 cpu_t *cp; 1543 1544 mutex_enter(&cpu_lock); 1545 mutex_enter(&tstat_lock); 1546 1547 if (tstat_running) { 1548 mutex_exit(&tstat_lock); 1549 mutex_exit(&cpu_lock); 1550 return (EBUSY); 1551 } 1552 1553 #ifdef sun4v 1554 /* 1555 * Allocate large page to hold interposing tables. 1556 */ 1557 tstat_va = contig_mem_alloc(MMU_PAGESIZE4M); 1558 tstat_pfn = va_to_pfn(tstat_va); 1559 if (tstat_pfn == PFN_INVALID) 1560 return (EAGAIN); 1561 1562 /* 1563 * For detailed TLB statistics, invoke CPU specific interface 1564 * to see if it supports a low overhead interface to collect 1565 * TSB hit statistics. If so, make set tstat_fast_tlbstat flag 1566 * to reflect that. 1567 */ 1568 if (tstat_options & TSTAT_OPT_TLBDATA) { 1569 int error; 1570 1571 error = cpu_trapstat_conf(CPU_TSTATCONF_INIT); 1572 if (error == 0) 1573 tstat_fast_tlbstat = B_TRUE; 1574 else if (error != ENOTSUP) { 1575 contig_mem_free(tstat_va, MMU_PAGESIZE4M); 1576 return (error); 1577 } 1578 } 1579 #endif 1580 1581 /* 1582 * First, perform any necessary hot patching. 1583 */ 1584 trapstat_hotpatch(); 1585 1586 /* 1587 * Allocate the resources we'll need to measure probe effect. 1588 */ 1589 trapstat_probe_alloc(); 1590 1591 1592 cp = cpu_list; 1593 do { 1594 if (!(tstat_percpu[cp->cpu_id].tcpu_flags & TSTAT_CPU_SELECTED)) 1595 continue; 1596 1597 trapstat_setup(cp->cpu_id); 1598 1599 /* 1600 * Note that due to trapstat_probe()'s use of global data, 1601 * we determine the probe effect on each CPU serially instead 1602 * of in parallel with an xc_all(). 1603 */ 1604 xc_one(cp->cpu_id, (xcfunc_t *)trapstat_probe, 0, 0); 1605 } while ((cp = cp->cpu_next) != cpu_list); 1606 1607 xc_all((xcfunc_t *)trapstat_enable, 0, 0); 1608 1609 trapstat_probe_free(); 1610 tstat_running = 1; 1611 mutex_exit(&tstat_lock); 1612 mutex_exit(&cpu_lock); 1613 1614 return (0); 1615 } 1616 1617 static int 1618 trapstat_stop() 1619 { 1620 int i; 1621 1622 mutex_enter(&cpu_lock); 1623 mutex_enter(&tstat_lock); 1624 if (!tstat_running) { 1625 mutex_exit(&tstat_lock); 1626 mutex_exit(&cpu_lock); 1627 return (ENXIO); 1628 } 1629 1630 xc_all((xcfunc_t *)trapstat_disable, 0, 0); 1631 1632 for (i = 0; i <= max_cpuid; i++) { 1633 if (tstat_percpu[i].tcpu_flags & TSTAT_CPU_ALLOCATED) 1634 trapstat_teardown(i); 1635 } 1636 1637 #ifdef sun4v 1638 if (tstat_options & TSTAT_OPT_TLBDATA) 1639 cpu_trapstat_conf(CPU_TSTATCONF_FINI); 1640 contig_mem_free(tstat_va, MMU_PAGESIZE4M); 1641 #endif 1642 trapstat_hotpatch(); 1643 tstat_running = 0; 1644 mutex_exit(&tstat_lock); 1645 mutex_exit(&cpu_lock); 1646 1647 return (0); 1648 } 1649 1650 /* 1651 * This is trapstat's DR CPU configuration callback. It's called (with 1652 * cpu_lock held) to unconfigure a newly powered-off CPU, or to configure a 1653 * powered-off CPU that is to be brought into the system. We need only take 1654 * action in the unconfigure case: because a powered-off CPU will have its 1655 * trap table restored to KERNELBASE if it is ever powered back on, we must 1656 * update the flags to reflect that trapstat is no longer enabled on the 1657 * powered-off CPU. Note that this means that a TSTAT_CPU_ENABLED CPU that 1658 * is unconfigured/powered off and later powered back on/reconfigured will 1659 * _not_ be re-TSTAT_CPU_ENABLED. 1660 */ 1661 static int 1662 trapstat_cpu_setup(cpu_setup_t what, processorid_t cpu) 1663 { 1664 tstat_percpu_t *tcpu = &tstat_percpu[cpu]; 1665 1666 ASSERT(MUTEX_HELD(&cpu_lock)); 1667 mutex_enter(&tstat_lock); 1668 1669 if (!tstat_running) { 1670 mutex_exit(&tstat_lock); 1671 return (0); 1672 } 1673 1674 switch (what) { 1675 case CPU_CONFIG: 1676 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 1677 break; 1678 1679 case CPU_UNCONFIG: 1680 if (tcpu->tcpu_flags & TSTAT_CPU_ENABLED) 1681 tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED; 1682 break; 1683 1684 default: 1685 break; 1686 } 1687 1688 mutex_exit(&tstat_lock); 1689 return (0); 1690 } 1691 1692 /* 1693 * This is called before a CPR suspend and after a CPR resume. We don't have 1694 * anything to do before a suspend, but after a restart we must restore the 1695 * trap table to be our interposing trap table. However, we don't actually 1696 * know whether or not the CPUs have been powered off -- this routine may be 1697 * called while restoring from a failed CPR suspend. We thus run through each 1698 * TSTAT_CPU_ENABLED CPU, and explicitly destroy and reestablish its 1699 * interposing trap table. This assures that our state is correct regardless 1700 * of whether or not the CPU has been newly powered on. 1701 */ 1702 /*ARGSUSED*/ 1703 static boolean_t 1704 trapstat_cpr(void *arg, int code) 1705 { 1706 cpu_t *cp; 1707 1708 if (code == CB_CODE_CPR_CHKPT) 1709 return (B_TRUE); 1710 1711 ASSERT(code == CB_CODE_CPR_RESUME); 1712 1713 mutex_enter(&cpu_lock); 1714 mutex_enter(&tstat_lock); 1715 1716 if (!tstat_running) { 1717 mutex_exit(&tstat_lock); 1718 mutex_exit(&cpu_lock); 1719 return (B_TRUE); 1720 } 1721 1722 cp = cpu_list; 1723 do { 1724 tstat_percpu_t *tcpu = &tstat_percpu[cp->cpu_id]; 1725 1726 if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)) 1727 continue; 1728 1729 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 1730 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 1731 1732 xc_one(cp->cpu_id, (xcfunc_t *)trapstat_disable, 0, 0); 1733 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 1734 1735 /* 1736 * Preserve this CPU's data in tstat_buffer and rip down its 1737 * interposing trap table. 1738 */ 1739 bcopy(tcpu->tcpu_data, tstat_buffer, tstat_data_t_size); 1740 trapstat_teardown(cp->cpu_id); 1741 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED)); 1742 1743 /* 1744 * Reestablish the interposing trap table and restore the old 1745 * data. 1746 */ 1747 trapstat_setup(cp->cpu_id); 1748 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 1749 bcopy(tstat_buffer, tcpu->tcpu_data, tstat_data_t_size); 1750 1751 xc_one(cp->cpu_id, (xcfunc_t *)trapstat_enable, 0, 0); 1752 } while ((cp = cp->cpu_next) != cpu_list); 1753 1754 mutex_exit(&tstat_lock); 1755 mutex_exit(&cpu_lock); 1756 1757 return (B_TRUE); 1758 } 1759 1760 /*ARGSUSED*/ 1761 static int 1762 trapstat_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 1763 { 1764 int i; 1765 1766 mutex_enter(&cpu_lock); 1767 mutex_enter(&tstat_lock); 1768 if (tstat_open != 0) { 1769 mutex_exit(&tstat_lock); 1770 mutex_exit(&cpu_lock); 1771 return (EBUSY); 1772 } 1773 1774 /* 1775 * Register this in open() rather than in attach() to prevent deadlock 1776 * with DR code. During attach, I/O device tree locks are grabbed 1777 * before trapstat_attach() is invoked - registering in attach 1778 * will result in the lock order: device tree lock, cpu_lock. 1779 * DR code however requires that cpu_lock be acquired before 1780 * device tree locks. 1781 */ 1782 ASSERT(!tstat_running); 1783 register_cpu_setup_func((cpu_setup_func_t *)trapstat_cpu_setup, NULL); 1784 1785 /* 1786 * Clear all options. And until specific CPUs are specified, we'll 1787 * mark all CPUs as selected. 1788 */ 1789 tstat_options = 0; 1790 1791 for (i = 0; i <= max_cpuid; i++) 1792 tstat_percpu[i].tcpu_flags |= TSTAT_CPU_SELECTED; 1793 1794 /* 1795 * By default, all traps at TL=0 are enabled. Traps at TL>0 must 1796 * be disabled. 1797 */ 1798 for (i = 0; i < TSTAT_TOTAL_NENT; i++) 1799 tstat_enabled[i] = i < TSTAT_NENT ? 1 : 0; 1800 1801 tstat_open = 1; 1802 mutex_exit(&tstat_lock); 1803 mutex_exit(&cpu_lock); 1804 1805 return (0); 1806 } 1807 1808 /*ARGSUSED*/ 1809 static int 1810 trapstat_close(dev_t dev, int flag, int otyp, cred_t *cred_p) 1811 { 1812 (void) trapstat_stop(); 1813 1814 ASSERT(!tstat_running); 1815 1816 mutex_enter(&cpu_lock); 1817 unregister_cpu_setup_func((cpu_setup_func_t *)trapstat_cpu_setup, NULL); 1818 mutex_exit(&cpu_lock); 1819 1820 tstat_open = 0; 1821 return (DDI_SUCCESS); 1822 } 1823 1824 static int 1825 trapstat_option(int option) 1826 { 1827 mutex_enter(&tstat_lock); 1828 1829 if (tstat_running) { 1830 mutex_exit(&tstat_lock); 1831 return (EBUSY); 1832 } 1833 1834 tstat_options |= option; 1835 mutex_exit(&tstat_lock); 1836 1837 return (0); 1838 } 1839 1840 /*ARGSUSED*/ 1841 static int 1842 trapstat_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *crd, int *rval) 1843 { 1844 int i, j, out; 1845 size_t dsize; 1846 1847 switch (cmd) { 1848 case TSTATIOC_GO: 1849 return (trapstat_go()); 1850 1851 case TSTATIOC_NOGO: 1852 return (trapstat_option(TSTAT_OPT_NOGO)); 1853 1854 case TSTATIOC_STOP: 1855 return (trapstat_stop()); 1856 1857 case TSTATIOC_CPU: 1858 if (arg < 0 || arg > max_cpuid) 1859 return (EINVAL); 1860 /*FALLTHROUGH*/ 1861 1862 case TSTATIOC_NOCPU: 1863 mutex_enter(&tstat_lock); 1864 1865 if (tstat_running) { 1866 mutex_exit(&tstat_lock); 1867 return (EBUSY); 1868 } 1869 1870 /* 1871 * If this is the first CPU to be specified (or if we are 1872 * being asked to explicitly de-select CPUs), disable all CPUs. 1873 */ 1874 if (!(tstat_options & TSTAT_OPT_CPU) || cmd == TSTATIOC_NOCPU) { 1875 tstat_options |= TSTAT_OPT_CPU; 1876 1877 for (i = 0; i <= max_cpuid; i++) { 1878 tstat_percpu_t *tcpu = &tstat_percpu[i]; 1879 1880 ASSERT(cmd == TSTATIOC_NOCPU || 1881 (tcpu->tcpu_flags & TSTAT_CPU_SELECTED)); 1882 tcpu->tcpu_flags &= ~TSTAT_CPU_SELECTED; 1883 } 1884 } 1885 1886 if (cmd == TSTATIOC_CPU) 1887 tstat_percpu[arg].tcpu_flags |= TSTAT_CPU_SELECTED; 1888 1889 mutex_exit(&tstat_lock); 1890 1891 return (0); 1892 1893 case TSTATIOC_ENTRY: 1894 mutex_enter(&tstat_lock); 1895 1896 if (tstat_running) { 1897 mutex_exit(&tstat_lock); 1898 return (EBUSY); 1899 } 1900 1901 if (arg >= TSTAT_NENT || arg < 0) { 1902 mutex_exit(&tstat_lock); 1903 return (EINVAL); 1904 } 1905 1906 if (!(tstat_options & TSTAT_OPT_ENTRY)) { 1907 /* 1908 * If this is the first entry that we are explicitly 1909 * enabling, explicitly disable every TL=0 entry. 1910 */ 1911 for (i = 0; i < TSTAT_NENT; i++) 1912 tstat_enabled[i] = 0; 1913 1914 tstat_options |= TSTAT_OPT_ENTRY; 1915 } 1916 1917 tstat_enabled[arg] = 1; 1918 mutex_exit(&tstat_lock); 1919 return (0); 1920 1921 case TSTATIOC_NOENTRY: 1922 mutex_enter(&tstat_lock); 1923 1924 if (tstat_running) { 1925 mutex_exit(&tstat_lock); 1926 return (EBUSY); 1927 } 1928 1929 for (i = 0; i < TSTAT_NENT; i++) 1930 tstat_enabled[i] = 0; 1931 1932 mutex_exit(&tstat_lock); 1933 return (0); 1934 1935 case TSTATIOC_READ: 1936 mutex_enter(&tstat_lock); 1937 1938 if (tstat_options & TSTAT_OPT_TLBDATA) { 1939 dsize = tstat_data_t_exported_size; 1940 } else { 1941 dsize = sizeof (tstat_data_t); 1942 } 1943 1944 for (i = 0, out = 0; i <= max_cpuid; i++) { 1945 tstat_percpu_t *tcpu = &tstat_percpu[i]; 1946 1947 if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)) 1948 continue; 1949 1950 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED); 1951 ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED); 1952 1953 tstat_buffer->tdata_cpuid = -1; 1954 xc_one(i, (xcfunc_t *)trapstat_snapshot, 0, 0); 1955 1956 if (tstat_buffer->tdata_cpuid == -1) { 1957 /* 1958 * This CPU is not currently responding to 1959 * cross calls; we have caught it while it is 1960 * being unconfigured. We'll drop tstat_lock 1961 * and pick up and drop cpu_lock. By the 1962 * time we acquire cpu_lock, the DR operation 1963 * will appear consistent and we can assert 1964 * that trapstat_cpu_setup() has cleared 1965 * TSTAT_CPU_ENABLED. 1966 */ 1967 mutex_exit(&tstat_lock); 1968 mutex_enter(&cpu_lock); 1969 mutex_exit(&cpu_lock); 1970 mutex_enter(&tstat_lock); 1971 ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED)); 1972 continue; 1973 } 1974 1975 /* 1976 * Need to compensate for the difference between page 1977 * sizes exported to users and page sizes available 1978 * within the kernel. 1979 */ 1980 if ((tstat_options & TSTAT_OPT_TLBDATA) && 1981 (tstat_pgszs != tstat_user_pgszs)) { 1982 tstat_pgszdata_t *tp; 1983 uint_t szc; 1984 1985 tp = &tstat_buffer->tdata_pgsz[0]; 1986 for (j = 0; j < tstat_user_pgszs; j++) { 1987 if ((szc = USERSZC_2_SZC(j)) != j) { 1988 bcopy(&tp[szc], &tp[j], 1989 sizeof (tstat_pgszdata_t)); 1990 } 1991 } 1992 } 1993 1994 if (copyout(tstat_buffer, (void *)arg, dsize) != 0) { 1995 mutex_exit(&tstat_lock); 1996 return (EFAULT); 1997 } 1998 1999 out++; 2000 arg += dsize; 2001 } 2002 2003 if (out != max_cpuid + 1) { 2004 processorid_t cpuid = -1; 2005 arg += offsetof(tstat_data_t, tdata_cpuid); 2006 2007 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) { 2008 mutex_exit(&tstat_lock); 2009 return (EFAULT); 2010 } 2011 } 2012 2013 mutex_exit(&tstat_lock); 2014 2015 return (0); 2016 2017 case TSTATIOC_TLBDATA: 2018 return (trapstat_option(TSTAT_OPT_TLBDATA)); 2019 2020 default: 2021 break; 2022 } 2023 2024 return (ENOTTY); 2025 } 2026 2027 /*ARGSUSED*/ 2028 static int 2029 trapstat_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 2030 { 2031 int error; 2032 2033 switch (infocmd) { 2034 case DDI_INFO_DEVT2DEVINFO: 2035 *result = (void *)tstat_devi; 2036 error = DDI_SUCCESS; 2037 break; 2038 case DDI_INFO_DEVT2INSTANCE: 2039 *result = (void *)0; 2040 error = DDI_SUCCESS; 2041 break; 2042 default: 2043 error = DDI_FAILURE; 2044 } 2045 return (error); 2046 } 2047 2048 static int 2049 trapstat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 2050 { 2051 switch (cmd) { 2052 case DDI_ATTACH: 2053 break; 2054 2055 case DDI_RESUME: 2056 return (DDI_SUCCESS); 2057 2058 default: 2059 return (DDI_FAILURE); 2060 } 2061 2062 if (ddi_create_minor_node(devi, "trapstat", S_IFCHR, 2063 0, DDI_PSEUDO, 0) == DDI_FAILURE) { 2064 ddi_remove_minor_node(devi, NULL); 2065 return (DDI_FAILURE); 2066 } 2067 2068 ddi_report_dev(devi); 2069 tstat_devi = devi; 2070 2071 tstat_pgszs = page_num_pagesizes(); 2072 tstat_user_pgszs = page_num_user_pagesizes(); 2073 tstat_data_t_size = sizeof (tstat_data_t) + 2074 (tstat_pgszs - 1) * sizeof (tstat_pgszdata_t); 2075 tstat_data_t_exported_size = sizeof (tstat_data_t) + 2076 (tstat_user_pgszs - 1) * sizeof (tstat_pgszdata_t); 2077 #ifndef sun4v 2078 tstat_data_pages = (tstat_data_t_size >> MMU_PAGESHIFT) + 1; 2079 tstat_total_pages = TSTAT_INSTR_PAGES + tstat_data_pages; 2080 tstat_data_size = tstat_data_pages * MMU_PAGESIZE; 2081 tstat_total_size = TSTAT_INSTR_SIZE + tstat_data_size; 2082 #else 2083 tstat_data_pages = 0; 2084 tstat_data_size = tstat_data_t_size; 2085 tstat_total_pages = ((TSTAT_INSTR_SIZE + tstat_data_size) >> 2086 MMU_PAGESHIFT) + 1; 2087 tstat_total_size = tstat_total_pages * MMU_PAGESIZE; 2088 #endif 2089 2090 tstat_percpu = kmem_zalloc((max_cpuid + 1) * 2091 sizeof (tstat_percpu_t), KM_SLEEP); 2092 2093 /* 2094 * Create our own arena backed by segkmem to assure a source of 2095 * MMU_PAGESIZE-aligned allocations. We allocate out of the 2096 * heap32_arena to assure that we can address the allocated memory with 2097 * a single sethi/simm13 pair in the interposing trap table entries. 2098 */ 2099 tstat_arena = vmem_create("trapstat", NULL, 0, MMU_PAGESIZE, 2100 segkmem_alloc_permanent, segkmem_free, heap32_arena, 0, VM_SLEEP); 2101 2102 tstat_enabled = kmem_alloc(TSTAT_TOTAL_NENT * sizeof (int), KM_SLEEP); 2103 tstat_buffer = kmem_alloc(tstat_data_t_size, KM_SLEEP); 2104 2105 /* 2106 * CB_CL_CPR_POST_USER is the class that executes from cpr_resume() 2107 * after user threads can be restarted. By executing in this class, 2108 * we are assured of the availability of system services needed to 2109 * resume trapstat (specifically, we are assured that all CPUs are 2110 * restarted and responding to cross calls). 2111 */ 2112 tstat_cprcb = 2113 callb_add(trapstat_cpr, NULL, CB_CL_CPR_POST_USER, "trapstat"); 2114 2115 return (DDI_SUCCESS); 2116 } 2117 2118 static int 2119 trapstat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 2120 { 2121 int rval; 2122 2123 ASSERT(devi == tstat_devi); 2124 2125 switch (cmd) { 2126 case DDI_DETACH: 2127 break; 2128 2129 case DDI_SUSPEND: 2130 return (DDI_SUCCESS); 2131 2132 default: 2133 return (DDI_FAILURE); 2134 } 2135 2136 ASSERT(!tstat_running); 2137 2138 rval = callb_delete(tstat_cprcb); 2139 ASSERT(rval == 0); 2140 2141 kmem_free(tstat_buffer, tstat_data_t_size); 2142 kmem_free(tstat_enabled, TSTAT_TOTAL_NENT * sizeof (int)); 2143 vmem_destroy(tstat_arena); 2144 kmem_free(tstat_percpu, (max_cpuid + 1) * sizeof (tstat_percpu_t)); 2145 ddi_remove_minor_node(devi, NULL); 2146 2147 return (DDI_SUCCESS); 2148 } 2149 2150 /* 2151 * Configuration data structures 2152 */ 2153 static struct cb_ops trapstat_cb_ops = { 2154 trapstat_open, /* open */ 2155 trapstat_close, /* close */ 2156 nulldev, /* strategy */ 2157 nulldev, /* print */ 2158 nodev, /* dump */ 2159 nodev, /* read */ 2160 nodev, /* write */ 2161 trapstat_ioctl, /* ioctl */ 2162 nodev, /* devmap */ 2163 nodev, /* mmap */ 2164 nodev, /* segmap */ 2165 nochpoll, /* poll */ 2166 ddi_prop_op, /* cb_prop_op */ 2167 0, /* streamtab */ 2168 D_MP | D_NEW /* Driver compatibility flag */ 2169 }; 2170 2171 static struct dev_ops trapstat_ops = { 2172 DEVO_REV, /* devo_rev, */ 2173 0, /* refcnt */ 2174 trapstat_info, /* getinfo */ 2175 nulldev, /* identify */ 2176 nulldev, /* probe */ 2177 trapstat_attach, /* attach */ 2178 trapstat_detach, /* detach */ 2179 nulldev, /* reset */ 2180 &trapstat_cb_ops, /* cb_ops */ 2181 (struct bus_ops *)0, /* bus_ops */ 2182 }; 2183 2184 static struct modldrv modldrv = { 2185 &mod_driverops, /* Type of module. This one is a driver */ 2186 "Trap Statistics", /* name of module */ 2187 &trapstat_ops, /* driver ops */ 2188 }; 2189 2190 static struct modlinkage modlinkage = { 2191 MODREV_1, (void *)&modldrv, NULL 2192 }; 2193 2194 int 2195 _init(void) 2196 { 2197 return (mod_install(&modlinkage)); 2198 } 2199 2200 int 2201 _fini(void) 2202 { 2203 return (mod_remove(&modlinkage)); 2204 } 2205 2206 int 2207 _info(struct modinfo *modinfop) 2208 { 2209 return (mod_info(&modlinkage, modinfop)); 2210 } 2211