1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Joyent, Inc.
24 */
25
26 /*
27 * x86-specific routines used by the CPU Performance counter driver.
28 */
29
30 #include <sys/types.h>
31 #include <sys/time.h>
32 #include <sys/atomic.h>
33 #include <sys/regset.h>
34 #include <sys/privregs.h>
35 #include <sys/x86_archext.h>
36 #include <sys/cpuvar.h>
37 #include <sys/machcpuvar.h>
38 #include <sys/archsystm.h>
39 #include <sys/cpc_pcbe.h>
40 #include <sys/cpc_impl.h>
41 #include <sys/x_call.h>
42 #include <sys/cmn_err.h>
43 #include <sys/cmt.h>
44 #include <sys/spl.h>
45 #include <sys/apic.h>
46
47 static const uint64_t allstopped = 0;
48 static kcpc_ctx_t *(*overflow_intr_handler)(caddr_t);
49
50 /* Do threads share performance monitoring hardware? */
51 static int strands_perfmon_shared = 0;
52
53 int kcpc_hw_overflow_intr_installed; /* set by APIC code */
54 extern kcpc_ctx_t *kcpc_overflow_intr(caddr_t arg, uint64_t bitmap);
55
56 extern int kcpc_counts_include_idle; /* Project Private /etc/system variable */
57
58 void (*kcpc_hw_enable_cpc_intr)(void); /* set by APIC code */
59
60 int
kcpc_hw_add_ovf_intr(kcpc_ctx_t * (* handler)(caddr_t))61 kcpc_hw_add_ovf_intr(kcpc_ctx_t *(*handler)(caddr_t))
62 {
63 if (x86_type != X86_TYPE_P6)
64 return (0);
65 overflow_intr_handler = handler;
66 return (ipltospl(APIC_PCINT_IPL));
67 }
68
69 void
kcpc_hw_rem_ovf_intr(void)70 kcpc_hw_rem_ovf_intr(void)
71 {
72 overflow_intr_handler = NULL;
73 }
74
75 /*
76 * Hook used on P4 systems to catch online/offline events.
77 */
78 /*ARGSUSED*/
79 static int
kcpc_cpu_setup(cpu_setup_t what,int cpuid,void * arg)80 kcpc_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
81 {
82 pg_cmt_t *chip_pg;
83 int active_cpus_cnt;
84
85 if (what != CPU_ON)
86 return (0);
87
88 /*
89 * If any CPU-bound contexts exist, we don't need to invalidate
90 * anything, as no per-LWP contexts can coexist.
91 */
92 if (kcpc_cpuctx || dtrace_cpc_in_use)
93 return (0);
94
95 /*
96 * If this chip now has more than 1 active cpu, we must invalidate all
97 * contexts in the system.
98 */
99 chip_pg = (pg_cmt_t *)pghw_find_pg(cpu[cpuid], PGHW_CHIP);
100 if (chip_pg != NULL) {
101 active_cpus_cnt = GROUP_SIZE(&chip_pg->cmt_cpus_actv);
102 if (active_cpus_cnt > 1)
103 kcpc_invalidate_all();
104 }
105
106 return (0);
107 }
108
109 static kmutex_t cpu_setup_lock; /* protects setup_registered */
110 static int setup_registered;
111
112
113 void
kcpc_hw_init(cpu_t * cp)114 kcpc_hw_init(cpu_t *cp)
115 {
116 kthread_t *t = cp->cpu_idle_thread;
117 uint32_t versionid;
118 struct cpuid_regs cpuid;
119
120 strands_perfmon_shared = 0;
121 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
122 if (cpuid_getvendor(cpu[0]) == X86_VENDOR_Intel) {
123 /*
124 * Intel processors that support Architectural
125 * Performance Monitoring Version 3 have per strand
126 * performance monitoring hardware.
127 * Hence we can allow use of performance counters on
128 * multiple strands on the same core simultaneously.
129 */
130 cpuid.cp_eax = 0x0;
131 (void) __cpuid_insn(&cpuid);
132 if (cpuid.cp_eax < 0xa) {
133 strands_perfmon_shared = 1;
134 } else {
135 cpuid.cp_eax = 0xa;
136 (void) __cpuid_insn(&cpuid);
137
138 versionid = cpuid.cp_eax & 0xFF;
139 if (versionid < 3) {
140 strands_perfmon_shared = 1;
141 }
142 }
143 } else if (cpuid_getvendor(cpu[0]) == X86_VENDOR_AMD) {
144 /*
145 * On AMD systems with HT, all of the performance
146 * monitors exist on a per-logical CPU basis.
147 */
148 strands_perfmon_shared = 0;
149 } else {
150 strands_perfmon_shared = 1;
151 }
152 }
153
154 if (strands_perfmon_shared) {
155 mutex_enter(&cpu_setup_lock);
156 if (setup_registered == 0) {
157 mutex_enter(&cpu_lock);
158 register_cpu_setup_func(kcpc_cpu_setup, NULL);
159 mutex_exit(&cpu_lock);
160 setup_registered = 1;
161 }
162 mutex_exit(&cpu_setup_lock);
163 }
164
165 mutex_init(&cp->cpu_cpc_ctxlock, "cpu_cpc_ctxlock", MUTEX_DEFAULT, 0);
166
167 if (kcpc_counts_include_idle)
168 return;
169
170 installctx(t, cp, kcpc_idle_save, kcpc_idle_restore,
171 NULL, NULL, NULL, NULL);
172 }
173
174 void
kcpc_hw_fini(cpu_t * cp)175 kcpc_hw_fini(cpu_t *cp)
176 {
177 ASSERT(cp->cpu_idle_thread == NULL);
178
179 mutex_destroy(&cp->cpu_cpc_ctxlock);
180 }
181
/*
 * Extract bits u..l (inclusive, u >= l, u - l <= 63) of v, right-justified.
 *
 * The mask is produced by shifting an all-ones unsigned 64-bit value to the
 * right instead of shifting an int-typed 1 to the left.  Left-shifting an
 * int by 32 (as a 32-bit wide field such as BITS(v, 31, 0) would require)
 * is undefined behaviour, and a 31-bit wide field overflows a signed int;
 * the right shift is well defined for every field width from 1 to 64.
 * The result has type unsigned long long.
 */
#define	BITS(v, u, l)	\
	(((v) >> (l)) & (~0ULL >> (63 - ((u) - (l)))))

#define	PCBE_NAMELEN 30 /* Enough Room for pcbe.manuf.model.family.stepping */
186
187 /*
188 * Examine the processor and load an appropriate PCBE.
189 */
190 int
kcpc_hw_load_pcbe(void)191 kcpc_hw_load_pcbe(void)
192 {
193 return (kcpc_pcbe_tryload(cpuid_getvendorstr(CPU), cpuid_getfamily(CPU),
194 cpuid_getmodel(CPU), cpuid_getstep(CPU)));
195 }
196
197 /*
198 * Called by the generic framework to check if it's OK to bind a set to a CPU.
199 */
200 int
kcpc_hw_cpu_hook(processorid_t cpuid,ulong_t * kcpc_cpumap)201 kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap)
202 {
203 cpu_t *cpu, *p;
204 pg_t *chip_pg;
205 pg_cpu_itr_t itr;
206
207 if (!strands_perfmon_shared)
208 return (0);
209
210 /*
211 * Only one logical CPU on each Pentium 4 HT CPU may be bound to at
212 * once.
213 *
214 * This loop is protected by holding cpu_lock, in order to properly
215 * access the cpu_t of the desired cpu.
216 */
217 mutex_enter(&cpu_lock);
218 if ((cpu = cpu_get(cpuid)) == NULL) {
219 mutex_exit(&cpu_lock);
220 return (-1);
221 }
222
223 chip_pg = (pg_t *)pghw_find_pg(cpu, PGHW_CHIP);
224
225 PG_CPU_ITR_INIT(chip_pg, itr);
226 while ((p = pg_cpu_next(&itr)) != NULL) {
227 if (p == cpu)
228 continue;
229 if (BT_TEST(kcpc_cpumap, p->cpu_id)) {
230 mutex_exit(&cpu_lock);
231 return (-1);
232 }
233 }
234
235 mutex_exit(&cpu_lock);
236 return (0);
237 }
238
239 /*
240 * Called by the generic framework to check if it's OK to bind a set to an LWP.
241 */
242 int
kcpc_hw_lwp_hook(void)243 kcpc_hw_lwp_hook(void)
244 {
245 pg_cmt_t *chip;
246 group_t *chips;
247 group_iter_t i;
248
249 if (!strands_perfmon_shared)
250 return (0);
251
252 /*
253 * Only one CPU per chip may be online.
254 */
255 mutex_enter(&cpu_lock);
256
257 chips = pghw_set_lookup(PGHW_CHIP);
258 if (chips == NULL) {
259 mutex_exit(&cpu_lock);
260 return (0);
261 }
262
263 group_iter_init(&i);
264 while ((chip = group_iterate(chips, &i)) != NULL) {
265 if (GROUP_SIZE(&chip->cmt_cpus_actv) > 1) {
266 mutex_exit(&cpu_lock);
267 return (-1);
268 }
269 }
270
271 mutex_exit(&cpu_lock);
272 return (0);
273 }
274