/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2005 Nate Lawson
 * Copyright (c) 2004 Colin Percival
 * Copyright (c) 2004-2005 Bruno Durcot
 * Copyright (c) 2004 FUKUDA Nobuhiko
 * Copyright (c) 2009 Michael Reifenberger
 * Copyright (c) 2009 Norikatsu Shigemura
 * Copyright (c) 2008-2009 Gen Otsuji
 * Copyright (c) 2025 ShengYi Hung
 * Copyright (c) 2026 The FreeBSD Foundation
 *
 * Portions of this software were developed by Olivier Certner
 * <olce@FreeBSD.org> at Kumacom SARL under sponsorship from the FreeBSD
 * Foundation.
 *
 * This code depends on kern_cpu.c, est.c, powernow.c, p4tcc.c and smist.c
 * in various parts. The authors of these files are Nate Lawson,
 * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko.
 * This code contains patches by Michael Reifenberger and Norikatsu Shigemura.
 * Thank you.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * For more info:
 * BIOS and Kernel Developer's Guide (BKDG) for AMD Family 10h Processors,
 * 31116 Rev 3.20 - February 04, 2009
 * BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors,
 * 41256 Rev 3.00 - July 07, 2008
 * Processor Programming Reference (PPR) for AMD Family 1Ah Model 02h,
 * Revision C1 Processors, Volume 1 of 7 - Sep 29, 2024
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <machine/_inttypes.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#include <contrib/dev/acpica/include/acpi.h>

#include <dev/acpica/acpivar.h>

#include <x86/cpufreq/hwpstate_common.h>

#include "acpi_if.h"
#include "cpufreq_if.h"


#define	MSR_AMD_10H_11H_LIMIT	0xc0010061
#define	MSR_AMD_10H_11H_CONTROL	0xc0010062
#define	MSR_AMD_10H_11H_STATUS	0xc0010063
#define	MSR_AMD_10H_11H_CONFIG	0xc0010064

#define	MSR_AMD_CPPC_CAPS_1	0xc00102b0
#define	MSR_AMD_CPPC_ENABLE	0xc00102b1
#define	MSR_AMD_CPPC_CAPS_2	0xc00102b2
#define	MSR_AMD_CPPC_REQUEST	0xc00102b3
#define	MSR_AMD_CPPC_STATUS	0xc00102b4

#define	MSR_AMD_CPPC_CAPS_1_NAME	"CPPC_CAPABILITY_1"
#define	MSR_AMD_CPPC_ENABLE_NAME	"CPPC_ENABLE"
#define	MSR_AMD_CPPC_REQUEST_NAME	"CPPC_REQUEST"

#define	MSR_AMD_PWR_ACC		0xc001007a
#define	MSR_AMD_PWR_ACC_MX	0xc001007b

#define	AMD_10H_11H_MAX_STATES	16

/* for MSR_AMD_10H_11H_LIMIT C001_0061 */
#define	AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)	(((msr) >> 4) & 0x7)
#define	AMD_10H_11H_GET_PSTATE_LIMIT(msr)	(((msr)) & 0x7)
/* for MSR_AMD_10H_11H_CONFIG 10h:C001_0064:68 / 11h:C001_0064:6B */
#define	AMD_10H_11H_CUR_VID(msr)		(((msr) >> 9) & 0x7F)
#define	AMD_10H_11H_CUR_DID(msr)		(((msr) >> 6) & 0x07)
#define	AMD_10H_11H_CUR_FID(msr)		((msr) & 0x3F)

#define	AMD_17H_CUR_IDIV(msr)			(((msr) >> 30) & 0x03)
#define	AMD_17H_CUR_IDD(msr)			(((msr) >> 22) & 0xFF)
#define	AMD_17H_CUR_VID(msr)			(((msr) >> 14) & 0xFF)
#define	AMD_17H_CUR_DID(msr)			(((msr) >> 8) & 0x3F)
#define	AMD_17H_CUR_FID(msr)			((msr) & 0xFF)

#define	AMD_1AH_CUR_FID(msr)			((msr) & 0xFFF)

#define	AMD_CPPC_CAPS_1_HIGHEST_PERF_BITS	0xff000000
#define	AMD_CPPC_CAPS_1_NOMINAL_PERF_BITS	0x00ff0000
#define	AMD_CPPC_CAPS_1_EFFICIENT_PERF_BITS	0x0000ff00
#define	AMD_CPPC_CAPS_1_LOWEST_PERF_BITS	0x000000ff

#define	AMD_CPPC_REQUEST_EPP_BITS		0xff000000
#define	AMD_CPPC_REQUEST_DES_PERF_BITS		0x00ff0000
#define	AMD_CPPC_REQUEST_MIN_PERF_BITS		0x0000ff00
#define	AMD_CPPC_REQUEST_MAX_PERF_BITS		0x000000ff

#define	HWP_AMD_CLASSNAME			"hwpstate_amd"

#define	BITS_VALUE(bits, val)						\
	(((val) & (bits)) >> (ffsll((bits)) - 1))
#define	BITS_WITH_VALUE(bits, val)					\
	(((uintmax_t)(val) << (ffsll((bits)) - 1)) & (bits))
#define	SET_BITS_VALUE(var, bits, val)					\
	((var) = ((var) & ~(bits)) | BITS_WITH_VALUE((bits), (val)))
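
/*
 * Illustrative example of the accessors above: with bits ==
 * AMD_CPPC_REQUEST_MIN_PERF_BITS (0x0000ff00), ffsll(bits) is 9, so
 * BITS_VALUE(bits, 0x1234) extracts 0x12 and BITS_WITH_VALUE(bits, 0x12)
 * rebuilds 0x1200; SET_BITS_VALUE() combines both to replace just that
 * field of an MSR image.
 */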

#define	HWPSTATE_DEBUG(dev, msg...)			\
	do {						\
		if (hwpstate_verbose)			\
			device_printf(dev, msg);	\
	} while (0)

struct hwpstate_setting {
	int	freq;		/* CPU clock in MHz or 100ths of a percent. */
	int	volts;		/* Voltage in mV. */
	int	power;		/* Power consumed in mW. */
	int	lat;		/* Transition latency in us. */
	int	pstate_id;	/* P-State id */
};

#define HWPFL_USE_CPPC			(1 << 0)
#define HWPFL_CPPC_REQUEST_NOT_READ	(1 << 1)

struct hwpstate_cpufreq_methods {
	int (*get)(device_t dev, struct cf_setting *cf);
	int (*set)(device_t dev, const struct cf_setting *cf);
	int (*settings)(device_t dev, struct cf_setting *sets, int *count);
	int (*type)(device_t dev, int *type);
};
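
/*
 * hwpstate_probe() points 'cpufreq_methods' in the softc at either the CPPC
 * or the legacy P-state implementations of these operations, so the cpufreq
 * entry points below can dispatch without re-testing feature flags.
 */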

/*
 * Atomicity is achieved by only modifying a given softc on its associated CPU
 * and with interrupts disabled.
 *
 * XXX - Only the CPPC support complies at the moment.
 */
struct hwpstate_softc {
	device_t	dev;
	u_int		flags;
	const struct hwpstate_cpufreq_methods *cpufreq_methods;
	union {
		struct {
			struct hwpstate_setting
			hwpstate_settings[AMD_10H_11H_MAX_STATES];
			int cfnum;
		};
		struct {
			uint64_t request;
		} cppc;
	};
	u_int cpuid;
};
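
/*
 * The anonymous union above reflects that a device uses exactly one of the
 * two mechanisms: the P-state code fills the settings table and 'cfnum',
 * while the CPPC code only caches the last value written to CPPC_REQUEST.
 */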

static void	hwpstate_identify(driver_t *driver, device_t parent);
static int	hwpstate_probe(device_t dev);
static int	hwpstate_attach(device_t dev);
static int	hwpstate_detach(device_t dev);
static int	hwpstate_set(device_t dev, const struct cf_setting *cf);
static int	hwpstate_get(device_t dev, struct cf_setting *cf);
static int	hwpstate_settings(device_t dev, struct cf_setting *sets, int *count);
static int	hwpstate_type(device_t dev, int *type);
static int	hwpstate_shutdown(device_t dev);
static int	hwpstate_features(driver_t *driver, u_int *features);
static int	hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev);
static int	hwpstate_get_info_from_msr(device_t dev);
static int	hwpstate_goto_pstate(device_t dev, int pstate_id);

static int	hwpstate_verify;
SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN,
    &hwpstate_verify, 0, "Verify P-state after setting");

static bool	hwpstate_pstate_limit;
SYSCTL_BOOL(_debug, OID_AUTO, hwpstate_pstate_limit, CTLFLAG_RWTUN,
    &hwpstate_pstate_limit, 0,
    "If enabled (1), limit administrative control of P-states to the value in "
    "CurPstateLimit");

static bool	hwpstate_amd_cppc_enable = true;
SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_amd_cppc_enable, CTLFLAG_RDTUN,
    &hwpstate_amd_cppc_enable, 0,
    "Set 1 (default) to enable AMD CPPC, 0 to disable");

static device_method_t hwpstate_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	hwpstate_identify),
	DEVMETHOD(device_probe,		hwpstate_probe),
	DEVMETHOD(device_attach,	hwpstate_attach),
	DEVMETHOD(device_detach,	hwpstate_detach),
	DEVMETHOD(device_shutdown,	hwpstate_shutdown),

	/* cpufreq interface */
	DEVMETHOD(cpufreq_drv_set,	hwpstate_set),
	DEVMETHOD(cpufreq_drv_get,	hwpstate_get),
	DEVMETHOD(cpufreq_drv_settings,	hwpstate_settings),
	DEVMETHOD(cpufreq_drv_type,	hwpstate_type),

	/* ACPI interface */
	DEVMETHOD(acpi_get_features,	hwpstate_features),
	{0, 0}
};

static inline void
check_cppc_in_use(const struct hwpstate_softc *const sc, const char *const func)
{
	KASSERT((sc->flags & HWPFL_USE_CPPC) != 0, (HWP_AMD_CLASSNAME
	    ": %s() called but HWPFL_USE_CPPC not set", func));
}

static void
print_msr_bits(struct sbuf *const sb, const char *const legend,
    const uint64_t bits, const uint64_t msr_value)
{
	sbuf_printf(sb, "\t%s: %" PRIu64 "\n", legend,
	    BITS_VALUE(bits, msr_value));
}

static void
print_cppc_caps_1(struct sbuf *const sb, const uint64_t caps)
{
	sbuf_printf(sb, MSR_AMD_CPPC_CAPS_1_NAME ": %#016" PRIx64 "\n", caps);
	print_msr_bits(sb, "Highest Performance",
	    AMD_CPPC_CAPS_1_HIGHEST_PERF_BITS, caps);
	print_msr_bits(sb, "Guaranteed Performance",
	    AMD_CPPC_CAPS_1_NOMINAL_PERF_BITS, caps);
	print_msr_bits(sb, "Efficient Performance",
	    AMD_CPPC_CAPS_1_EFFICIENT_PERF_BITS, caps);
	print_msr_bits(sb, "Lowest Performance",
	    AMD_CPPC_CAPS_1_LOWEST_PERF_BITS, caps);
}

#define MSR_NOT_READ_MSG	"Not read (fault or previous errors)"

static void
print_cppc_no_caps_1(struct sbuf *const sb)
{
	sbuf_printf(sb, MSR_AMD_CPPC_CAPS_1_NAME ": " MSR_NOT_READ_MSG "\n");
}

static void
print_cppc_request(struct sbuf *const sb, const uint64_t request)
{
	sbuf_printf(sb, MSR_AMD_CPPC_REQUEST_NAME ": %#016" PRIx64 "\n",
	    request);
	print_msr_bits(sb, "Efficiency / Energy Preference",
	    AMD_CPPC_REQUEST_EPP_BITS, request);
	print_msr_bits(sb, "Desired Performance",
	    AMD_CPPC_REQUEST_DES_PERF_BITS, request);
	print_msr_bits(sb, "Minimum Performance",
	    AMD_CPPC_REQUEST_MIN_PERF_BITS, request);
	print_msr_bits(sb, "Maximum Performance",
	    AMD_CPPC_REQUEST_MAX_PERF_BITS, request);
}

static void
print_cppc_no_request(struct sbuf *const sb)
{
	sbuf_printf(sb, MSR_AMD_CPPC_REQUEST_NAME ": " MSR_NOT_READ_MSG "\n");
}

/*
 * Internal errors conveyed by code executing on another CPU.
 */
#define HWP_ERROR_CPPC_ENABLE		(1 << 0)
#define HWP_ERROR_CPPC_CAPS		(1 << 1)
#define HWP_ERROR_CPPC_REQUEST		(1 << 2)
#define HWP_ERROR_CPPC_REQUEST_WRITE	(1 << 3)

static inline bool
hwp_has_error(u_int res, u_int err)
{
	return ((res & err) != 0);
}

struct get_cppc_regs_data {
	uint64_t enable;
	uint64_t caps;
	uint64_t req;
	/* HWP_ERROR_CPPC_* except HWP_ERROR_*_WRITE */
	u_int res;
};

static void
get_cppc_regs_cb(void *args)
{
	struct get_cppc_regs_data *data = args;
	int error;

	data->res = 0;

	error = rdmsr_safe(MSR_AMD_CPPC_ENABLE, &data->enable);
	if (error != 0)
		data->res |= HWP_ERROR_CPPC_ENABLE;

	error = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &data->caps);
	if (error != 0)
		data->res |= HWP_ERROR_CPPC_CAPS;

	error = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &data->req);
	if (error != 0)
		data->res |= HWP_ERROR_CPPC_REQUEST;
}
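
/*
 * The MSR accesses above (and below) use the *_safe accessors, which recover
 * from the #GP fault raised by an unimplemented MSR and return an error
 * instead of panicking; failures are accumulated in 'res' for the caller.
 */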

/*
 * Debug: Read all MSRs (bypassing the softc) and dump them.
 */
static int
sysctl_cppc_dump_handler(SYSCTL_HANDLER_ARGS)
{
	const struct hwpstate_softc *const sc = arg1;
	struct sbuf *sb;
	struct sbuf sbs;
	struct get_cppc_regs_data data;
	int error;

	/* Sysctl knob does not exist if HWPFL_USE_CPPC is not set. */
	check_cppc_in_use(sc, __func__);

	sb = sbuf_new_for_sysctl(&sbs, NULL, 0, req);

	smp_rendezvous_cpu(sc->cpuid, smp_no_rendezvous_barrier,
	    get_cppc_regs_cb, smp_no_rendezvous_barrier, &data);

	if (hwp_has_error(data.res, HWP_ERROR_CPPC_ENABLE))
		sbuf_printf(sb, "CPU%u: " MSR_AMD_CPPC_ENABLE_NAME ": "
		    MSR_NOT_READ_MSG "\n", sc->cpuid);
	else
		sbuf_printf(sb, "CPU%u: HWP %sabled (" MSR_AMD_CPPC_ENABLE_NAME
		    ": %#" PRIx64 ")\n", sc->cpuid, data.enable & 1 ?
		    "En" : "Dis", data.enable);

	if (hwp_has_error(data.res, HWP_ERROR_CPPC_CAPS))
		print_cppc_no_caps_1(sb);
	else
		print_cppc_caps_1(sb, data.caps);

	if (hwp_has_error(data.res, HWP_ERROR_CPPC_REQUEST))
		print_cppc_no_request(sb);
	else
		print_cppc_request(sb, data.req);

	error = sbuf_finish(sb);
	sbuf_delete(sb);

	return (error);
}

/*
 * Read CPPC_REQUEST's value into the softc, if not already present.
 */
static int
get_cppc_request(struct hwpstate_softc *const sc)
{
	uint64_t val;
	int error;

	check_cppc_in_use(sc, __func__);

	if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0) {
		error = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &val);
		if (error != 0)
			return (EIO);
		sc->flags &= ~HWPFL_CPPC_REQUEST_NOT_READ;
		sc->cppc.request = val;
	}

	return (0);
}
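
/*
 * Note on context: all current callers of get_cppc_request() reach it on the
 * softc's own CPU via smp_rendezvous_cpu(), consistent with the atomicity
 * rule documented above struct hwpstate_softc.
 */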

struct set_cppc_request_cb {
	struct hwpstate_softc	*sc;
	uint64_t		 request;
	uint64_t		 mask;
	int			 res; /* 0 or HWP_ERROR_CPPC_REQUEST* */
};

static void
set_cppc_request_cb(void *args)
{
	struct set_cppc_request_cb *const data = args;
	uint64_t *const sc_req = &data->sc->cppc.request;
	uint64_t new_req;
	int error;

	/* We proceed sequentially, so we'll clear out errors on progress. */
	data->res = HWP_ERROR_CPPC_REQUEST | HWP_ERROR_CPPC_REQUEST_WRITE;

	error = get_cppc_request(data->sc);
	if (error != 0)
		return;
	data->res &= ~HWP_ERROR_CPPC_REQUEST;

	new_req = (*sc_req & ~data->mask) | (data->request & data->mask);

	error = wrmsr_safe(MSR_AMD_CPPC_REQUEST, new_req);
	if (error != 0)
		return;
	data->res &= ~HWP_ERROR_CPPC_REQUEST_WRITE;
	*sc_req = new_req;
}
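
/*
 * Illustrative example: to change only EPP to 0x80, a caller passes
 * request == BITS_WITH_VALUE(AMD_CPPC_REQUEST_EPP_BITS, 0x80) and
 * mask == AMD_CPPC_REQUEST_EPP_BITS; the read-modify-write above then
 * preserves the cached values of all other CPPC_REQUEST fields.
 */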

static inline void
set_cppc_request_send_one(struct set_cppc_request_cb *const data, device_t dev)
{
	data->sc = device_get_softc(dev);
	smp_rendezvous_cpu(data->sc->cpuid, smp_no_rendezvous_barrier,
	    set_cppc_request_cb, smp_no_rendezvous_barrier, data);
}

static inline void
set_cppc_request_update_error(const struct set_cppc_request_cb *const data,
    int *const error)
{
	/* A read error takes precedence over a write error. */
	if (hwp_has_error(data->res, HWP_ERROR_CPPC_REQUEST))
		*error = EIO;
	else if (hwp_has_error(data->res, HWP_ERROR_CPPC_REQUEST_WRITE) &&
	    *error != EIO)
		*error = EOPNOTSUPP;
	else if (data->res != 0)
		/* Fallback case (normally not needed; defensive). */
		*error = EFAULT;
}

static int
set_cppc_request(device_t hwp_dev, uint64_t request, uint64_t mask)
{
	struct set_cppc_request_cb data = {
		.request = request,
		.mask = mask,
		/* 'sc' filled by set_cppc_request_send_one(). */
	};
	int error = 0;

	if (hwpstate_pkg_ctrl_enable) {
		const devclass_t dc = devclass_find(HWP_AMD_CLASSNAME);
		const int units = devclass_get_maxunit(dc);

		for (int i = 0; i < units; ++i) {
			const device_t dev = devclass_get_device(dc, i);

			set_cppc_request_send_one(&data, dev);
			/* Note errors, but always continue. */
			set_cppc_request_update_error(&data, &error);
		}
	} else {
		set_cppc_request_send_one(&data, hwp_dev);
		set_cppc_request_update_error(&data, &error);
	}

	return (error);
}
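
/*
 * When 'hwpstate_pkg_ctrl_enable' is set (a shared knob, presumably coming
 * from hwpstate_common.h since it is not declared in this file), the request
 * is propagated to every hwpstate_amd device above, so all cores end up with
 * the same CPPC policy; otherwise only the targeted device changes.
 */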

static void
get_cppc_request_cb(void *args)
{
	struct hwpstate_softc *const sc = args;

	(void)get_cppc_request(sc);
}

static int
sysctl_cppc_request_field_handler(SYSCTL_HANDLER_ARGS)
{
	const u_int max = BITS_VALUE(arg2, (uint64_t)-1);
	const device_t dev = arg1;
	struct hwpstate_softc *const sc = device_get_softc(dev);
	u_int val;
	int error;

	/* Sysctl knob does not exist if HWPFL_USE_CPPC is not set. */
	check_cppc_in_use(sc, __func__);

	if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0) {
		smp_rendezvous_cpu(sc->cpuid, smp_no_rendezvous_barrier,
		    get_cppc_request_cb, smp_no_rendezvous_barrier, sc);

		if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0)
			return (EIO);
	}

	val = BITS_VALUE(arg2, sc->cppc.request);

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (val > max)
		return (EINVAL);
	error = set_cppc_request(dev, BITS_WITH_VALUE(arg2, val),
	    BITS_WITH_VALUE(arg2, -1));
	return (error);
}
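
/*
 * Illustrative example: "sysctl dev.hwpstate_amd.0.epp=128" lands here with
 * arg2 == AMD_CPPC_REQUEST_EPP_BITS and writes 0x80 into the EPP field of
 * CPPC_REQUEST, leaving the remaining fields untouched.
 */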

static driver_t hwpstate_driver = {
	HWP_AMD_CLASSNAME,
	hwpstate_methods,
	sizeof(struct hwpstate_softc),
};

DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, 0, 0);

static int
hwpstate_amd_iscale(int val, int div)
{
	switch (div) {
	case 3: /* divide by 1000 */
		val /= 10;
	case 2: /* divide by 100 */
		val /= 10;
	case 1: /* divide by 10 */
		val /= 10;
	case 0: /* divide by 1 */
	    ;
	}

	return (val);
}
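
/*
 * The cases above deliberately fall through, so the net effect is an integer
 * division of 'val' by 10^div, matching the IddDiv divisor encoding consumed
 * via AMD_17H_CUR_IDIV().
 */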

static void
hwpstate_pstate_read_limit(int cpu, uint64_t *msr)
{
	(void)x86_msr_op(MSR_AMD_10H_11H_LIMIT,
	    MSR_OP_READ | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), 0, msr);
}

static void
hwpstate_pstate_read_status(int cpu, uint64_t *msr)
{
	(void)x86_msr_op(MSR_AMD_10H_11H_STATUS,
	    MSR_OP_READ | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), 0, msr);
}
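
/*
 * Pre-CPPC P-state protocol (see the BKDGs cited at the top of the file):
 * writing a P-state index to the control MSR (C001_0062) requests the
 * transition, while the status MSR (C001_0063) reports the P-state currently
 * in effect, which hwpstate_goto_pstate() below can optionally poll.
 */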

/*
 * Go to the requested Px-state on the device's CPU, considering the limit
 * register (if so configured).
 */
static int
hwpstate_goto_pstate(device_t dev, int id)
{
	sbintime_t sbt;
	uint64_t msr;
	int cpu, j, limit;
	struct hwpstate_softc *sc;

	sc = device_get_softc(dev);
	cpu = sc->cpuid;

	if (hwpstate_pstate_limit) {
		/* Get the current P-state limit. */
		hwpstate_pstate_read_limit(cpu, &msr);
		limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
		if (limit > id) {
			HWPSTATE_DEBUG(dev, "Restricting requested P%d to P%d "
			    "due to HW limit\n", id, limit);
			id = limit;
		}
	}

	HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu);
	/* Go to Px-state. */
	x86_msr_op(MSR_AMD_10H_11H_CONTROL,
	    MSR_OP_WRITE | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), id, NULL);

	/*
	 * Verify whether the core is in the requested P-state.
	 */
	if (hwpstate_verify) {
		/* Wait loop: up to 100 iterations of ~100us each. */
		for (j = 0; j < 100; j++) {
			/*
			 * Read back the current P-state; it may not have
			 * reached 'id' yet.
			 */
			hwpstate_pstate_read_status(cpu, &msr);
			if (msr == id)
				break;
			sbt = SBT_1MS / 10;
			tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
			    sbt >> tc_precexp, 0);
		}
		HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n", (int)msr,
		    cpu);
		if (msr != id) {
			HWPSTATE_DEBUG(dev,
			    "error: P-state transition timed out.\n");
			return (ENXIO);
		}
	}

	return (0);
}

static int
hwpstate_set_cppc(device_t dev __unused, const struct cf_setting *cf __unused)
{
	return (EOPNOTSUPP);
}

static int
hwpstate_set_pstate(device_t dev, const struct cf_setting *cf)
{
	struct hwpstate_softc *sc;
	struct hwpstate_setting *set;
	int i;

	sc = device_get_softc(dev);
	set = sc->hwpstate_settings;
	for (i = 0; i < sc->cfnum; i++)
		if (CPUFREQ_CMP(cf->freq, set[i].freq))
			break;
	if (i == sc->cfnum)
		return (EINVAL);
	return (hwpstate_goto_pstate(dev, set[i].pstate_id));
}

static int
hwpstate_set(device_t dev, const struct cf_setting *cf)
{
	struct hwpstate_softc *sc = device_get_softc(dev);

	if (cf == NULL)
		return (EINVAL);
	return (sc->cpufreq_methods->set(dev, cf));
}

static int
hwpstate_get_cppc(device_t dev, struct cf_setting *cf)
{
	struct hwpstate_softc *sc;
	uint64_t rate;
	int ret;

	sc = device_get_softc(dev);
	memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf));
	cf->dev = dev;
	if ((ret = cpu_est_clockrate(sc->cpuid, &rate)))
		return (ret);
	cf->freq = rate / 1000000;
	return (0);
}

static int
hwpstate_get_pstate(device_t dev, struct cf_setting *cf)
{
	struct hwpstate_softc *sc;
	struct hwpstate_setting set;
	uint64_t msr;
	int cpu;

	sc = device_get_softc(dev);
	cpu = sc->cpuid;
	hwpstate_pstate_read_status(cpu, &msr);
	if (msr >= sc->cfnum)
		return (EINVAL);
	set = sc->hwpstate_settings[msr];
	cf->freq = set.freq;
	cf->volts = set.volts;
	cf->power = set.power;
	cf->lat = set.lat;
	cf->dev = dev;

	return (0);
}

static int
hwpstate_get(device_t dev, struct cf_setting *cf)
{
	struct hwpstate_softc *sc;

	sc = device_get_softc(dev);
	if (cf == NULL)
		return (EINVAL);
	return (sc->cpufreq_methods->get(dev, cf));
}

static int
hwpstate_settings_cppc(device_t dev __unused, struct cf_setting *sets __unused,
    int *count __unused)
{
	return (EOPNOTSUPP);
}

static int
hwpstate_settings_pstate(device_t dev, struct cf_setting *sets, int *count)
{
	struct hwpstate_setting set;
	struct hwpstate_softc *sc;
	int i;

	sc = device_get_softc(dev);
	if (*count < sc->cfnum)
		return (E2BIG);
	for (i = 0; i < sc->cfnum; i++, sets++) {
		set = sc->hwpstate_settings[i];
		sets->freq = set.freq;
		sets->volts = set.volts;
		sets->power = set.power;
		sets->lat = set.lat;
		sets->dev = dev;
	}
	*count = sc->cfnum;

	return (0);
}

static int
hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
{
	struct hwpstate_softc *sc;

	if (sets == NULL || count == NULL)
		return (EINVAL);
	sc = device_get_softc(dev);
	return (sc->cpufreq_methods->settings(dev, sets, count));
}

static int
hwpstate_type_cppc(device_t dev, int *type)
{
	*type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY |
	    CPUFREQ_FLAG_UNCACHED;
	return (0);
}

static int
hwpstate_type_pstate(device_t dev, int *type)
{
	*type = CPUFREQ_TYPE_ABSOLUTE;
	return (0);
}

static int
hwpstate_type(device_t dev, int *type)
{
	struct hwpstate_softc *sc;

	sc = device_get_softc(dev);
	return (sc->cpufreq_methods->type(dev, type));
}

static void
hwpstate_identify(driver_t *driver, device_t parent)
{
	if (device_find_child(parent, HWP_AMD_CLASSNAME, DEVICE_UNIT_ANY) !=
	    NULL)
		return;

	if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) &&
	    cpu_vendor_id != CPU_VENDOR_HYGON)
		return;

	/*
	 * Check if the hardware P-state enable bit is set.
	 */
	if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) {
		HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n");
		return;
	}

	if (resource_disabled(HWP_AMD_CLASSNAME, 0))
		return;

	if (BUS_ADD_CHILD(parent, 10, HWP_AMD_CLASSNAME,
		device_get_unit(parent)) == NULL)
		device_printf(parent, "hwpstate: add child failed\n");
}

struct set_autonomous_hwp_data {
	/* Inputs */
	struct hwpstate_softc *sc;
	/* Outputs */
	/* HWP_ERROR_CPPC_* */
	u_int res;
	/* Below fields filled depending on 'res'. */
	uint64_t caps;
	uint64_t init_request;
	uint64_t request;
};

static void
enable_cppc_cb(void *args)
{
	struct set_autonomous_hwp_data *const data = args;
	struct hwpstate_softc *const sc = data->sc;
	uint64_t lowest_perf, highest_perf;
	int error;

	/*
	 * We proceed mostly sequentially, so we'll clear out errors on
	 * progress.
	 */
	data->res = HWP_ERROR_CPPC_ENABLE | HWP_ERROR_CPPC_CAPS |
	    HWP_ERROR_CPPC_REQUEST | HWP_ERROR_CPPC_REQUEST_WRITE;

	sc->flags |= HWPFL_CPPC_REQUEST_NOT_READ;

	error = wrmsr_safe(MSR_AMD_CPPC_ENABLE, 1);
	if (error != 0)
		return;
	data->res &= ~HWP_ERROR_CPPC_ENABLE;

	error = rdmsr_safe(MSR_AMD_CPPC_CAPS_1, &data->caps);
	/* We can do without CAPABILITY_1, so just continue on error. */
	if (error == 0)
		data->res &= ~HWP_ERROR_CPPC_CAPS;

	error = get_cppc_request(sc);
	if (error != 0)
		return;
	data->res &= ~HWP_ERROR_CPPC_REQUEST;
	data->init_request = sc->cppc.request;

	data->request = sc->cppc.request;
	/*
	 * Assuming reading MSR_AMD_CPPC_CAPS_1 succeeded, if it stays at its
	 * reset value (0) before CPPC activation (not supposed to happen, but
	 * happens in the field), we use reasonable default values that are
	 * explicitly described by the ACPI spec (all 0s for the minimum value,
	 * all 1s for the maximum one).  Going further, we actually do the same
	 * as long as the minimum and maximum performance levels are not sorted
	 * or are equal (in which case CPPC is not supposed to make sense at
	 * all), which covers the reset value case.  And we also fall back to
	 * these if MSR_AMD_CPPC_CAPS_1 could not be read at all.
	 */
	lowest_perf = 0;
	highest_perf = -1;
	if (!hwp_has_error(data->res, HWP_ERROR_CPPC_CAPS)) {
		const uint64_t lowest_cand =
		    BITS_VALUE(AMD_CPPC_CAPS_1_LOWEST_PERF_BITS, data->caps);
		const uint64_t highest_cand =
		    BITS_VALUE(AMD_CPPC_CAPS_1_HIGHEST_PERF_BITS, data->caps);

		if (lowest_cand < highest_cand) {
			lowest_perf = lowest_cand;
			highest_perf = highest_cand;
		}
	}
	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_MIN_PERF_BITS,
	    lowest_perf);
	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_MAX_PERF_BITS,
	    highest_perf);
	/*
	 * Set controls to maximum performance to avoid regressions now that
	 * CPPC is activated by default and to match what the P-state support
	 * does.
	 */
	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_EPP_BITS, 0);
	/* 0 in "Desired Performance" is autonomous mode. */
	MPASS(highest_perf != 0);
	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_DES_PERF_BITS,
	    highest_perf);

	error = wrmsr_safe(MSR_AMD_CPPC_REQUEST, data->request);
	if (error != 0)
		return;
	data->res &= ~HWP_ERROR_CPPC_REQUEST_WRITE;
	sc->cppc.request = data->request;
}
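
/*
 * Note that 'highest_perf' above is either the all-ones fallback or a
 * capability value checked to be strictly greater than 'lowest_perf', so it
 * is never 0: the MPASS() holds and the Desired Performance written never
 * accidentally selects autonomous mode.
 */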

static int
enable_cppc(struct hwpstate_softc *sc)
{
	const device_t dev = sc->dev;
	const u_int cpuid = sc->cpuid;
	struct set_autonomous_hwp_data data;
	struct sbuf sbs;
	struct sbuf *sb;

	data.sc = sc;
	smp_rendezvous_cpu(cpuid, smp_no_rendezvous_barrier,
	    enable_cppc_cb, smp_no_rendezvous_barrier, &data);

	if (hwp_has_error(data.res, HWP_ERROR_CPPC_ENABLE)) {
		device_printf(dev, "CPU%u: Failed to enable CPPC!\n", cpuid);
		return (ENXIO);
	}
	device_printf(dev, "CPU%u: CPPC enabled.\n", cpuid);

	/*
	 * Now that we have enabled CPPC, we can't go back (hardware does not
	 * support doing so), so we'll attach even in case of further
	 * malfunction, allowing the user to retry retrieving/setting MSRs via
	 * the sysctl knobs.
	 */

	sb = sbuf_new(&sbs, NULL, 0, SBUF_AUTOEXTEND);

	if (hwpstate_verbose)
		sbuf_printf(sb,
		    "CPU%u: Initial MSR values after CPPC enable:\n", cpuid);
	if (hwp_has_error(data.res, HWP_ERROR_CPPC_CAPS))
		print_cppc_no_caps_1(sb);
	else if (hwpstate_verbose)
		print_cppc_caps_1(sb, data.caps);
	if (hwp_has_error(data.res, HWP_ERROR_CPPC_REQUEST))
		print_cppc_no_request(sb);
	else if (hwpstate_verbose)
		print_cppc_request(sb, data.init_request);
	if (hwp_has_error(data.res, HWP_ERROR_CPPC_REQUEST_WRITE)) {
		const bool request_read = !hwp_has_error(data.res,
		    HWP_ERROR_CPPC_REQUEST);

		/* This is printed first, as it is not printed into 'sb'. */
		device_printf(dev, "CPU%u: %s not write into "
		    MSR_AMD_CPPC_REQUEST_NAME "!\n", cpuid,
		    request_read ? "Could" : "Did");
		if (request_read) {
			sbuf_printf(sb, "CPU%u: Failed when trying to set:",
			    cpuid);
			print_cppc_request(sb, data.request);
		}
	} else if (hwpstate_verbose) {
		sbuf_printf(sb, "CPU%u: Tweaked MSR values:\n", cpuid);
		print_cppc_request(sb, data.request);
	}

	sbuf_finish(sb);
	sbuf_putbuf(sb);
	sbuf_delete(sb);

	return (0);
}

static int
hwpstate_probe_pstate(device_t dev)
{
	struct hwpstate_softc *sc;
	device_t perf_dev;
	int error, type;
	uint64_t msr;
	int cpu;

	sc = device_get_softc(dev);
	cpu = sc->cpuid;
	/*
	 * Check if acpi_perf has the INFO_ONLY flag.
	 */
	perf_dev = device_find_child(device_get_parent(dev), "acpi_perf",
	    DEVICE_UNIT_ANY);
	error = TRUE;
	if (perf_dev && device_is_attached(perf_dev)) {
		error = CPUFREQ_DRV_TYPE(perf_dev, &type);
		if (error == 0) {
			if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) {
				/*
				 * If acpi_perf doesn't have the INFO_ONLY
				 * flag, it will take care of P-state
				 * transitions.
				 */
				HWPSTATE_DEBUG(dev, "acpi_perf will take care "
				    "of P-state transitions.\n");
				return (ENXIO);
			}
			/*
			 * If acpi_perf has the INFO_ONLY flag (_PCT has
			 * FFixedHW), we can get the _PSS info from acpi_perf
			 * without going into ACPI.
			 */
			HWPSTATE_DEBUG(dev,
			    "going to fetch info from acpi_perf\n");
			error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
		}
	}

	if (error == 0) {
		/*
		 * We got the _PSS info from acpi_perf without error.
		 * Sanity-check it against the MSR-reported P-state count.
		 */
		hwpstate_pstate_read_limit(cpu, &msr);
		if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) {
			HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)"
			    " count mismatch\n", (intmax_t)msr, sc->cfnum);
			error = TRUE;
		}
	}

	/*
	 * If we cannot get the info from acpi_perf, fall back to the MSRs.
	 */
	if (error)
		error = hwpstate_get_info_from_msr(dev);
	return (error);
}

static const struct hwpstate_cpufreq_methods cppc_methods = {
	.get = hwpstate_get_cppc,
	.set = hwpstate_set_cppc,
	.settings = hwpstate_settings_cppc,
	.type = hwpstate_type_cppc };

static const struct hwpstate_cpufreq_methods pstate_methods = {
	.get = hwpstate_get_pstate,
	.set = hwpstate_set_pstate,
	.settings = hwpstate_settings_pstate,
	.type = hwpstate_type_pstate };

static int
hwpstate_probe(device_t dev)
{
	struct hwpstate_softc *sc;

	sc = device_get_softc(dev);

	if (hwpstate_amd_cppc_enable &&
	    (amd_extended_feature_extensions & AMDFEID_CPPC)) {
		sc->flags |= HWPFL_USE_CPPC;
		device_set_desc(dev,
		    "AMD Collaborative Processor Performance Control (CPPC)");
	} else
		device_set_desc(dev, "Cool`n'Quiet 2.0");

	sc->dev = dev;
	sc->cpuid = cpu_get_pcpu(dev)->pc_cpuid;
	if ((sc->flags & HWPFL_USE_CPPC) != 0) {
		sc->cpufreq_methods = &cppc_methods;
		return (0);
	}
	sc->cpufreq_methods = &pstate_methods;
	return (hwpstate_probe_pstate(dev));
}

static int
hwpstate_attach(device_t dev)
{
	struct hwpstate_softc *sc;
	int res;

	sc = device_get_softc(dev);
	if ((sc->flags & HWPFL_USE_CPPC) != 0) {
		if ((res = enable_cppc(sc)) != 0)
			return (res);
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO,
		    device_get_nameunit(dev),
		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
		    sc, 0, sysctl_cppc_dump_handler, "A", "");

		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
		    "epp", CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    dev, AMD_CPPC_REQUEST_EPP_BITS,
		    sysctl_cppc_request_field_handler, "IU",
		    "Efficiency/Performance Preference (from 0, "
		    "most performant, to 255, most efficient)");

		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
		    "minimum_performance",
		    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    dev, AMD_CPPC_REQUEST_MIN_PERF_BITS,
		    sysctl_cppc_request_field_handler, "IU",
		    "Minimum allowed performance level (from 0 to 255; "
		    "should be smaller than 'maximum_performance'; "
		    "effective range limited by CPU)");

		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
		    "maximum_performance",
		    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    dev, AMD_CPPC_REQUEST_MAX_PERF_BITS,
		    sysctl_cppc_request_field_handler, "IU",
		    "Maximum allowed performance level (from 0 to 255; "
		    "should be larger than 'minimum_performance'; "
		    "effective range limited by CPU)");

		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
		    "desired_performance",
		    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev,
		    AMD_CPPC_REQUEST_DES_PERF_BITS,
		    sysctl_cppc_request_field_handler, "IU",
		    "Desired performance level (from 0 to 255; "
		    "0 enables autonomous mode, otherwise value should be "
		    "between 'minimum_performance' and 'maximum_performance' "
		    "inclusive)");
	}
	return (cpufreq_register(dev));
}
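
/*
 * Illustrative example: with the knobs registered above, a CPPC-capable
 * system exposes, e.g., dev.hwpstate_amd.0.epp and friends, while
 * "sysctl debug.hwpstate_amd0" dumps the raw CPPC MSRs of that CPU.
 */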

struct hwpstate_pstate_read_settings_cb {
	struct hwpstate_softc *sc;
	uint64_t *vals;
	int err;
};

static void
hwpstate_pstate_read_settings_cb(void *args)
{
	struct hwpstate_pstate_read_settings_cb *req = args;
	int i;

	req->err = 0;
	for (i = 0; i < req->sc->cfnum; i++) {
		req->err = rdmsr_safe(MSR_AMD_10H_11H_CONFIG + i,
		    &req->vals[i]);
		if (req->err != 0)
			return;
	}
}

static int
hwpstate_pstate_read_settings(struct hwpstate_softc *sc, uint64_t vals[])
{
	struct hwpstate_pstate_read_settings_cb req;

	req.sc = sc;
	req.vals = vals;
	smp_rendezvous_cpu(sc->cpuid, smp_no_rendezvous_barrier,
	    hwpstate_pstate_read_settings_cb, smp_no_rendezvous_barrier, &req);
	return (req.err);
}

static int
hwpstate_get_info_from_msr(device_t dev)
{
	struct hwpstate_softc *sc;
	struct hwpstate_setting *hwpstate_set;
	uint64_t state_settings[AMD_10H_11H_MAX_STATES], msr;
	int family, i, fid, did;

	family = CPUID_TO_FAMILY(cpu_id);
	sc = device_get_softc(dev);
	/* Get the P-state count. */
	hwpstate_pstate_read_limit(sc->cpuid, &msr);
	sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr);
	hwpstate_set = sc->hwpstate_settings;
	hwpstate_pstate_read_settings(sc, state_settings);
	for (i = 0; i < sc->cfnum; i++) {
		msr = state_settings[i];
		if ((msr & ((uint64_t)1 << 63)) == 0) {
			HWPSTATE_DEBUG(dev, "msr is not valid.\n");
			return (ENXIO);
		}
		did = AMD_10H_11H_CUR_DID(msr);
		fid = AMD_10H_11H_CUR_FID(msr);

		hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN;
		hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN;
		hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN;
		/* Convert fid/did to frequency. */
		switch (family) {
		case 0x11:
			hwpstate_set[i].freq = (100 * (fid + 0x08)) >> did;
			break;
		case 0x10:
		case 0x12:
		case 0x15:
		case 0x16:
			hwpstate_set[i].freq = (100 * (fid + 0x10)) >> did;
			break;
		case 0x17:
		case 0x18:
		case 0x19:
		case 0x1A:
			/* Calculate the frequency. */
			if (family == 0x1A) {
				fid = AMD_1AH_CUR_FID(msr);
				/* Family 1Ah CPUs don't use a divisor. */
				hwpstate_set[i].freq = fid;
				if (fid > 0x0f)
					hwpstate_set[i].freq *= 5;
				else {
					HWPSTATE_DEBUG(dev,
					    "unexpected fid: %d\n", fid);
					return (ENXIO);
				}
			} else {
				did = AMD_17H_CUR_DID(msr);
				if (did == 0) {
					HWPSTATE_DEBUG(dev,
					    "unexpected did: 0\n");
					did = 1;
				}
				fid = AMD_17H_CUR_FID(msr);
				hwpstate_set[i].freq = (200 * fid) / did;
			}

			/* Vid step is 6.25mV, so scale by 100. */
			hwpstate_set[i].volts =
			    (155000 - (625 * AMD_17H_CUR_VID(msr))) / 100;
			/*
			 * Calculate the current first.
			 * This equation is mentioned in
			 * "BKDG for AMD Family 15h Models 70h-7fh Processors",
			 * section 2.5.2.1.6.
			 */
			hwpstate_set[i].power = AMD_17H_CUR_IDD(msr) * 1000;
			hwpstate_set[i].power = hwpstate_amd_iscale(
			    hwpstate_set[i].power, AMD_17H_CUR_IDIV(msr));
			hwpstate_set[i].power *= hwpstate_set[i].volts;
			/* Milliamps * millivolts to milliwatts. */
			hwpstate_set[i].power /= 1000;
			break;
		default:
			HWPSTATE_DEBUG(dev, "get_info_from_msr: %s family"
			    " 0x%02x CPUs are not supported yet\n",
			    cpu_vendor_id == CPU_VENDOR_HYGON ? "Hygon" : "AMD",
			    family);
			return (ENXIO);
		}
		hwpstate_set[i].pstate_id = i;
	}
	return (0);
}
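
/*
 * Illustrative numbers for the conversions above: on family 17h, FID 136 and
 * DID 8 give (200 * 136) / 8 = 3400 MHz; on family 1Ah, where the FID is in
 * 5 MHz units and no divisor is used, FID 700 gives 700 * 5 = 3500 MHz.
 */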

static int
hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev)
{
	struct hwpstate_softc *sc;
	struct cf_setting *perf_set;
	struct hwpstate_setting *hwpstate_set;
	int count, error, i;

	perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT);
	if (perf_set == NULL) {
		HWPSTATE_DEBUG(dev, "nomem\n");
		return (ENOMEM);
	}
	/*
	 * Fetch settings from acpi_perf.
	 * Now it is attached, and has the INFO_ONLY flag.
	 */
	count = MAX_SETTINGS;
	error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count);
	if (error) {
		HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n");
		goto out;
	}
	sc = device_get_softc(dev);
	sc->cfnum = count;
	hwpstate_set = sc->hwpstate_settings;
	for (i = 0; i < count; i++) {
		if (i == perf_set[i].spec[0]) {
			hwpstate_set[i].pstate_id = i;
			hwpstate_set[i].freq = perf_set[i].freq;
			hwpstate_set[i].volts = perf_set[i].volts;
			hwpstate_set[i].power = perf_set[i].power;
			hwpstate_set[i].lat = perf_set[i].lat;
		} else {
			HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n");
			error = ENXIO;
			goto out;
		}
	}
out:
	if (perf_set)
		free(perf_set, M_TEMP);
	return (error);
}

static int
hwpstate_detach(device_t dev)
{
	struct hwpstate_softc *sc;

	sc = device_get_softc(dev);
	if ((sc->flags & HWPFL_USE_CPPC) == 0)
		hwpstate_goto_pstate(dev, 0);
	return (cpufreq_unregister(dev));
}

static int
hwpstate_shutdown(device_t dev)
{

	/* hwpstate_goto_pstate(dev, 0); */
	return (0);
}

static int
hwpstate_features(driver_t *driver, u_int *features)
{

	/* Notify the ACPI CPU that we support direct access to MSRs */
	*features = ACPI_CAP_PERF_MSRS;
	return (0);
}
1305