xref: /freebsd/sys/dev/amdtemp/amdtemp.c (revision b670c9bafc0e31c7609969bf374b2e80bdc00211)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008, 2009 Rui Paulo <rpaulo@FreeBSD.org>
5  * Copyright (c) 2009 Norikatsu Shigemura <nork@FreeBSD.org>
6  * Copyright (c) 2009-2012 Jung-uk Kim <jkim@FreeBSD.org>
7  * All rights reserved.
8  * Copyright (c) 2017-2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
23  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Driver for the AMD CPU on-die thermal sensors.
34  * Initially based on the k8temp Linux driver.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/bus.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 
47 #include <machine/cpufunc.h>
48 #include <machine/md_var.h>
49 #include <machine/specialreg.h>
50 
51 #include <dev/pci/pcivar.h>
52 #include <x86/pci_cfgreg.h>
53 
54 #include <dev/amdsmn/amdsmn.h>
55 
/*
 * Identifiers for the readings the driver can report.  The value travels as
 * arg2 of amdtemp_sysctl() and is handed to the softc's sc_gettemp method.
 */
typedef enum {
	/* Individual sensors of the first two cores. */
	CORE0_SENSOR0,
	CORE0_SENSOR1,
	CORE1_SENSOR0,
	CORE1_SENSOR1,

	/* Per-core readings: amdtemp_sysctl() takes the max of both sensors. */
	CORE0,
	CORE1,

	/* Per-CCD readings, contiguous from CCD_BASE through CCD_MAX. */
	CCD_BASE,
	CCD1 = CCD_BASE,
	CCD2,
	CCD3,
	CCD4,
	CCD5,
	CCD6,
	CCD7,
	CCD8,
	CCD9,
	CCD10,
	CCD11,
	CCD12,
	CCD_MAX = CCD12,

	/* Number of CCD identifiers; not itself a sensor. */
	NUM_CCDS = CCD_MAX - CCD_BASE + 1,
} amdsensor_t;
79 
80 struct amdtemp_softc {
81 	int		sc_ncores;
82 	int		sc_ntemps;
83 	int		sc_flags;
84 #define	AMDTEMP_FLAG_CS_SWAP	0x01	/* ThermSenseCoreSel is inverted. */
85 #define	AMDTEMP_FLAG_CT_10BIT	0x02	/* CurTmp is 10-bit wide. */
86 #define	AMDTEMP_FLAG_ALT_OFFSET	0x04	/* CurTmp starts at -28C. */
87 	int32_t		sc_offset;
88 	int32_t		sc_temp_base;
89 	int32_t		(*sc_gettemp)(device_t, amdsensor_t);
90 	struct sysctl_oid *sc_sysctl_cpu[MAXCPU];
91 	struct intr_config_hook sc_ich;
92 	device_t	sc_smn;
93 	struct mtx	sc_lock;
94 };
95 
/*
 * PCI vendor and device IDs matched by this driver.
 *
 * N.B. The numbers embedded in the macro names are significant: they are the
 * CPU family and model numbers.  Never invent a family or model number when
 * adding support for a new device.
 */
#define	VENDORID_AMD			0x1022

/* Families 0Fh through 16h. */
#define	DEVICEID_AMD_MISC0F		0x1103
#define	DEVICEID_AMD_MISC10		0x1203
#define	DEVICEID_AMD_MISC11		0x1303
#define	DEVICEID_AMD_MISC14		0x1703
#define	DEVICEID_AMD_MISC15		0x1603
#define	DEVICEID_AMD_MISC15_M10H	0x1403
#define	DEVICEID_AMD_MISC15_M30H	0x141d
#define	DEVICEID_AMD_MISC15_M60H_ROOT	0x1576
#define	DEVICEID_AMD_MISC16		0x1533
#define	DEVICEID_AMD_MISC16_M30H	0x1583

/* Families 17h, 19h and 1Ah. */
#define	DEVICEID_AMD_HOSTB17H_ROOT	0x1450
#define	DEVICEID_AMD_HOSTB17H_M10H_ROOT	0x15d0
#define	DEVICEID_AMD_HOSTB17H_M30H_ROOT	0x1480	/* Also M70H, F19H M00H/M20H */
#define	DEVICEID_AMD_HOSTB17H_M60H_ROOT	0x1630	/* Also F19H M50H */
#define	DEVICEID_AMD_HOSTB19H_M10H_ROOT	0x14a4
#define	DEVICEID_AMD_HOSTB19H_M40H_ROOT	0x14b5
#define	DEVICEID_AMD_HOSTB19H_M60H_ROOT	0x14d8	/* Also F1AH M40H */
#define	DEVICEID_AMD_HOSTB19H_M70H_ROOT	0x14e8
#define	DEVICEID_AMD_HOSTB1AH_M00H_ROOT	0x153a
#define	DEVICEID_AMD_HOSTB1AH_M20H_ROOT	0x1507
#define	DEVICEID_AMD_HOSTB1AH_M60H_ROOT	0x1122
123 
124 static const struct amdtemp_product {
125 	uint16_t	amdtemp_vendorid;
126 	uint16_t	amdtemp_deviceid;
127 	/*
128 	 * 0xFC register is only valid on the D18F3 PCI device; SMN temp
129 	 * drivers do not attach to that device.
130 	 */
131 	bool		amdtemp_has_cpuid;
132 } amdtemp_products[] = {
133 	{ VENDORID_AMD,	DEVICEID_AMD_MISC0F, true },
134 	{ VENDORID_AMD,	DEVICEID_AMD_MISC10, true },
135 	{ VENDORID_AMD,	DEVICEID_AMD_MISC11, true },
136 	{ VENDORID_AMD,	DEVICEID_AMD_MISC14, true },
137 	{ VENDORID_AMD,	DEVICEID_AMD_MISC15, true },
138 	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M10H, true },
139 	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M30H, true },
140 	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M60H_ROOT, false },
141 	{ VENDORID_AMD,	DEVICEID_AMD_MISC16, true },
142 	{ VENDORID_AMD,	DEVICEID_AMD_MISC16_M30H, true },
143 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_ROOT, false },
144 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M10H_ROOT, false },
145 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M30H_ROOT, false },
146 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M60H_ROOT, false },
147 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M10H_ROOT, false },
148 	{ VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M40H_ROOT, false },
149 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M60H_ROOT, false },
150 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M70H_ROOT, false },
151 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB1AH_M00H_ROOT, false },
152 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB1AH_M20H_ROOT, false },
153 	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB1AH_M60H_ROOT, false },
154 };
155 
/*
 * Reported Temperature Control Register, family 0Fh-15h (some models), 16h.
 * CurTmp sits in the top 11 bits, CurTmpTjSel in bits 17:16.
 */
#define	AMDTEMP_REPTMP_CTRL		0xa4

#define	AMDTEMP_REPTMP10H_CURTMP_MASK	0x7ff
#define	AMDTEMP_REPTMP10H_CURTMP_SHIFT	21
#define	AMDTEMP_REPTMP10H_TJSEL_MASK	0x3
#define	AMDTEMP_REPTMP10H_TJSEL_SHIFT	16

/*
 * Reported Temperature, Family 15h, M60+
 *
 * The bit layout is that of the other Family 15h CPUs, but the register is
 * reached indirectly through the SMN, like on Family 17h.
 */
#define	AMDTEMP_15H_M60H_REPTMP_CTRL	0xd8200ca4

/*
 * Reported Temperature, Family 17h - 1Ah
 *
 * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register
 * provide the current temp.  Bit 19, when clear, means the temp is reported
 * in a range 0.."225C" (probable typo for 255C); when set, the range becomes
 * -49..206C.
 */
#define	AMDTEMP_17H_CUR_TMP		0x59800
#define	AMDTEMP_17H_CUR_TMP_RANGE_SEL	(1u << 19)
/*
 * Bits 16-17, when both set, mean that CUR_TMP is read-write.  In that case
 * the 49 degree offset applies as well.  This was revealed in a Linux patch
 * from an AMD employee.
 */
#define	AMDTEMP_17H_CUR_TMP_TJ_SEL	(0x3u << 16)
/*
 * The following register set was discovered experimentally by Ondrej Čerman
 * and collaborators, but is not (yet) documented in a PPR/OSRR (other than
 * the M70H PPR SMN memory map showing [0x59800, +0x314] as allocated to
 * SMU::THM).  It seems plausible and the Linux sensor folks have adopted it.
 */
#define	AMDTEMP_17H_CCD_TMP_BASE	0x59954
#define	AMDTEMP_17H_CCD_TMP_VALID	(1u << 11)

/* Alternative per-CCD register bases selected by the family 19h/1Ah probes. */
#define	AMDTEMP_ZEN4_10H_CCD_TMP_BASE	0x59b00
#define	AMDTEMP_ZEN4_CCD_TMP_BASE	0x59b08

/*
 * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius).
 */
#define	AMDTEMP_CURTMP_RANGE_ADJUST	490

/*
 * Thermaltrip Status Register (Family 0Fh only)
 */
#define	AMDTEMP_THERMTP_STAT		0xe4
#define	AMDTEMP_TTSR_SELCORE		0x04
#define	AMDTEMP_TTSR_SELSENSOR		0x40

/*
 * DRAM Configuration High Register
 */
#define	AMDTEMP_DRAM_CONF_HIGH		0x94	/* Function 2 */
#define	AMDTEMP_DRAM_MODE_DDR3		0x0100

/*
 * CPU Family/Model Register
 */
#define	AMDTEMP_CPUID			0xfc
224 
225 /*
226  * Device methods.
227  */
228 static void 	amdtemp_identify(driver_t *driver, device_t parent);
229 static int	amdtemp_probe(device_t dev);
230 static int	amdtemp_attach(device_t dev);
231 static void	amdtemp_intrhook(void *arg);
232 static int	amdtemp_detach(device_t dev);
233 static int32_t	amdtemp_gettemp0f(device_t dev, amdsensor_t sensor);
234 static int32_t	amdtemp_gettemp(device_t dev, amdsensor_t sensor);
235 static int32_t	amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor);
236 static int32_t	amdtemp_gettemp17h(device_t dev, amdsensor_t sensor);
237 static void	amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model);
238 static void	amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model);
239 static void	amdtemp_probe_ccd_sensors1ah(device_t dev, uint32_t model);
240 static int	amdtemp_sysctl(SYSCTL_HANDLER_ARGS);
241 
242 static device_method_t amdtemp_methods[] = {
243 	/* Device interface */
244 	DEVMETHOD(device_identify,	amdtemp_identify),
245 	DEVMETHOD(device_probe,		amdtemp_probe),
246 	DEVMETHOD(device_attach,	amdtemp_attach),
247 	DEVMETHOD(device_detach,	amdtemp_detach),
248 
249 	DEVMETHOD_END
250 };
251 
252 static driver_t amdtemp_driver = {
253 	"amdtemp",
254 	amdtemp_methods,
255 	sizeof(struct amdtemp_softc),
256 };
257 
258 DRIVER_MODULE(amdtemp, hostb, amdtemp_driver, NULL, NULL);
259 MODULE_VERSION(amdtemp, 1);
260 MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1);
261 MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products,
262     nitems(amdtemp_products));
263 
264 static bool
amdtemp_match(device_t dev,const struct amdtemp_product ** product_out)265 amdtemp_match(device_t dev, const struct amdtemp_product **product_out)
266 {
267 	int i;
268 	uint16_t vendor, devid;
269 
270 	vendor = pci_get_vendor(dev);
271 	devid = pci_get_device(dev);
272 
273 	for (i = 0; i < nitems(amdtemp_products); i++) {
274 		if (vendor == amdtemp_products[i].amdtemp_vendorid &&
275 		    devid == amdtemp_products[i].amdtemp_deviceid) {
276 			if (product_out != NULL)
277 				*product_out = &amdtemp_products[i];
278 			return (true);
279 		}
280 	}
281 	return (false);
282 }
283 
284 static void
amdtemp_identify(driver_t * driver,device_t parent)285 amdtemp_identify(driver_t *driver, device_t parent)
286 {
287 	device_t child;
288 
289 	/* Make sure we're not being doubly invoked. */
290 	if (device_find_child(parent, "amdtemp", DEVICE_UNIT_ANY) != NULL)
291 		return;
292 
293 	if (amdtemp_match(parent, NULL)) {
294 		child = device_add_child(parent, "amdtemp", DEVICE_UNIT_ANY);
295 		if (child == NULL)
296 			device_printf(parent, "add amdtemp child failed\n");
297 	}
298 }
299 
300 static int
amdtemp_probe(device_t dev)301 amdtemp_probe(device_t dev)
302 {
303 	uint32_t family, model, stepping;
304 
305 	if (resource_disabled("amdtemp", 0)) {
306 		if (bootverbose)
307 			device_printf(dev, "Resource disabled\n");
308 		return (ENXIO);
309 	}
310 	if (!amdtemp_match(device_get_parent(dev), NULL)) {
311 		if (bootverbose)
312 			device_printf(dev, "amdtemp_match() failed\n");
313 		return (ENXIO);
314 	}
315 
316 	family = CPUID_TO_FAMILY(cpu_id);
317 	model = CPUID_TO_MODEL(cpu_id);
318 	stepping = CPUID_TO_STEPPING(cpu_id);
319 
320 	switch (family) {
321 	case 0x0f:
322 		if ((model == 0x04 && stepping == 0) ||
323 		    (model == 0x05 && stepping <= 1)) {
324 			if (bootverbose)
325 				device_printf(dev,
326 				    "Unsupported (Family=%02Xh, Model=%02Xh, Stepping=%02Xh)\n",
327 				    family, model, stepping);
328 			return (ENXIO);
329 		}
330 		break;
331 	case 0x10:
332 	case 0x11:
333 	case 0x12:
334 	case 0x14:
335 	case 0x15:
336 	case 0x16:
337 	case 0x17:
338 	case 0x19:
339 	case 0x1a:
340 		break;
341 	default:
342 		return (ENXIO);
343 	}
344 	device_set_descf(dev, "AMD Family %02Xh CPU On-Die Thermal Sensors",
345 	    family);
346 
347 	return (BUS_PROBE_GENERIC);
348 }
349 
350 static int
amdtemp_attach(device_t dev)351 amdtemp_attach(device_t dev)
352 {
353 	char tn[32];
354 	u_int regs[4];
355 	const struct amdtemp_product *product;
356 	struct amdtemp_softc *sc;
357 	struct sysctl_ctx_list *sysctlctx;
358 	struct sysctl_oid *sysctlnode;
359 	uint32_t cpuid, family, model;
360 	u_int bid;
361 	int erratum319, unit;
362 	bool needsmn;
363 
364 	sc = device_get_softc(dev);
365 	erratum319 = 0;
366 	needsmn = false;
367 
368 	if (!amdtemp_match(device_get_parent(dev), &product))
369 		return (ENXIO);
370 
371 	cpuid = cpu_id;
372 	family = CPUID_TO_FAMILY(cpuid);
373 	model = CPUID_TO_MODEL(cpuid);
374 
375 	/*
376 	 * This checks for the byzantine condition of running a heterogenous
377 	 * revision multi-socket system where the attach thread is potentially
378 	 * probing a remote socket's PCI device.
379 	 *
380 	 * Currently, such scenarios are unsupported on models using the SMN
381 	 * (because on those models, amdtemp(4) attaches to a different PCI
382 	 * device than the one that contains AMDTEMP_CPUID).
383 	 *
384 	 * The ancient 0x0F family of devices only supports this register from
385 	 * models 40h+.
386 	 */
387 	if (product->amdtemp_has_cpuid && (family > 0x0f ||
388 	    (family == 0x0f && model >= 0x40))) {
389 		cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID,
390 		    4);
391 		family = CPUID_TO_FAMILY(cpuid);
392 		model = CPUID_TO_MODEL(cpuid);
393 	}
394 
395 	switch (family) {
396 	case 0x0f:
397 		/*
398 		 * Thermaltrip Status Register
399 		 *
400 		 * - ThermSenseCoreSel
401 		 *
402 		 * Revision F & G:	0 - Core1, 1 - Core0
403 		 * Other:		0 - Core0, 1 - Core1
404 		 *
405 		 * - CurTmp
406 		 *
407 		 * Revision G:		bits 23-14
408 		 * Other:		bits 23-16
409 		 *
410 		 * XXX According to the BKDG, CurTmp, ThermSenseSel and
411 		 * ThermSenseCoreSel bits were introduced in Revision F
412 		 * but CurTmp seems working fine as early as Revision C.
413 		 * However, it is not clear whether ThermSenseSel and/or
414 		 * ThermSenseCoreSel work in undocumented cases as well.
415 		 * In fact, the Linux driver suggests it may not work but
416 		 * we just assume it does until we find otherwise.
417 		 *
418 		 * XXX According to Linux, CurTmp starts at -28C on
419 		 * Socket AM2 Revision G processors, which is not
420 		 * documented anywhere.
421 		 */
422 		if (model >= 0x40)
423 			sc->sc_flags |= AMDTEMP_FLAG_CS_SWAP;
424 		if (model >= 0x60 && model != 0xc1) {
425 			do_cpuid(0x80000001, regs);
426 			bid = (regs[1] >> 9) & 0x1f;
427 			switch (model) {
428 			case 0x68: /* Socket S1g1 */
429 			case 0x6c:
430 			case 0x7c:
431 				break;
432 			case 0x6b: /* Socket AM2 and ASB1 (2 cores) */
433 				if (bid != 0x0b && bid != 0x0c)
434 					sc->sc_flags |=
435 					    AMDTEMP_FLAG_ALT_OFFSET;
436 				break;
437 			case 0x6f: /* Socket AM2 and ASB1 (1 core) */
438 			case 0x7f:
439 				if (bid != 0x07 && bid != 0x09 &&
440 				    bid != 0x0c)
441 					sc->sc_flags |=
442 					    AMDTEMP_FLAG_ALT_OFFSET;
443 				break;
444 			default:
445 				sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET;
446 			}
447 			sc->sc_flags |= AMDTEMP_FLAG_CT_10BIT;
448 		}
449 
450 		/*
451 		 * There are two sensors per core.
452 		 */
453 		sc->sc_ntemps = 2;
454 
455 		sc->sc_gettemp = amdtemp_gettemp0f;
456 		break;
457 	case 0x10:
458 		/*
459 		 * Erratum 319 Inaccurate Temperature Measurement
460 		 *
461 		 * http://support.amd.com/us/Processor_TechDocs/41322.pdf
462 		 */
463 		do_cpuid(0x80000001, regs);
464 		switch ((regs[1] >> 28) & 0xf) {
465 		case 0:	/* Socket F */
466 			erratum319 = 1;
467 			break;
468 		case 1:	/* Socket AM2+ or AM3 */
469 			if ((pci_cfgregread(pci_get_domain(dev),
470 			    pci_get_bus(dev), pci_get_slot(dev), 2,
471 			    AMDTEMP_DRAM_CONF_HIGH, 2) &
472 			    AMDTEMP_DRAM_MODE_DDR3) != 0 || model > 0x04 ||
473 			    (model == 0x04 && (cpuid & CPUID_STEPPING) >= 3))
474 				break;
475 			/* XXX 00100F42h (RB-C2) exists in both formats. */
476 			erratum319 = 1;
477 			break;
478 		}
479 		/* FALLTHROUGH */
480 	case 0x11:
481 	case 0x12:
482 	case 0x14:
483 	case 0x15:
484 	case 0x16:
485 		sc->sc_ntemps = 1;
486 		/*
487 		 * Some later (60h+) models of family 15h use a similar SMN
488 		 * network as family 17h.  (However, the register index differs
489 		 * from 17h and the decoding matches other 10h-15h models,
490 		 * which differ from 17h.)
491 		 */
492 		if (family == 0x15 && model >= 0x60) {
493 			sc->sc_gettemp = amdtemp_gettemp15hm60h;
494 			needsmn = true;
495 		} else
496 			sc->sc_gettemp = amdtemp_gettemp;
497 		break;
498 	case 0x17:
499 	case 0x19:
500 	case 0x1a:
501 		sc->sc_ntemps = 1;
502 		sc->sc_gettemp = amdtemp_gettemp17h;
503 		needsmn = true;
504 		break;
505 	default:
506 		device_printf(dev, "Bogus family %02Xh\n", family);
507 		return (ENXIO);
508 	}
509 
510 	if (needsmn) {
511 		sc->sc_smn = device_find_child(
512 		    device_get_parent(dev), "amdsmn", -1);
513 		if (sc->sc_smn == NULL) {
514 			if (bootverbose)
515 				device_printf(dev, "No amdsmn(4) device found\n");
516 			return (ENXIO);
517 		}
518 	}
519 
520 	/* Find number of cores per package. */
521 	sc->sc_ncores = (amd_feature2 & AMDID2_CMP) != 0 ?
522 	    (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1;
523 	if (sc->sc_ncores > MAXCPU)
524 		return (ENXIO);
525 
526 	mtx_init(&sc->sc_lock, "amdtemp", NULL, MTX_DEF);
527 	if (erratum319)
528 		device_printf(dev,
529 		    "Erratum 319: temperature measurement may be inaccurate\n");
530 	if (bootverbose)
531 		device_printf(dev, "Found %d cores and %d sensors\n",
532 		    sc->sc_ncores,
533 		    sc->sc_ntemps > 1 ? sc->sc_ntemps * sc->sc_ncores : 1);
534 
535 	/*
536 	 * dev.amdtemp.N tree.
537 	 */
538 	unit = device_get_unit(dev);
539 	snprintf(tn, sizeof(tn), "dev.amdtemp.%d.sensor_offset", unit);
540 	TUNABLE_INT_FETCH(tn, &sc->sc_offset);
541 
542 	sysctlctx = device_get_sysctl_ctx(dev);
543 	SYSCTL_ADD_INT(sysctlctx,
544 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
545 	    "sensor_offset", CTLFLAG_RW, &sc->sc_offset, 0,
546 	    "Temperature sensor offset");
547 	sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
548 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
549 	    "core0", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Core 0");
550 
551 	SYSCTL_ADD_PROC(sysctlctx,
552 	    SYSCTL_CHILDREN(sysctlnode),
553 	    OID_AUTO, "sensor0",
554 	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
555 	    dev, CORE0_SENSOR0, amdtemp_sysctl, "IK",
556 	    "Core 0 / Sensor 0 temperature");
557 
558 	sc->sc_temp_base = AMDTEMP_17H_CCD_TMP_BASE;
559 
560 	if (family == 0x17)
561 		amdtemp_probe_ccd_sensors17h(dev, model);
562 	else if (family == 0x19)
563 		amdtemp_probe_ccd_sensors19h(dev, model);
564 	else if (family == 0x1a)
565 		amdtemp_probe_ccd_sensors1ah(dev, model);
566 	else if (sc->sc_ntemps > 1) {
567 		SYSCTL_ADD_PROC(sysctlctx,
568 		    SYSCTL_CHILDREN(sysctlnode),
569 		    OID_AUTO, "sensor1",
570 		    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
571 		    dev, CORE0_SENSOR1, amdtemp_sysctl, "IK",
572 		    "Core 0 / Sensor 1 temperature");
573 
574 		if (sc->sc_ncores > 1) {
575 			sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
576 			    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
577 			    OID_AUTO, "core1", CTLFLAG_RD | CTLFLAG_MPSAFE,
578 			    0, "Core 1");
579 
580 			SYSCTL_ADD_PROC(sysctlctx,
581 			    SYSCTL_CHILDREN(sysctlnode),
582 			    OID_AUTO, "sensor0",
583 			    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
584 			    dev, CORE1_SENSOR0, amdtemp_sysctl, "IK",
585 			    "Core 1 / Sensor 0 temperature");
586 
587 			SYSCTL_ADD_PROC(sysctlctx,
588 			    SYSCTL_CHILDREN(sysctlnode),
589 			    OID_AUTO, "sensor1",
590 			    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
591 			    dev, CORE1_SENSOR1, amdtemp_sysctl, "IK",
592 			    "Core 1 / Sensor 1 temperature");
593 		}
594 	}
595 
596 	/*
597 	 * Try to create dev.cpu sysctl entries and setup intrhook function.
598 	 * This is needed because the cpu driver may be loaded late on boot,
599 	 * after us.
600 	 */
601 	amdtemp_intrhook(dev);
602 	sc->sc_ich.ich_func = amdtemp_intrhook;
603 	sc->sc_ich.ich_arg = dev;
604 	if (config_intrhook_establish(&sc->sc_ich) != 0) {
605 		device_printf(dev, "config_intrhook_establish failed!\n");
606 		return (ENXIO);
607 	}
608 
609 	return (0);
610 }
611 
612 void
amdtemp_intrhook(void * arg)613 amdtemp_intrhook(void *arg)
614 {
615 	struct amdtemp_softc *sc;
616 	struct sysctl_ctx_list *sysctlctx;
617 	device_t dev = (device_t)arg;
618 	device_t acpi, cpu, nexus;
619 	amdsensor_t sensor;
620 	int i;
621 
622 	sc = device_get_softc(dev);
623 
624 	/*
625 	 * dev.cpu.N.temperature.
626 	 */
627 	nexus = device_find_child(root_bus, "nexus", 0);
628 	acpi = device_find_child(nexus, "acpi", 0);
629 
630 	for (i = 0; i < sc->sc_ncores; i++) {
631 		if (sc->sc_sysctl_cpu[i] != NULL)
632 			continue;
633 		cpu = device_find_child(acpi, "cpu",
634 		    device_get_unit(dev) * sc->sc_ncores + i);
635 		if (cpu != NULL) {
636 			sysctlctx = device_get_sysctl_ctx(cpu);
637 
638 			sensor = sc->sc_ntemps > 1 ?
639 			    (i == 0 ? CORE0 : CORE1) : CORE0_SENSOR0;
640 			sc->sc_sysctl_cpu[i] = SYSCTL_ADD_PROC(sysctlctx,
641 			    SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)),
642 			    OID_AUTO, "temperature",
643 			    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
644 			    dev, sensor, amdtemp_sysctl, "IK",
645 			    "Current temparature");
646 		}
647 	}
648 	if (sc->sc_ich.ich_arg != NULL)
649 		config_intrhook_disestablish(&sc->sc_ich);
650 }
651 
652 int
amdtemp_detach(device_t dev)653 amdtemp_detach(device_t dev)
654 {
655 	struct amdtemp_softc *sc = device_get_softc(dev);
656 	int i;
657 
658 	for (i = 0; i < sc->sc_ncores; i++)
659 		if (sc->sc_sysctl_cpu[i] != NULL)
660 			sysctl_remove_oid(sc->sc_sysctl_cpu[i], 1, 0);
661 
662 	/* NewBus removes the dev.amdtemp.N tree by itself. */
663 
664 	mtx_destroy(&sc->sc_lock);
665 	return (0);
666 }
667 
668 static int
amdtemp_sysctl(SYSCTL_HANDLER_ARGS)669 amdtemp_sysctl(SYSCTL_HANDLER_ARGS)
670 {
671 	device_t dev = (device_t)arg1;
672 	struct amdtemp_softc *sc = device_get_softc(dev);
673 	amdsensor_t sensor = (amdsensor_t)arg2;
674 	int32_t auxtemp[2], temp;
675 	int error;
676 
677 	switch (sensor) {
678 	case CORE0:
679 		auxtemp[0] = sc->sc_gettemp(dev, CORE0_SENSOR0);
680 		auxtemp[1] = sc->sc_gettemp(dev, CORE0_SENSOR1);
681 		temp = imax(auxtemp[0], auxtemp[1]);
682 		break;
683 	case CORE1:
684 		auxtemp[0] = sc->sc_gettemp(dev, CORE1_SENSOR0);
685 		auxtemp[1] = sc->sc_gettemp(dev, CORE1_SENSOR1);
686 		temp = imax(auxtemp[0], auxtemp[1]);
687 		break;
688 	default:
689 		temp = sc->sc_gettemp(dev, sensor);
690 		break;
691 	}
692 	error = sysctl_handle_int(oidp, &temp, 0, req);
693 
694 	return (error);
695 }
696 
697 #define	AMDTEMP_ZERO_C_TO_K	2731
698 
699 static int32_t
amdtemp_gettemp0f(device_t dev,amdsensor_t sensor)700 amdtemp_gettemp0f(device_t dev, amdsensor_t sensor)
701 {
702 	struct amdtemp_softc *sc = device_get_softc(dev);
703 	uint32_t mask, offset, temp;
704 
705 	mtx_lock(&sc->sc_lock);
706 
707 	/* Set Sensor/Core selector. */
708 	temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 1);
709 	temp &= ~(AMDTEMP_TTSR_SELCORE | AMDTEMP_TTSR_SELSENSOR);
710 	switch (sensor) {
711 	case CORE0_SENSOR1:
712 		temp |= AMDTEMP_TTSR_SELSENSOR;
713 		/* FALLTHROUGH */
714 	case CORE0_SENSOR0:
715 	case CORE0:
716 		if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) != 0)
717 			temp |= AMDTEMP_TTSR_SELCORE;
718 		break;
719 	case CORE1_SENSOR1:
720 		temp |= AMDTEMP_TTSR_SELSENSOR;
721 		/* FALLTHROUGH */
722 	case CORE1_SENSOR0:
723 	case CORE1:
724 		if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) == 0)
725 			temp |= AMDTEMP_TTSR_SELCORE;
726 		break;
727 	default:
728 		__assert_unreachable();
729 	}
730 	pci_write_config(dev, AMDTEMP_THERMTP_STAT, temp, 1);
731 
732 	mask = (sc->sc_flags & AMDTEMP_FLAG_CT_10BIT) != 0 ? 0x3ff : 0x3fc;
733 	offset = (sc->sc_flags & AMDTEMP_FLAG_ALT_OFFSET) != 0 ? 28 : 49;
734 	temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 4);
735 	temp = ((temp >> 14) & mask) * 5 / 2;
736 	temp += AMDTEMP_ZERO_C_TO_K + (sc->sc_offset - offset) * 10;
737 
738 	mtx_unlock(&sc->sc_lock);
739 	return (temp);
740 }
741 
742 static uint32_t
amdtemp_decode_fam10h_to_17h(int32_t sc_offset,uint32_t val,bool minus49)743 amdtemp_decode_fam10h_to_17h(int32_t sc_offset, uint32_t val, bool minus49)
744 {
745 	uint32_t temp;
746 
747 	/* Convert raw register subfield units (0.125C) to units of 0.1C. */
748 	temp = (val & AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4;
749 
750 	if (minus49)
751 		temp -= AMDTEMP_CURTMP_RANGE_ADJUST;
752 
753 	temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10;
754 	return (temp);
755 }
756 
757 static uint32_t
amdtemp_decode_fam10h_to_16h(int32_t sc_offset,uint32_t val)758 amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val)
759 {
760 	bool minus49;
761 
762 	/*
763 	 * On Family 15h and higher, if CurTmpTjSel is 11b, the range is
764 	 * adjusted down by 49.0 degrees Celsius.  (This adjustment is not
765 	 * documented in BKDGs prior to family 15h model 00h.)
766 	 */
767 	minus49 = (CPUID_TO_FAMILY(cpu_id) >= 0x15 &&
768 	    ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) &
769 	    AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3);
770 
771 	return (amdtemp_decode_fam10h_to_17h(sc_offset,
772 	    val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
773 }
774 
775 static uint32_t
amdtemp_decode_fam17h_tctl(int32_t sc_offset,uint32_t val)776 amdtemp_decode_fam17h_tctl(int32_t sc_offset, uint32_t val)
777 {
778 	bool minus49;
779 
780 	minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0)
781 	    || ((val & AMDTEMP_17H_CUR_TMP_TJ_SEL) == AMDTEMP_17H_CUR_TMP_TJ_SEL);
782 	return (amdtemp_decode_fam10h_to_17h(sc_offset,
783 	    val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
784 }
785 
786 static int32_t
amdtemp_gettemp(device_t dev,amdsensor_t sensor)787 amdtemp_gettemp(device_t dev, amdsensor_t sensor)
788 {
789 	struct amdtemp_softc *sc = device_get_softc(dev);
790 	uint32_t temp;
791 
792 	temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4);
793 	return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp));
794 }
795 
796 static int32_t
amdtemp_gettemp15hm60h(device_t dev,amdsensor_t sensor)797 amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor)
798 {
799 	struct amdtemp_softc *sc = device_get_softc(dev);
800 	uint32_t val;
801 	int error __diagused;
802 
803 	error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val);
804 	KASSERT(error == 0, ("amdsmn_read"));
805 	return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val));
806 }
807 
808 static int32_t
amdtemp_gettemp17h(device_t dev,amdsensor_t sensor)809 amdtemp_gettemp17h(device_t dev, amdsensor_t sensor)
810 {
811 	struct amdtemp_softc *sc = device_get_softc(dev);
812 	uint32_t val;
813 	int error __diagused;
814 
815 	switch (sensor) {
816 	case CORE0_SENSOR0:
817 		/* Tctl */
818 		error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val);
819 		KASSERT(error == 0, ("amdsmn_read"));
820 		return (amdtemp_decode_fam17h_tctl(sc->sc_offset, val));
821 	case CCD_BASE ... CCD_MAX:
822 		/* Tccd<N> */
823 		error = amdsmn_read(sc->sc_smn, sc->sc_temp_base +
824 		    (((int)sensor - CCD_BASE) * sizeof(val)), &val);
825 		KASSERT(error == 0, ("amdsmn_read2"));
826 		KASSERT((val & AMDTEMP_17H_CCD_TMP_VALID) != 0,
827 		    ("sensor %d: not valid", (int)sensor));
828 		return (amdtemp_decode_fam10h_to_17h(sc->sc_offset, val, true));
829 	default:
830 		__assert_unreachable();
831 	}
832 }
833 
834 static void
amdtemp_probe_ccd_sensors(device_t dev,uint32_t maxreg)835 amdtemp_probe_ccd_sensors(device_t dev, uint32_t maxreg)
836 {
837 	char sensor_name[16], sensor_descr[32];
838 	struct amdtemp_softc *sc;
839 	uint32_t i, val;
840 	int error;
841 
842 	sc = device_get_softc(dev);
843 	for (i = 0; i < maxreg; i++) {
844 		error = amdsmn_read(sc->sc_smn, sc->sc_temp_base +
845 		    (i * sizeof(val)), &val);
846 		if (error != 0)
847 			continue;
848 		if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0)
849 			continue;
850 
851 		snprintf(sensor_name, sizeof(sensor_name), "ccd%u", i);
852 		snprintf(sensor_descr, sizeof(sensor_descr),
853 		    "CCD %u temperature (Tccd%u)", i, i);
854 
855 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
856 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
857 		    sensor_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
858 		    dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr);
859 	}
860 }
861 
862 static void
amdtemp_probe_ccd_sensors17h(device_t dev,uint32_t model)863 amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model)
864 {
865 	uint32_t maxreg;
866 
867 	switch (model) {
868 	case 0x00 ... 0x2f: /* Zen1, Zen+ */
869 		maxreg = 4;
870 		break;
871 	case 0x30 ... 0x3f: /* Zen2 TR (Castle Peak)/EPYC (Rome) */
872 	case 0x60 ... 0x7f: /* Zen2 Ryzen (Renoir APU, Matisse) */
873 	case 0x90 ... 0x9f: /* Zen2 Ryzen (Van Gogh APU) */
874 		maxreg = 8;
875 		_Static_assert((int)NUM_CCDS >= 8, "");
876 		break;
877 	default:
878 		device_printf(dev,
879 		    "Unrecognized Family 17h Model: %02Xh\n", model);
880 		return;
881 	}
882 
883 	amdtemp_probe_ccd_sensors(dev, maxreg);
884 }
885 
886 static void
amdtemp_probe_ccd_sensors19h(device_t dev,uint32_t model)887 amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model)
888 {
889 	struct amdtemp_softc *sc = device_get_softc(dev);
890 	uint32_t maxreg;
891 
892 	switch (model) {
893 	case 0x00 ... 0x0f: /* Zen3 EPYC "Milan" */
894 	case 0x20 ... 0x2f: /* Zen3 Ryzen "Vermeer" */
895 	case 0x50 ... 0x5f: /* Zen3 Ryzen "Cezanne" */
896 		maxreg = 8;
897 		_Static_assert((int)NUM_CCDS >= 8, "");
898 		break;
899 	case 0x10 ... 0x1f: /* Zen4 EPYC "Genoa" */
900 		sc->sc_temp_base = AMDTEMP_ZEN4_10H_CCD_TMP_BASE;
901 		maxreg = 12;
902 		_Static_assert((int)NUM_CCDS >= 12, "");
903 		break;
904 	case 0x40 ... 0x4f: /* Zen3+ Ryzen "Rembrandt" */
905 	case 0x60 ... 0x6f: /* Zen4 Ryzen "Raphael" */
906 	case 0x70 ... 0x7f: /* Zen4 Ryzen "Phoenix" */
907 		sc->sc_temp_base = AMDTEMP_ZEN4_CCD_TMP_BASE;
908 		maxreg = 8;
909 		_Static_assert((int)NUM_CCDS >= 8, "");
910 		break;
911 	default:
912 		device_printf(dev,
913 		    "Unrecognized Family 19h Model: %02Xh\n", model);
914 		return;
915 	}
916 
917 	amdtemp_probe_ccd_sensors(dev, maxreg);
918 }
919 
920 static void
amdtemp_probe_ccd_sensors1ah(device_t dev,uint32_t model)921 amdtemp_probe_ccd_sensors1ah(device_t dev, uint32_t model)
922 {
923 	struct amdtemp_softc *sc = device_get_softc(dev);
924 	uint32_t maxreg;
925 
926 	switch (model) {
927 	case 0x00 ... 0x2f: /* Zen5 EPYC "Turin" */
928 	case 0x40 ... 0x4f: /* Zen5 Ryzen "Granite Ridge" */
929 	case 0x60 ... 0x7f: /* ??? */
930 		sc->sc_temp_base = AMDTEMP_ZEN4_CCD_TMP_BASE;
931 		maxreg = 8;
932 		_Static_assert((int)NUM_CCDS >= 8, "");
933 		break;
934 	default:
935 		device_printf(dev,
936 		    "Unrecognized Family 1Ah Model: %02Xh\n", model);
937 		return;
938 	}
939 
940 	amdtemp_probe_ccd_sensors(dev, maxreg);
941 }
942