1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008, 2009 Rui Paulo <rpaulo@FreeBSD.org>
5 * Copyright (c) 2009 Norikatsu Shigemura <nork@FreeBSD.org>
6 * Copyright (c) 2009-2012 Jung-uk Kim <jkim@FreeBSD.org>
7 * All rights reserved.
8 * Copyright (c) 2017-2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
23 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Driver for the AMD CPU on-die thermal sensors.
34 * Initially based on the k8temp Linux driver.
35 */
36
37 #include <sys/param.h>
38 #include <sys/bus.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46
47 #include <machine/cpufunc.h>
48 #include <machine/md_var.h>
49 #include <machine/specialreg.h>
50
51 #include <dev/pci/pcivar.h>
52 #include <x86/pci_cfgreg.h>
53
54 #include <dev/amdsmn/amdsmn.h>
55
/*
 * Sensor identifiers.  A value of this type is registered as arg2 of each
 * sysctl node and is handed to the family-specific sc_gettemp() method.
 */
typedef enum {
	/* Individual per-core sensors (two per core on family 0Fh). */
	CORE0_SENSOR0,
	CORE0_SENSOR1,
	CORE1_SENSOR0,
	CORE1_SENSOR1,
	/*
	 * Per-core aggregates: amdtemp_sysctl() reports the higher of the
	 * core's two sensor readings for these.
	 */
	CORE0,
	CORE1,
	/* Per-CCD sensors (Tccd<N>), read over SMN by amdtemp_gettemp17h(). */
	CCD1,
	CCD_BASE = CCD1,	/* First CCD sensor; used to derive a register index. */
	CCD2,
	CCD3,
	CCD4,
	CCD5,
	CCD6,
	CCD7,
	CCD8,
	CCD9,
	CCD10,
	CCD11,
	CCD12,
	CCD_MAX = CCD12,	/* Last CCD sensor. */
	NUM_CCDS = CCD_MAX - CCD_BASE + 1,	/* Number of CCD sensor identifiers. */
} amdsensor_t;
79
/*
 * Per-device driver state.
 */
struct amdtemp_softc {
	int		sc_ncores;	/* Cores per package (from CPUID). */
	int		sc_ntemps;	/* Sensors per core (2 on 0Fh, else 1). */
	int		sc_flags;	/* AMDTEMP_FLAG_* bits below. */
#define	AMDTEMP_FLAG_CS_SWAP	0x01	/* ThermSenseCoreSel is inverted. */
#define	AMDTEMP_FLAG_CT_10BIT	0x02	/* CurTmp is 10-bit wide. */
#define	AMDTEMP_FLAG_ALT_OFFSET	0x04	/* CurTmp starts at -28C. */
	/*
	 * Offset added to every reading; settable through the
	 * dev.amdtemp.N.sensor_offset tunable/sysctl.  It is multiplied by
	 * 10 before being added to the 0.1-degree result.
	 */
	int32_t		sc_offset;
	/* SMN address of the first CCD temperature register. */
	int32_t		sc_temp_base;
	/* Family-specific routine that reads one sensor. */
	int32_t		(*sc_gettemp)(device_t, amdsensor_t);
	/* dev.cpu.N.temperature nodes created by amdtemp_intrhook(). */
	struct sysctl_oid *sc_sysctl_cpu[MAXCPU];
	/* Hook used to retry creation of the dev.cpu nodes later in boot. */
	struct intr_config_hook sc_ich;
	/* amdsmn(4) sibling device; only looked up on SMN-based models. */
	device_t	sc_smn;
	/* Held across the select-then-read sequence in amdtemp_gettemp0f(). */
	struct mtx	sc_lock;
};
95
96 /*
97 * N.B. The numbers in macro names below are significant and represent CPU
98 * family and model numbers. Do not make up fictitious family or model numbers
99 * when adding support for new devices.
100 */
101 #define VENDORID_AMD 0x1022
102 #define DEVICEID_AMD_MISC0F 0x1103
103 #define DEVICEID_AMD_MISC10 0x1203
104 #define DEVICEID_AMD_MISC11 0x1303
105 #define DEVICEID_AMD_MISC14 0x1703
106 #define DEVICEID_AMD_MISC15 0x1603
107 #define DEVICEID_AMD_MISC15_M10H 0x1403
108 #define DEVICEID_AMD_MISC15_M30H 0x141d
109 #define DEVICEID_AMD_MISC15_M60H_ROOT 0x1576
110 #define DEVICEID_AMD_MISC16 0x1533
111 #define DEVICEID_AMD_MISC16_M30H 0x1583
112 #define DEVICEID_AMD_HOSTB17H_ROOT 0x1450
113 #define DEVICEID_AMD_HOSTB17H_M10H_ROOT 0x15d0
114 #define DEVICEID_AMD_HOSTB17H_M30H_ROOT 0x1480 /* Also M70H, F19H M00H/M20H */
115 #define DEVICEID_AMD_HOSTB17H_M60H_ROOT 0x1630 /* Also F19H M50H */
116 #define DEVICEID_AMD_HOSTB19H_M10H_ROOT 0x14a4
117 #define DEVICEID_AMD_HOSTB19H_M40H_ROOT 0x14b5
118 #define DEVICEID_AMD_HOSTB19H_M60H_ROOT 0x14d8 /* Also F1AH M40H */
119 #define DEVICEID_AMD_HOSTB19H_M70H_ROOT 0x14e8
120 #define DEVICEID_AMD_HOSTB1AH_M00H_ROOT 0x153a
121 #define DEVICEID_AMD_HOSTB1AH_M20H_ROOT 0x1507
122 #define DEVICEID_AMD_HOSTB1AH_M60H_ROOT 0x1122
123
/*
 * PCI devices this driver attaches under.  amdtemp_match() searches this
 * table by vendor/device ID, and the first two fields are exported through
 * MODULE_PNP_INFO ("U16:vendor;U16:device").
 */
static const struct amdtemp_product {
	uint16_t	amdtemp_vendorid;
	uint16_t	amdtemp_deviceid;
	/*
	 * 0xFC register is only valid on the D18F3 PCI device; SMN temp
	 * drivers do not attach to that device.
	 */
	bool		amdtemp_has_cpuid;
} amdtemp_products[] = {
	{ VENDORID_AMD,	DEVICEID_AMD_MISC0F, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC10, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC11, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC14, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M10H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M30H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16_M30H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M10H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M30H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M10H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M40H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M70H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB1AH_M00H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB1AH_M20H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB1AH_M60H_ROOT, false },
};
155
156 /*
157 * Reported Temperature Control Register, family 0Fh-15h (some models), 16h.
158 */
159 #define AMDTEMP_REPTMP_CTRL 0xa4
160
161 #define AMDTEMP_REPTMP10H_CURTMP_MASK 0x7ff
162 #define AMDTEMP_REPTMP10H_CURTMP_SHIFT 21
163 #define AMDTEMP_REPTMP10H_TJSEL_MASK 0x3
164 #define AMDTEMP_REPTMP10H_TJSEL_SHIFT 16
165
166 /*
167 * Reported Temperature, Family 15h, M60+
168 *
169 * Same register bit definitions as other Family 15h CPUs, but access is
170 * indirect via SMN, like Family 17h.
171 */
172 #define AMDTEMP_15H_M60H_REPTMP_CTRL 0xd8200ca4
173
174 /*
175 * Reported Temperature, Family 17h - 1Ah
176 *
 * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register
 * provide the current temp.  Bit 19, when clear, means the temp is reported
 * in a range 0.."225C" (probable typo for 255C), and when set changes the
 * range to -49..206C.
181 */
182 #define AMDTEMP_17H_CUR_TMP 0x59800
183 #define AMDTEMP_17H_CUR_TMP_RANGE_SEL (1u << 19)
184 /*
185 * Bits 16-17, when set, mean that CUR_TMP is read-write. When it is, the
186 * 49 degree offset should apply as well. This was revealed in a Linux
187 * patch from an AMD employee.
188 */
189 #define AMDTEMP_17H_CUR_TMP_TJ_SEL ((1u << 17) | (1u << 16))
190 /*
191 * The following register set was discovered experimentally by Ondrej Čerman
192 * and collaborators, but is not (yet) documented in a PPR/OSRR (other than
193 * the M70H PPR SMN memory map showing [0x59800, +0x314] as allocated to
194 * SMU::THM). It seems plausible and the Linux sensor folks have adopted it.
195 */
196 #define AMDTEMP_17H_CCD_TMP_BASE 0x59954
197 #define AMDTEMP_17H_CCD_TMP_VALID (1u << 11)
198
199 #define AMDTEMP_ZEN4_10H_CCD_TMP_BASE 0x59b00
200 #define AMDTEMP_ZEN4_CCD_TMP_BASE 0x59b08
201
202 /*
203 * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius).
204 */
205 #define AMDTEMP_CURTMP_RANGE_ADJUST 490
206
207 /*
208 * Thermaltrip Status Register (Family 0Fh only)
209 */
210 #define AMDTEMP_THERMTP_STAT 0xe4
211 #define AMDTEMP_TTSR_SELCORE 0x04
212 #define AMDTEMP_TTSR_SELSENSOR 0x40
213
214 /*
215 * DRAM Configuration High Register
216 */
217 #define AMDTEMP_DRAM_CONF_HIGH 0x94 /* Function 2 */
218 #define AMDTEMP_DRAM_MODE_DDR3 0x0100
219
220 /*
221 * CPU Family/Model Register
222 */
223 #define AMDTEMP_CPUID 0xfc
224
225 /*
226 * Device methods.
227 */
228 static void amdtemp_identify(driver_t *driver, device_t parent);
229 static int amdtemp_probe(device_t dev);
230 static int amdtemp_attach(device_t dev);
231 static void amdtemp_intrhook(void *arg);
232 static int amdtemp_detach(device_t dev);
233 static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor);
234 static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor);
235 static int32_t amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor);
236 static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor);
237 static void amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model);
238 static void amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model);
239 static void amdtemp_probe_ccd_sensors1ah(device_t dev, uint32_t model);
240 static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS);
241
/* Newbus method table: maps the device interface onto this driver. */
static device_method_t amdtemp_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	amdtemp_identify),
	DEVMETHOD(device_probe,		amdtemp_probe),
	DEVMETHOD(device_attach,	amdtemp_attach),
	DEVMETHOD(device_detach,	amdtemp_detach),

	DEVMETHOD_END
};

/* Driver description: name, method table and per-device softc size. */
static driver_t amdtemp_driver = {
	"amdtemp",
	amdtemp_methods,
	sizeof(struct amdtemp_softc),
};
257
258 DRIVER_MODULE(amdtemp, hostb, amdtemp_driver, NULL, NULL);
259 MODULE_VERSION(amdtemp, 1);
260 MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1);
261 MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products,
262 nitems(amdtemp_products));
263
264 static bool
amdtemp_match(device_t dev,const struct amdtemp_product ** product_out)265 amdtemp_match(device_t dev, const struct amdtemp_product **product_out)
266 {
267 int i;
268 uint16_t vendor, devid;
269
270 vendor = pci_get_vendor(dev);
271 devid = pci_get_device(dev);
272
273 for (i = 0; i < nitems(amdtemp_products); i++) {
274 if (vendor == amdtemp_products[i].amdtemp_vendorid &&
275 devid == amdtemp_products[i].amdtemp_deviceid) {
276 if (product_out != NULL)
277 *product_out = &amdtemp_products[i];
278 return (true);
279 }
280 }
281 return (false);
282 }
283
284 static void
amdtemp_identify(driver_t * driver,device_t parent)285 amdtemp_identify(driver_t *driver, device_t parent)
286 {
287 device_t child;
288
289 /* Make sure we're not being doubly invoked. */
290 if (device_find_child(parent, "amdtemp", DEVICE_UNIT_ANY) != NULL)
291 return;
292
293 if (amdtemp_match(parent, NULL)) {
294 child = device_add_child(parent, "amdtemp", DEVICE_UNIT_ANY);
295 if (child == NULL)
296 device_printf(parent, "add amdtemp child failed\n");
297 }
298 }
299
300 static int
amdtemp_probe(device_t dev)301 amdtemp_probe(device_t dev)
302 {
303 uint32_t family, model, stepping;
304
305 if (resource_disabled("amdtemp", 0)) {
306 if (bootverbose)
307 device_printf(dev, "Resource disabled\n");
308 return (ENXIO);
309 }
310 if (!amdtemp_match(device_get_parent(dev), NULL)) {
311 if (bootverbose)
312 device_printf(dev, "amdtemp_match() failed\n");
313 return (ENXIO);
314 }
315
316 family = CPUID_TO_FAMILY(cpu_id);
317 model = CPUID_TO_MODEL(cpu_id);
318 stepping = CPUID_TO_STEPPING(cpu_id);
319
320 switch (family) {
321 case 0x0f:
322 if ((model == 0x04 && stepping == 0) ||
323 (model == 0x05 && stepping <= 1)) {
324 if (bootverbose)
325 device_printf(dev,
326 "Unsupported (Family=%02Xh, Model=%02Xh, Stepping=%02Xh)\n",
327 family, model, stepping);
328 return (ENXIO);
329 }
330 break;
331 case 0x10:
332 case 0x11:
333 case 0x12:
334 case 0x14:
335 case 0x15:
336 case 0x16:
337 case 0x17:
338 case 0x19:
339 case 0x1a:
340 break;
341 default:
342 return (ENXIO);
343 }
344 device_set_descf(dev, "AMD Family %02Xh CPU On-Die Thermal Sensors",
345 family);
346
347 return (BUS_PROBE_GENERIC);
348 }
349
350 static int
amdtemp_attach(device_t dev)351 amdtemp_attach(device_t dev)
352 {
353 char tn[32];
354 u_int regs[4];
355 const struct amdtemp_product *product;
356 struct amdtemp_softc *sc;
357 struct sysctl_ctx_list *sysctlctx;
358 struct sysctl_oid *sysctlnode;
359 uint32_t cpuid, family, model;
360 u_int bid;
361 int erratum319, unit;
362 bool needsmn;
363
364 sc = device_get_softc(dev);
365 erratum319 = 0;
366 needsmn = false;
367
368 if (!amdtemp_match(device_get_parent(dev), &product))
369 return (ENXIO);
370
371 cpuid = cpu_id;
372 family = CPUID_TO_FAMILY(cpuid);
373 model = CPUID_TO_MODEL(cpuid);
374
375 /*
376 * This checks for the byzantine condition of running a heterogenous
377 * revision multi-socket system where the attach thread is potentially
378 * probing a remote socket's PCI device.
379 *
380 * Currently, such scenarios are unsupported on models using the SMN
381 * (because on those models, amdtemp(4) attaches to a different PCI
382 * device than the one that contains AMDTEMP_CPUID).
383 *
384 * The ancient 0x0F family of devices only supports this register from
385 * models 40h+.
386 */
387 if (product->amdtemp_has_cpuid && (family > 0x0f ||
388 (family == 0x0f && model >= 0x40))) {
389 cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID,
390 4);
391 family = CPUID_TO_FAMILY(cpuid);
392 model = CPUID_TO_MODEL(cpuid);
393 }
394
395 switch (family) {
396 case 0x0f:
397 /*
398 * Thermaltrip Status Register
399 *
400 * - ThermSenseCoreSel
401 *
402 * Revision F & G: 0 - Core1, 1 - Core0
403 * Other: 0 - Core0, 1 - Core1
404 *
405 * - CurTmp
406 *
407 * Revision G: bits 23-14
408 * Other: bits 23-16
409 *
410 * XXX According to the BKDG, CurTmp, ThermSenseSel and
411 * ThermSenseCoreSel bits were introduced in Revision F
412 * but CurTmp seems working fine as early as Revision C.
413 * However, it is not clear whether ThermSenseSel and/or
414 * ThermSenseCoreSel work in undocumented cases as well.
415 * In fact, the Linux driver suggests it may not work but
416 * we just assume it does until we find otherwise.
417 *
418 * XXX According to Linux, CurTmp starts at -28C on
419 * Socket AM2 Revision G processors, which is not
420 * documented anywhere.
421 */
422 if (model >= 0x40)
423 sc->sc_flags |= AMDTEMP_FLAG_CS_SWAP;
424 if (model >= 0x60 && model != 0xc1) {
425 do_cpuid(0x80000001, regs);
426 bid = (regs[1] >> 9) & 0x1f;
427 switch (model) {
428 case 0x68: /* Socket S1g1 */
429 case 0x6c:
430 case 0x7c:
431 break;
432 case 0x6b: /* Socket AM2 and ASB1 (2 cores) */
433 if (bid != 0x0b && bid != 0x0c)
434 sc->sc_flags |=
435 AMDTEMP_FLAG_ALT_OFFSET;
436 break;
437 case 0x6f: /* Socket AM2 and ASB1 (1 core) */
438 case 0x7f:
439 if (bid != 0x07 && bid != 0x09 &&
440 bid != 0x0c)
441 sc->sc_flags |=
442 AMDTEMP_FLAG_ALT_OFFSET;
443 break;
444 default:
445 sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET;
446 }
447 sc->sc_flags |= AMDTEMP_FLAG_CT_10BIT;
448 }
449
450 /*
451 * There are two sensors per core.
452 */
453 sc->sc_ntemps = 2;
454
455 sc->sc_gettemp = amdtemp_gettemp0f;
456 break;
457 case 0x10:
458 /*
459 * Erratum 319 Inaccurate Temperature Measurement
460 *
461 * http://support.amd.com/us/Processor_TechDocs/41322.pdf
462 */
463 do_cpuid(0x80000001, regs);
464 switch ((regs[1] >> 28) & 0xf) {
465 case 0: /* Socket F */
466 erratum319 = 1;
467 break;
468 case 1: /* Socket AM2+ or AM3 */
469 if ((pci_cfgregread(pci_get_domain(dev),
470 pci_get_bus(dev), pci_get_slot(dev), 2,
471 AMDTEMP_DRAM_CONF_HIGH, 2) &
472 AMDTEMP_DRAM_MODE_DDR3) != 0 || model > 0x04 ||
473 (model == 0x04 && (cpuid & CPUID_STEPPING) >= 3))
474 break;
475 /* XXX 00100F42h (RB-C2) exists in both formats. */
476 erratum319 = 1;
477 break;
478 }
479 /* FALLTHROUGH */
480 case 0x11:
481 case 0x12:
482 case 0x14:
483 case 0x15:
484 case 0x16:
485 sc->sc_ntemps = 1;
486 /*
487 * Some later (60h+) models of family 15h use a similar SMN
488 * network as family 17h. (However, the register index differs
489 * from 17h and the decoding matches other 10h-15h models,
490 * which differ from 17h.)
491 */
492 if (family == 0x15 && model >= 0x60) {
493 sc->sc_gettemp = amdtemp_gettemp15hm60h;
494 needsmn = true;
495 } else
496 sc->sc_gettemp = amdtemp_gettemp;
497 break;
498 case 0x17:
499 case 0x19:
500 case 0x1a:
501 sc->sc_ntemps = 1;
502 sc->sc_gettemp = amdtemp_gettemp17h;
503 needsmn = true;
504 break;
505 default:
506 device_printf(dev, "Bogus family %02Xh\n", family);
507 return (ENXIO);
508 }
509
510 if (needsmn) {
511 sc->sc_smn = device_find_child(
512 device_get_parent(dev), "amdsmn", -1);
513 if (sc->sc_smn == NULL) {
514 if (bootverbose)
515 device_printf(dev, "No amdsmn(4) device found\n");
516 return (ENXIO);
517 }
518 }
519
520 /* Find number of cores per package. */
521 sc->sc_ncores = (amd_feature2 & AMDID2_CMP) != 0 ?
522 (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1;
523 if (sc->sc_ncores > MAXCPU)
524 return (ENXIO);
525
526 mtx_init(&sc->sc_lock, "amdtemp", NULL, MTX_DEF);
527 if (erratum319)
528 device_printf(dev,
529 "Erratum 319: temperature measurement may be inaccurate\n");
530 if (bootverbose)
531 device_printf(dev, "Found %d cores and %d sensors\n",
532 sc->sc_ncores,
533 sc->sc_ntemps > 1 ? sc->sc_ntemps * sc->sc_ncores : 1);
534
535 /*
536 * dev.amdtemp.N tree.
537 */
538 unit = device_get_unit(dev);
539 snprintf(tn, sizeof(tn), "dev.amdtemp.%d.sensor_offset", unit);
540 TUNABLE_INT_FETCH(tn, &sc->sc_offset);
541
542 sysctlctx = device_get_sysctl_ctx(dev);
543 SYSCTL_ADD_INT(sysctlctx,
544 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
545 "sensor_offset", CTLFLAG_RW, &sc->sc_offset, 0,
546 "Temperature sensor offset");
547 sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
548 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
549 "core0", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Core 0");
550
551 SYSCTL_ADD_PROC(sysctlctx,
552 SYSCTL_CHILDREN(sysctlnode),
553 OID_AUTO, "sensor0",
554 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
555 dev, CORE0_SENSOR0, amdtemp_sysctl, "IK",
556 "Core 0 / Sensor 0 temperature");
557
558 sc->sc_temp_base = AMDTEMP_17H_CCD_TMP_BASE;
559
560 if (family == 0x17)
561 amdtemp_probe_ccd_sensors17h(dev, model);
562 else if (family == 0x19)
563 amdtemp_probe_ccd_sensors19h(dev, model);
564 else if (family == 0x1a)
565 amdtemp_probe_ccd_sensors1ah(dev, model);
566 else if (sc->sc_ntemps > 1) {
567 SYSCTL_ADD_PROC(sysctlctx,
568 SYSCTL_CHILDREN(sysctlnode),
569 OID_AUTO, "sensor1",
570 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
571 dev, CORE0_SENSOR1, amdtemp_sysctl, "IK",
572 "Core 0 / Sensor 1 temperature");
573
574 if (sc->sc_ncores > 1) {
575 sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
576 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
577 OID_AUTO, "core1", CTLFLAG_RD | CTLFLAG_MPSAFE,
578 0, "Core 1");
579
580 SYSCTL_ADD_PROC(sysctlctx,
581 SYSCTL_CHILDREN(sysctlnode),
582 OID_AUTO, "sensor0",
583 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
584 dev, CORE1_SENSOR0, amdtemp_sysctl, "IK",
585 "Core 1 / Sensor 0 temperature");
586
587 SYSCTL_ADD_PROC(sysctlctx,
588 SYSCTL_CHILDREN(sysctlnode),
589 OID_AUTO, "sensor1",
590 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
591 dev, CORE1_SENSOR1, amdtemp_sysctl, "IK",
592 "Core 1 / Sensor 1 temperature");
593 }
594 }
595
596 /*
597 * Try to create dev.cpu sysctl entries and setup intrhook function.
598 * This is needed because the cpu driver may be loaded late on boot,
599 * after us.
600 */
601 amdtemp_intrhook(dev);
602 sc->sc_ich.ich_func = amdtemp_intrhook;
603 sc->sc_ich.ich_arg = dev;
604 if (config_intrhook_establish(&sc->sc_ich) != 0) {
605 device_printf(dev, "config_intrhook_establish failed!\n");
606 return (ENXIO);
607 }
608
609 return (0);
610 }
611
612 void
amdtemp_intrhook(void * arg)613 amdtemp_intrhook(void *arg)
614 {
615 struct amdtemp_softc *sc;
616 struct sysctl_ctx_list *sysctlctx;
617 device_t dev = (device_t)arg;
618 device_t acpi, cpu, nexus;
619 amdsensor_t sensor;
620 int i;
621
622 sc = device_get_softc(dev);
623
624 /*
625 * dev.cpu.N.temperature.
626 */
627 nexus = device_find_child(root_bus, "nexus", 0);
628 acpi = device_find_child(nexus, "acpi", 0);
629
630 for (i = 0; i < sc->sc_ncores; i++) {
631 if (sc->sc_sysctl_cpu[i] != NULL)
632 continue;
633 cpu = device_find_child(acpi, "cpu",
634 device_get_unit(dev) * sc->sc_ncores + i);
635 if (cpu != NULL) {
636 sysctlctx = device_get_sysctl_ctx(cpu);
637
638 sensor = sc->sc_ntemps > 1 ?
639 (i == 0 ? CORE0 : CORE1) : CORE0_SENSOR0;
640 sc->sc_sysctl_cpu[i] = SYSCTL_ADD_PROC(sysctlctx,
641 SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)),
642 OID_AUTO, "temperature",
643 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
644 dev, sensor, amdtemp_sysctl, "IK",
645 "Current temparature");
646 }
647 }
648 if (sc->sc_ich.ich_arg != NULL)
649 config_intrhook_disestablish(&sc->sc_ich);
650 }
651
652 int
amdtemp_detach(device_t dev)653 amdtemp_detach(device_t dev)
654 {
655 struct amdtemp_softc *sc = device_get_softc(dev);
656 int i;
657
658 for (i = 0; i < sc->sc_ncores; i++)
659 if (sc->sc_sysctl_cpu[i] != NULL)
660 sysctl_remove_oid(sc->sc_sysctl_cpu[i], 1, 0);
661
662 /* NewBus removes the dev.amdtemp.N tree by itself. */
663
664 mtx_destroy(&sc->sc_lock);
665 return (0);
666 }
667
668 static int
amdtemp_sysctl(SYSCTL_HANDLER_ARGS)669 amdtemp_sysctl(SYSCTL_HANDLER_ARGS)
670 {
671 device_t dev = (device_t)arg1;
672 struct amdtemp_softc *sc = device_get_softc(dev);
673 amdsensor_t sensor = (amdsensor_t)arg2;
674 int32_t auxtemp[2], temp;
675 int error;
676
677 switch (sensor) {
678 case CORE0:
679 auxtemp[0] = sc->sc_gettemp(dev, CORE0_SENSOR0);
680 auxtemp[1] = sc->sc_gettemp(dev, CORE0_SENSOR1);
681 temp = imax(auxtemp[0], auxtemp[1]);
682 break;
683 case CORE1:
684 auxtemp[0] = sc->sc_gettemp(dev, CORE1_SENSOR0);
685 auxtemp[1] = sc->sc_gettemp(dev, CORE1_SENSOR1);
686 temp = imax(auxtemp[0], auxtemp[1]);
687 break;
688 default:
689 temp = sc->sc_gettemp(dev, sensor);
690 break;
691 }
692 error = sysctl_handle_int(oidp, &temp, 0, req);
693
694 return (error);
695 }
696
697 #define AMDTEMP_ZERO_C_TO_K 2731
698
699 static int32_t
amdtemp_gettemp0f(device_t dev,amdsensor_t sensor)700 amdtemp_gettemp0f(device_t dev, amdsensor_t sensor)
701 {
702 struct amdtemp_softc *sc = device_get_softc(dev);
703 uint32_t mask, offset, temp;
704
705 mtx_lock(&sc->sc_lock);
706
707 /* Set Sensor/Core selector. */
708 temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 1);
709 temp &= ~(AMDTEMP_TTSR_SELCORE | AMDTEMP_TTSR_SELSENSOR);
710 switch (sensor) {
711 case CORE0_SENSOR1:
712 temp |= AMDTEMP_TTSR_SELSENSOR;
713 /* FALLTHROUGH */
714 case CORE0_SENSOR0:
715 case CORE0:
716 if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) != 0)
717 temp |= AMDTEMP_TTSR_SELCORE;
718 break;
719 case CORE1_SENSOR1:
720 temp |= AMDTEMP_TTSR_SELSENSOR;
721 /* FALLTHROUGH */
722 case CORE1_SENSOR0:
723 case CORE1:
724 if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) == 0)
725 temp |= AMDTEMP_TTSR_SELCORE;
726 break;
727 default:
728 __assert_unreachable();
729 }
730 pci_write_config(dev, AMDTEMP_THERMTP_STAT, temp, 1);
731
732 mask = (sc->sc_flags & AMDTEMP_FLAG_CT_10BIT) != 0 ? 0x3ff : 0x3fc;
733 offset = (sc->sc_flags & AMDTEMP_FLAG_ALT_OFFSET) != 0 ? 28 : 49;
734 temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 4);
735 temp = ((temp >> 14) & mask) * 5 / 2;
736 temp += AMDTEMP_ZERO_C_TO_K + (sc->sc_offset - offset) * 10;
737
738 mtx_unlock(&sc->sc_lock);
739 return (temp);
740 }
741
742 static uint32_t
amdtemp_decode_fam10h_to_17h(int32_t sc_offset,uint32_t val,bool minus49)743 amdtemp_decode_fam10h_to_17h(int32_t sc_offset, uint32_t val, bool minus49)
744 {
745 uint32_t temp;
746
747 /* Convert raw register subfield units (0.125C) to units of 0.1C. */
748 temp = (val & AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4;
749
750 if (minus49)
751 temp -= AMDTEMP_CURTMP_RANGE_ADJUST;
752
753 temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10;
754 return (temp);
755 }
756
757 static uint32_t
amdtemp_decode_fam10h_to_16h(int32_t sc_offset,uint32_t val)758 amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val)
759 {
760 bool minus49;
761
762 /*
763 * On Family 15h and higher, if CurTmpTjSel is 11b, the range is
764 * adjusted down by 49.0 degrees Celsius. (This adjustment is not
765 * documented in BKDGs prior to family 15h model 00h.)
766 */
767 minus49 = (CPUID_TO_FAMILY(cpu_id) >= 0x15 &&
768 ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) &
769 AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3);
770
771 return (amdtemp_decode_fam10h_to_17h(sc_offset,
772 val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
773 }
774
775 static uint32_t
amdtemp_decode_fam17h_tctl(int32_t sc_offset,uint32_t val)776 amdtemp_decode_fam17h_tctl(int32_t sc_offset, uint32_t val)
777 {
778 bool minus49;
779
780 minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0)
781 || ((val & AMDTEMP_17H_CUR_TMP_TJ_SEL) == AMDTEMP_17H_CUR_TMP_TJ_SEL);
782 return (amdtemp_decode_fam10h_to_17h(sc_offset,
783 val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
784 }
785
786 static int32_t
amdtemp_gettemp(device_t dev,amdsensor_t sensor)787 amdtemp_gettemp(device_t dev, amdsensor_t sensor)
788 {
789 struct amdtemp_softc *sc = device_get_softc(dev);
790 uint32_t temp;
791
792 temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4);
793 return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp));
794 }
795
796 static int32_t
amdtemp_gettemp15hm60h(device_t dev,amdsensor_t sensor)797 amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor)
798 {
799 struct amdtemp_softc *sc = device_get_softc(dev);
800 uint32_t val;
801 int error __diagused;
802
803 error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val);
804 KASSERT(error == 0, ("amdsmn_read"));
805 return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val));
806 }
807
808 static int32_t
amdtemp_gettemp17h(device_t dev,amdsensor_t sensor)809 amdtemp_gettemp17h(device_t dev, amdsensor_t sensor)
810 {
811 struct amdtemp_softc *sc = device_get_softc(dev);
812 uint32_t val;
813 int error __diagused;
814
815 switch (sensor) {
816 case CORE0_SENSOR0:
817 /* Tctl */
818 error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val);
819 KASSERT(error == 0, ("amdsmn_read"));
820 return (amdtemp_decode_fam17h_tctl(sc->sc_offset, val));
821 case CCD_BASE ... CCD_MAX:
822 /* Tccd<N> */
823 error = amdsmn_read(sc->sc_smn, sc->sc_temp_base +
824 (((int)sensor - CCD_BASE) * sizeof(val)), &val);
825 KASSERT(error == 0, ("amdsmn_read2"));
826 KASSERT((val & AMDTEMP_17H_CCD_TMP_VALID) != 0,
827 ("sensor %d: not valid", (int)sensor));
828 return (amdtemp_decode_fam10h_to_17h(sc->sc_offset, val, true));
829 default:
830 __assert_unreachable();
831 }
832 }
833
834 static void
amdtemp_probe_ccd_sensors(device_t dev,uint32_t maxreg)835 amdtemp_probe_ccd_sensors(device_t dev, uint32_t maxreg)
836 {
837 char sensor_name[16], sensor_descr[32];
838 struct amdtemp_softc *sc;
839 uint32_t i, val;
840 int error;
841
842 sc = device_get_softc(dev);
843 for (i = 0; i < maxreg; i++) {
844 error = amdsmn_read(sc->sc_smn, sc->sc_temp_base +
845 (i * sizeof(val)), &val);
846 if (error != 0)
847 continue;
848 if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0)
849 continue;
850
851 snprintf(sensor_name, sizeof(sensor_name), "ccd%u", i);
852 snprintf(sensor_descr, sizeof(sensor_descr),
853 "CCD %u temperature (Tccd%u)", i, i);
854
855 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
856 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
857 sensor_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
858 dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr);
859 }
860 }
861
862 static void
amdtemp_probe_ccd_sensors17h(device_t dev,uint32_t model)863 amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model)
864 {
865 uint32_t maxreg;
866
867 switch (model) {
868 case 0x00 ... 0x2f: /* Zen1, Zen+ */
869 maxreg = 4;
870 break;
871 case 0x30 ... 0x3f: /* Zen2 TR (Castle Peak)/EPYC (Rome) */
872 case 0x60 ... 0x7f: /* Zen2 Ryzen (Renoir APU, Matisse) */
873 case 0x90 ... 0x9f: /* Zen2 Ryzen (Van Gogh APU) */
874 maxreg = 8;
875 _Static_assert((int)NUM_CCDS >= 8, "");
876 break;
877 default:
878 device_printf(dev,
879 "Unrecognized Family 17h Model: %02Xh\n", model);
880 return;
881 }
882
883 amdtemp_probe_ccd_sensors(dev, maxreg);
884 }
885
886 static void
amdtemp_probe_ccd_sensors19h(device_t dev,uint32_t model)887 amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model)
888 {
889 struct amdtemp_softc *sc = device_get_softc(dev);
890 uint32_t maxreg;
891
892 switch (model) {
893 case 0x00 ... 0x0f: /* Zen3 EPYC "Milan" */
894 case 0x20 ... 0x2f: /* Zen3 Ryzen "Vermeer" */
895 case 0x50 ... 0x5f: /* Zen3 Ryzen "Cezanne" */
896 maxreg = 8;
897 _Static_assert((int)NUM_CCDS >= 8, "");
898 break;
899 case 0x10 ... 0x1f: /* Zen4 EPYC "Genoa" */
900 sc->sc_temp_base = AMDTEMP_ZEN4_10H_CCD_TMP_BASE;
901 maxreg = 12;
902 _Static_assert((int)NUM_CCDS >= 12, "");
903 break;
904 case 0x40 ... 0x4f: /* Zen3+ Ryzen "Rembrandt" */
905 case 0x60 ... 0x6f: /* Zen4 Ryzen "Raphael" */
906 case 0x70 ... 0x7f: /* Zen4 Ryzen "Phoenix" */
907 sc->sc_temp_base = AMDTEMP_ZEN4_CCD_TMP_BASE;
908 maxreg = 8;
909 _Static_assert((int)NUM_CCDS >= 8, "");
910 break;
911 default:
912 device_printf(dev,
913 "Unrecognized Family 19h Model: %02Xh\n", model);
914 return;
915 }
916
917 amdtemp_probe_ccd_sensors(dev, maxreg);
918 }
919
920 static void
amdtemp_probe_ccd_sensors1ah(device_t dev,uint32_t model)921 amdtemp_probe_ccd_sensors1ah(device_t dev, uint32_t model)
922 {
923 struct amdtemp_softc *sc = device_get_softc(dev);
924 uint32_t maxreg;
925
926 switch (model) {
927 case 0x00 ... 0x2f: /* Zen5 EPYC "Turin" */
928 case 0x40 ... 0x4f: /* Zen5 Ryzen "Granite Ridge" */
929 case 0x60 ... 0x7f: /* ??? */
930 sc->sc_temp_base = AMDTEMP_ZEN4_CCD_TMP_BASE;
931 maxreg = 8;
932 _Static_assert((int)NUM_CCDS >= 8, "");
933 break;
934 default:
935 device_printf(dev,
936 "Unrecognized Family 1Ah Model: %02Xh\n", model);
937 return;
938 }
939
940 amdtemp_probe_ccd_sensors(dev, maxreg);
941 }
942