1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008, 2009 Rui Paulo <rpaulo@FreeBSD.org>
5 * Copyright (c) 2009 Norikatsu Shigemura <nork@FreeBSD.org>
6 * Copyright (c) 2009-2012 Jung-uk Kim <jkim@FreeBSD.org>
7 * All rights reserved.
8 * Copyright (c) 2017-2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
23 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Driver for the AMD CPU on-die thermal sensors.
34 * Initially based on the k8temp Linux driver.
35 */
36
37 #include <sys/param.h>
38 #include <sys/bus.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46
47 #include <machine/cpufunc.h>
48 #include <machine/md_var.h>
49 #include <machine/specialreg.h>
50
51 #include <dev/pci/pcivar.h>
52 #include <x86/pci_cfgreg.h>
53
54 #include <dev/amdsmn/amdsmn.h>
55
/*
 * Selector naming the temperature reading a sysctl node or a sc_gettemp
 * callback refers to.  The value is passed as arg2 to amdtemp_sysctl().
 */
typedef enum {
	/* Individual readings, one per (core, sensor) pair. */
	CORE0_SENSOR0,
	CORE0_SENSOR1,
	CORE1_SENSOR0,
	CORE1_SENSOR1,
	/* Per-core values: amdtemp_sysctl() reports the max of both sensors. */
	CORE0,
	CORE1,
	/*
	 * Per-CCD readings.  amdtemp_gettemp17h() turns (sensor - CCD_BASE)
	 * into a register index relative to sc_temp_base.
	 */
	CCD1,
	CCD_BASE = CCD1,
	CCD2,
	CCD3,
	CCD4,
	CCD5,
	CCD6,
	CCD7,
	CCD8,
	CCD9,
	CCD10,
	CCD11,
	CCD12,
	CCD_MAX = CCD12,
	/* Number of CCD selectors; checked against maxreg by _Static_assert. */
	NUM_CCDS = CCD_MAX - CCD_BASE + 1,
} amdsensor_t;
79
/*
 * Per-device driver state.
 */
struct amdtemp_softc {
	int		sc_ncores;	/* Cores per package, set in attach. */
	int		sc_ntemps;	/* 2 on family 0Fh, otherwise 1. */
	int		sc_flags;	/* AMDTEMP_FLAG_* bits below. */
#define	AMDTEMP_FLAG_CS_SWAP	0x01	/* ThermSenseCoreSel is inverted. */
#define	AMDTEMP_FLAG_CT_10BIT	0x02	/* CurTmp is 10-bit wide. */
#define	AMDTEMP_FLAG_ALT_OFFSET	0x04	/* CurTmp starts at -28C. */
	/*
	 * Correction exposed as the "sensor_offset" tunable/sysctl; the
	 * decoders multiply it by 10 before adding it to the 0.1-unit result.
	 */
	int32_t		sc_offset;
	/* SMN address of the first per-CCD temperature register. */
	int32_t		sc_temp_base;
	/* Family-specific reader selected in amdtemp_attach(). */
	int32_t		(*sc_gettemp)(device_t, amdsensor_t);
	/* dev.cpu.N.temperature OIDs created by amdtemp_intrhook(). */
	struct sysctl_oid *sc_sysctl_cpu[MAXCPU];
	/* Hook used to retry the dev.cpu sysctl creation later in boot. */
	struct intr_config_hook sc_ich;
	/* amdsmn sibling device; only looked up when SMN access is needed. */
	device_t	sc_smn;
	/* Held by amdtemp_gettemp0f() across its select-then-read sequence. */
	struct mtx	sc_lock;
};
95
96 /*
97 * N.B. The numbers in macro names below are significant and represent CPU
98 * family and model numbers. Do not make up fictitious family or model numbers
99 * when adding support for new devices.
100 */
101 #define VENDORID_AMD 0x1022
102 #define DEVICEID_AMD_MISC0F 0x1103
103 #define DEVICEID_AMD_MISC10 0x1203
104 #define DEVICEID_AMD_MISC11 0x1303
105 #define DEVICEID_AMD_MISC14 0x1703
106 #define DEVICEID_AMD_MISC15 0x1603
107 #define DEVICEID_AMD_MISC15_M10H 0x1403
108 #define DEVICEID_AMD_MISC15_M30H 0x141d
109 #define DEVICEID_AMD_MISC15_M60H_ROOT 0x1576
110 #define DEVICEID_AMD_MISC16 0x1533
111 #define DEVICEID_AMD_MISC16_M30H 0x1583
112 #define DEVICEID_AMD_HOSTB17H_ROOT 0x1450
113 #define DEVICEID_AMD_HOSTB17H_M10H_ROOT 0x15d0
114 #define DEVICEID_AMD_HOSTB17H_M30H_ROOT 0x1480 /* Also M70H, F19H M00H/M20H */
115 #define DEVICEID_AMD_HOSTB17H_M60H_ROOT 0x1630
116 #define DEVICEID_AMD_HOSTB19H_M10H_ROOT 0x14a4
117 #define DEVICEID_AMD_HOSTB19H_M40H_ROOT 0x14b5
118 #define DEVICEID_AMD_HOSTB19H_M60H_ROOT 0x14d8
119 #define DEVICEID_AMD_HOSTB19H_M70H_ROOT 0x14e8
120
/*
 * Table of PCI devices the driver attaches under.  amdtemp_match() compares
 * the vendor/device ID pair of a candidate parent against every entry, and
 * the same table is exported through MODULE_PNP_INFO.
 */
static const struct amdtemp_product {
	uint16_t	amdtemp_vendorid;
	uint16_t	amdtemp_deviceid;
	/*
	 * 0xFC register is only valid on the D18F3 PCI device; SMN temp
	 * drivers do not attach to that device.
	 *
	 * When true, amdtemp_attach() may read AMDTEMP_CPUID from the parent
	 * device instead of relying on the booting CPU's cpu_id.
	 */
	bool		amdtemp_has_cpuid;
} amdtemp_products[] = {
	{ VENDORID_AMD,	DEVICEID_AMD_MISC0F, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC10, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC11, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC14, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M10H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M30H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16_M30H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M10H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M30H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M10H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M40H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M70H_ROOT, false },
};
149
150 /*
151 * Reported Temperature Control Register, family 0Fh-15h (some models), 16h.
152 */
153 #define AMDTEMP_REPTMP_CTRL 0xa4
154
155 #define AMDTEMP_REPTMP10H_CURTMP_MASK 0x7ff
156 #define AMDTEMP_REPTMP10H_CURTMP_SHIFT 21
157 #define AMDTEMP_REPTMP10H_TJSEL_MASK 0x3
158 #define AMDTEMP_REPTMP10H_TJSEL_SHIFT 16
159
160 /*
161 * Reported Temperature, Family 15h, M60+
162 *
163 * Same register bit definitions as other Family 15h CPUs, but access is
164 * indirect via SMN, like Family 17h.
165 */
166 #define AMDTEMP_15H_M60H_REPTMP_CTRL 0xd8200ca4
167
168 /*
169 * Reported Temperature, Family 17h
170 *
171 * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register
172 * provide the current temp. bit 19, when clear, means the temp is reported in
173 * a range 0.."225C" (probable typo for 255C), and when set changes the range
174 * to -49..206C.
175 */
176 #define AMDTEMP_17H_CUR_TMP 0x59800
177 #define AMDTEMP_17H_CUR_TMP_RANGE_SEL (1u << 19)
178 /*
179 * Bits 16-17, when set, mean that CUR_TMP is read-write. When it is, the
180 * 49 degree offset should apply as well. This was revealed in a Linux
181 * patch from an AMD employee.
182 */
183 #define AMDTEMP_17H_CUR_TMP_TJ_SEL ((1u << 17) | (1u << 16))
184 /*
185 * The following register set was discovered experimentally by Ondrej Čerman
186 * and collaborators, but is not (yet) documented in a PPR/OSRR (other than
187 * the M70H PPR SMN memory map showing [0x59800, +0x314] as allocated to
188 * SMU::THM). It seems plausible and the Linux sensor folks have adopted it.
189 */
190 #define AMDTEMP_17H_CCD_TMP_BASE 0x59954
191 #define AMDTEMP_17H_CCD_TMP_VALID (1u << 11)
192
193 #define AMDTEMP_ZEN4_10H_CCD_TMP_BASE 0x59b00
194 #define AMDTEMP_ZEN4_CCD_TMP_BASE 0x59b08
195
196 /*
197 * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius).
198 */
199 #define AMDTEMP_CURTMP_RANGE_ADJUST 490
200
201 /*
202 * Thermaltrip Status Register (Family 0Fh only)
203 */
204 #define AMDTEMP_THERMTP_STAT 0xe4
205 #define AMDTEMP_TTSR_SELCORE 0x04
206 #define AMDTEMP_TTSR_SELSENSOR 0x40
207
208 /*
209 * DRAM Configuration High Register
210 */
211 #define AMDTEMP_DRAM_CONF_HIGH 0x94 /* Function 2 */
212 #define AMDTEMP_DRAM_MODE_DDR3 0x0100
213
214 /*
215 * CPU Family/Model Register
216 */
217 #define AMDTEMP_CPUID 0xfc
218
219 /*
220 * Device methods.
221 */
222 static void amdtemp_identify(driver_t *driver, device_t parent);
223 static int amdtemp_probe(device_t dev);
224 static int amdtemp_attach(device_t dev);
225 static void amdtemp_intrhook(void *arg);
226 static int amdtemp_detach(device_t dev);
227 static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor);
228 static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor);
229 static int32_t amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor);
230 static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor);
231 static void amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model);
232 static void amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model);
233 static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS);
234
/*
 * Device method table: binds the identify/probe/attach/detach entry points
 * declared above to this driver.
 */
static device_method_t amdtemp_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	amdtemp_identify),
	DEVMETHOD(device_probe,		amdtemp_probe),
	DEVMETHOD(device_attach,	amdtemp_attach),
	DEVMETHOD(device_detach,	amdtemp_detach),

	DEVMETHOD_END
};
244
/*
 * Driver description: name, method table and the size of the per-device
 * softc (struct amdtemp_softc).
 */
static driver_t amdtemp_driver = {
	"amdtemp",
	amdtemp_methods,
	sizeof(struct amdtemp_softc),
};
250
251 DRIVER_MODULE(amdtemp, hostb, amdtemp_driver, NULL, NULL);
252 MODULE_VERSION(amdtemp, 1);
253 MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1);
254 MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products,
255 nitems(amdtemp_products));
256
257 static bool
amdtemp_match(device_t dev,const struct amdtemp_product ** product_out)258 amdtemp_match(device_t dev, const struct amdtemp_product **product_out)
259 {
260 int i;
261 uint16_t vendor, devid;
262
263 vendor = pci_get_vendor(dev);
264 devid = pci_get_device(dev);
265
266 for (i = 0; i < nitems(amdtemp_products); i++) {
267 if (vendor == amdtemp_products[i].amdtemp_vendorid &&
268 devid == amdtemp_products[i].amdtemp_deviceid) {
269 if (product_out != NULL)
270 *product_out = &amdtemp_products[i];
271 return (true);
272 }
273 }
274 return (false);
275 }
276
277 static void
amdtemp_identify(driver_t * driver,device_t parent)278 amdtemp_identify(driver_t *driver, device_t parent)
279 {
280 device_t child;
281
282 /* Make sure we're not being doubly invoked. */
283 if (device_find_child(parent, "amdtemp", -1) != NULL)
284 return;
285
286 if (amdtemp_match(parent, NULL)) {
287 child = device_add_child(parent, "amdtemp", DEVICE_UNIT_ANY);
288 if (child == NULL)
289 device_printf(parent, "add amdtemp child failed\n");
290 }
291 }
292
293 static int
amdtemp_probe(device_t dev)294 amdtemp_probe(device_t dev)
295 {
296 uint32_t family, model;
297
298 if (resource_disabled("amdtemp", 0))
299 return (ENXIO);
300 if (!amdtemp_match(device_get_parent(dev), NULL))
301 return (ENXIO);
302
303 family = CPUID_TO_FAMILY(cpu_id);
304 model = CPUID_TO_MODEL(cpu_id);
305
306 switch (family) {
307 case 0x0f:
308 if ((model == 0x04 && (cpu_id & CPUID_STEPPING) == 0) ||
309 (model == 0x05 && (cpu_id & CPUID_STEPPING) <= 1))
310 return (ENXIO);
311 break;
312 case 0x10:
313 case 0x11:
314 case 0x12:
315 case 0x14:
316 case 0x15:
317 case 0x16:
318 case 0x17:
319 case 0x19:
320 break;
321 default:
322 return (ENXIO);
323 }
324 device_set_desc(dev, "AMD CPU On-Die Thermal Sensors");
325
326 return (BUS_PROBE_GENERIC);
327 }
328
329 static int
amdtemp_attach(device_t dev)330 amdtemp_attach(device_t dev)
331 {
332 char tn[32];
333 u_int regs[4];
334 const struct amdtemp_product *product;
335 struct amdtemp_softc *sc;
336 struct sysctl_ctx_list *sysctlctx;
337 struct sysctl_oid *sysctlnode;
338 uint32_t cpuid, family, model;
339 u_int bid;
340 int erratum319, unit;
341 bool needsmn;
342
343 sc = device_get_softc(dev);
344 erratum319 = 0;
345 needsmn = false;
346
347 if (!amdtemp_match(device_get_parent(dev), &product))
348 return (ENXIO);
349
350 cpuid = cpu_id;
351 family = CPUID_TO_FAMILY(cpuid);
352 model = CPUID_TO_MODEL(cpuid);
353
354 /*
355 * This checks for the byzantine condition of running a heterogenous
356 * revision multi-socket system where the attach thread is potentially
357 * probing a remote socket's PCI device.
358 *
359 * Currently, such scenarios are unsupported on models using the SMN
360 * (because on those models, amdtemp(4) attaches to a different PCI
361 * device than the one that contains AMDTEMP_CPUID).
362 *
363 * The ancient 0x0F family of devices only supports this register from
364 * models 40h+.
365 */
366 if (product->amdtemp_has_cpuid && (family > 0x0f ||
367 (family == 0x0f && model >= 0x40))) {
368 cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID,
369 4);
370 family = CPUID_TO_FAMILY(cpuid);
371 model = CPUID_TO_MODEL(cpuid);
372 }
373
374 switch (family) {
375 case 0x0f:
376 /*
377 * Thermaltrip Status Register
378 *
379 * - ThermSenseCoreSel
380 *
381 * Revision F & G: 0 - Core1, 1 - Core0
382 * Other: 0 - Core0, 1 - Core1
383 *
384 * - CurTmp
385 *
386 * Revision G: bits 23-14
387 * Other: bits 23-16
388 *
389 * XXX According to the BKDG, CurTmp, ThermSenseSel and
390 * ThermSenseCoreSel bits were introduced in Revision F
391 * but CurTmp seems working fine as early as Revision C.
392 * However, it is not clear whether ThermSenseSel and/or
393 * ThermSenseCoreSel work in undocumented cases as well.
394 * In fact, the Linux driver suggests it may not work but
395 * we just assume it does until we find otherwise.
396 *
397 * XXX According to Linux, CurTmp starts at -28C on
398 * Socket AM2 Revision G processors, which is not
399 * documented anywhere.
400 */
401 if (model >= 0x40)
402 sc->sc_flags |= AMDTEMP_FLAG_CS_SWAP;
403 if (model >= 0x60 && model != 0xc1) {
404 do_cpuid(0x80000001, regs);
405 bid = (regs[1] >> 9) & 0x1f;
406 switch (model) {
407 case 0x68: /* Socket S1g1 */
408 case 0x6c:
409 case 0x7c:
410 break;
411 case 0x6b: /* Socket AM2 and ASB1 (2 cores) */
412 if (bid != 0x0b && bid != 0x0c)
413 sc->sc_flags |=
414 AMDTEMP_FLAG_ALT_OFFSET;
415 break;
416 case 0x6f: /* Socket AM2 and ASB1 (1 core) */
417 case 0x7f:
418 if (bid != 0x07 && bid != 0x09 &&
419 bid != 0x0c)
420 sc->sc_flags |=
421 AMDTEMP_FLAG_ALT_OFFSET;
422 break;
423 default:
424 sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET;
425 }
426 sc->sc_flags |= AMDTEMP_FLAG_CT_10BIT;
427 }
428
429 /*
430 * There are two sensors per core.
431 */
432 sc->sc_ntemps = 2;
433
434 sc->sc_gettemp = amdtemp_gettemp0f;
435 break;
436 case 0x10:
437 /*
438 * Erratum 319 Inaccurate Temperature Measurement
439 *
440 * http://support.amd.com/us/Processor_TechDocs/41322.pdf
441 */
442 do_cpuid(0x80000001, regs);
443 switch ((regs[1] >> 28) & 0xf) {
444 case 0: /* Socket F */
445 erratum319 = 1;
446 break;
447 case 1: /* Socket AM2+ or AM3 */
448 if ((pci_cfgregread(pci_get_domain(dev),
449 pci_get_bus(dev), pci_get_slot(dev), 2,
450 AMDTEMP_DRAM_CONF_HIGH, 2) &
451 AMDTEMP_DRAM_MODE_DDR3) != 0 || model > 0x04 ||
452 (model == 0x04 && (cpuid & CPUID_STEPPING) >= 3))
453 break;
454 /* XXX 00100F42h (RB-C2) exists in both formats. */
455 erratum319 = 1;
456 break;
457 }
458 /* FALLTHROUGH */
459 case 0x11:
460 case 0x12:
461 case 0x14:
462 case 0x15:
463 case 0x16:
464 sc->sc_ntemps = 1;
465 /*
466 * Some later (60h+) models of family 15h use a similar SMN
467 * network as family 17h. (However, the register index differs
468 * from 17h and the decoding matches other 10h-15h models,
469 * which differ from 17h.)
470 */
471 if (family == 0x15 && model >= 0x60) {
472 sc->sc_gettemp = amdtemp_gettemp15hm60h;
473 needsmn = true;
474 } else
475 sc->sc_gettemp = amdtemp_gettemp;
476 break;
477 case 0x17:
478 case 0x19:
479 sc->sc_ntemps = 1;
480 sc->sc_gettemp = amdtemp_gettemp17h;
481 needsmn = true;
482 break;
483 default:
484 device_printf(dev, "Bogus family 0x%x\n", family);
485 return (ENXIO);
486 }
487
488 if (needsmn) {
489 sc->sc_smn = device_find_child(
490 device_get_parent(dev), "amdsmn", -1);
491 if (sc->sc_smn == NULL) {
492 if (bootverbose)
493 device_printf(dev, "No SMN device found\n");
494 return (ENXIO);
495 }
496 }
497
498 /* Find number of cores per package. */
499 sc->sc_ncores = (amd_feature2 & AMDID2_CMP) != 0 ?
500 (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1;
501 if (sc->sc_ncores > MAXCPU)
502 return (ENXIO);
503
504 mtx_init(&sc->sc_lock, "amdtemp", NULL, MTX_DEF);
505 if (erratum319)
506 device_printf(dev,
507 "Erratum 319: temperature measurement may be inaccurate\n");
508 if (bootverbose)
509 device_printf(dev, "Found %d cores and %d sensors.\n",
510 sc->sc_ncores,
511 sc->sc_ntemps > 1 ? sc->sc_ntemps * sc->sc_ncores : 1);
512
513 /*
514 * dev.amdtemp.N tree.
515 */
516 unit = device_get_unit(dev);
517 snprintf(tn, sizeof(tn), "dev.amdtemp.%d.sensor_offset", unit);
518 TUNABLE_INT_FETCH(tn, &sc->sc_offset);
519
520 sysctlctx = device_get_sysctl_ctx(dev);
521 SYSCTL_ADD_INT(sysctlctx,
522 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
523 "sensor_offset", CTLFLAG_RW, &sc->sc_offset, 0,
524 "Temperature sensor offset");
525 sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
526 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
527 "core0", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Core 0");
528
529 SYSCTL_ADD_PROC(sysctlctx,
530 SYSCTL_CHILDREN(sysctlnode),
531 OID_AUTO, "sensor0",
532 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
533 dev, CORE0_SENSOR0, amdtemp_sysctl, "IK",
534 "Core 0 / Sensor 0 temperature");
535
536 sc->sc_temp_base = AMDTEMP_17H_CCD_TMP_BASE;
537
538 if (family == 0x17)
539 amdtemp_probe_ccd_sensors17h(dev, model);
540 else if (family == 0x19)
541 amdtemp_probe_ccd_sensors19h(dev, model);
542 else if (sc->sc_ntemps > 1) {
543 SYSCTL_ADD_PROC(sysctlctx,
544 SYSCTL_CHILDREN(sysctlnode),
545 OID_AUTO, "sensor1",
546 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
547 dev, CORE0_SENSOR1, amdtemp_sysctl, "IK",
548 "Core 0 / Sensor 1 temperature");
549
550 if (sc->sc_ncores > 1) {
551 sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
552 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
553 OID_AUTO, "core1", CTLFLAG_RD | CTLFLAG_MPSAFE,
554 0, "Core 1");
555
556 SYSCTL_ADD_PROC(sysctlctx,
557 SYSCTL_CHILDREN(sysctlnode),
558 OID_AUTO, "sensor0",
559 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
560 dev, CORE1_SENSOR0, amdtemp_sysctl, "IK",
561 "Core 1 / Sensor 0 temperature");
562
563 SYSCTL_ADD_PROC(sysctlctx,
564 SYSCTL_CHILDREN(sysctlnode),
565 OID_AUTO, "sensor1",
566 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
567 dev, CORE1_SENSOR1, amdtemp_sysctl, "IK",
568 "Core 1 / Sensor 1 temperature");
569 }
570 }
571
572 /*
573 * Try to create dev.cpu sysctl entries and setup intrhook function.
574 * This is needed because the cpu driver may be loaded late on boot,
575 * after us.
576 */
577 amdtemp_intrhook(dev);
578 sc->sc_ich.ich_func = amdtemp_intrhook;
579 sc->sc_ich.ich_arg = dev;
580 if (config_intrhook_establish(&sc->sc_ich) != 0) {
581 device_printf(dev, "config_intrhook_establish failed!\n");
582 return (ENXIO);
583 }
584
585 return (0);
586 }
587
588 void
amdtemp_intrhook(void * arg)589 amdtemp_intrhook(void *arg)
590 {
591 struct amdtemp_softc *sc;
592 struct sysctl_ctx_list *sysctlctx;
593 device_t dev = (device_t)arg;
594 device_t acpi, cpu, nexus;
595 amdsensor_t sensor;
596 int i;
597
598 sc = device_get_softc(dev);
599
600 /*
601 * dev.cpu.N.temperature.
602 */
603 nexus = device_find_child(root_bus, "nexus", 0);
604 acpi = device_find_child(nexus, "acpi", 0);
605
606 for (i = 0; i < sc->sc_ncores; i++) {
607 if (sc->sc_sysctl_cpu[i] != NULL)
608 continue;
609 cpu = device_find_child(acpi, "cpu",
610 device_get_unit(dev) * sc->sc_ncores + i);
611 if (cpu != NULL) {
612 sysctlctx = device_get_sysctl_ctx(cpu);
613
614 sensor = sc->sc_ntemps > 1 ?
615 (i == 0 ? CORE0 : CORE1) : CORE0_SENSOR0;
616 sc->sc_sysctl_cpu[i] = SYSCTL_ADD_PROC(sysctlctx,
617 SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)),
618 OID_AUTO, "temperature",
619 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
620 dev, sensor, amdtemp_sysctl, "IK",
621 "Current temparature");
622 }
623 }
624 if (sc->sc_ich.ich_arg != NULL)
625 config_intrhook_disestablish(&sc->sc_ich);
626 }
627
628 int
amdtemp_detach(device_t dev)629 amdtemp_detach(device_t dev)
630 {
631 struct amdtemp_softc *sc = device_get_softc(dev);
632 int i;
633
634 for (i = 0; i < sc->sc_ncores; i++)
635 if (sc->sc_sysctl_cpu[i] != NULL)
636 sysctl_remove_oid(sc->sc_sysctl_cpu[i], 1, 0);
637
638 /* NewBus removes the dev.amdtemp.N tree by itself. */
639
640 mtx_destroy(&sc->sc_lock);
641 return (0);
642 }
643
644 static int
amdtemp_sysctl(SYSCTL_HANDLER_ARGS)645 amdtemp_sysctl(SYSCTL_HANDLER_ARGS)
646 {
647 device_t dev = (device_t)arg1;
648 struct amdtemp_softc *sc = device_get_softc(dev);
649 amdsensor_t sensor = (amdsensor_t)arg2;
650 int32_t auxtemp[2], temp;
651 int error;
652
653 switch (sensor) {
654 case CORE0:
655 auxtemp[0] = sc->sc_gettemp(dev, CORE0_SENSOR0);
656 auxtemp[1] = sc->sc_gettemp(dev, CORE0_SENSOR1);
657 temp = imax(auxtemp[0], auxtemp[1]);
658 break;
659 case CORE1:
660 auxtemp[0] = sc->sc_gettemp(dev, CORE1_SENSOR0);
661 auxtemp[1] = sc->sc_gettemp(dev, CORE1_SENSOR1);
662 temp = imax(auxtemp[0], auxtemp[1]);
663 break;
664 default:
665 temp = sc->sc_gettemp(dev, sensor);
666 break;
667 }
668 error = sysctl_handle_int(oidp, &temp, 0, req);
669
670 return (error);
671 }
672
673 #define AMDTEMP_ZERO_C_TO_K 2731
674
675 static int32_t
amdtemp_gettemp0f(device_t dev,amdsensor_t sensor)676 amdtemp_gettemp0f(device_t dev, amdsensor_t sensor)
677 {
678 struct amdtemp_softc *sc = device_get_softc(dev);
679 uint32_t mask, offset, temp;
680
681 mtx_lock(&sc->sc_lock);
682
683 /* Set Sensor/Core selector. */
684 temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 1);
685 temp &= ~(AMDTEMP_TTSR_SELCORE | AMDTEMP_TTSR_SELSENSOR);
686 switch (sensor) {
687 case CORE0_SENSOR1:
688 temp |= AMDTEMP_TTSR_SELSENSOR;
689 /* FALLTHROUGH */
690 case CORE0_SENSOR0:
691 case CORE0:
692 if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) != 0)
693 temp |= AMDTEMP_TTSR_SELCORE;
694 break;
695 case CORE1_SENSOR1:
696 temp |= AMDTEMP_TTSR_SELSENSOR;
697 /* FALLTHROUGH */
698 case CORE1_SENSOR0:
699 case CORE1:
700 if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) == 0)
701 temp |= AMDTEMP_TTSR_SELCORE;
702 break;
703 default:
704 __assert_unreachable();
705 }
706 pci_write_config(dev, AMDTEMP_THERMTP_STAT, temp, 1);
707
708 mask = (sc->sc_flags & AMDTEMP_FLAG_CT_10BIT) != 0 ? 0x3ff : 0x3fc;
709 offset = (sc->sc_flags & AMDTEMP_FLAG_ALT_OFFSET) != 0 ? 28 : 49;
710 temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 4);
711 temp = ((temp >> 14) & mask) * 5 / 2;
712 temp += AMDTEMP_ZERO_C_TO_K + (sc->sc_offset - offset) * 10;
713
714 mtx_unlock(&sc->sc_lock);
715 return (temp);
716 }
717
718 static uint32_t
amdtemp_decode_fam10h_to_17h(int32_t sc_offset,uint32_t val,bool minus49)719 amdtemp_decode_fam10h_to_17h(int32_t sc_offset, uint32_t val, bool minus49)
720 {
721 uint32_t temp;
722
723 /* Convert raw register subfield units (0.125C) to units of 0.1C. */
724 temp = (val & AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4;
725
726 if (minus49)
727 temp -= AMDTEMP_CURTMP_RANGE_ADJUST;
728
729 temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10;
730 return (temp);
731 }
732
733 static uint32_t
amdtemp_decode_fam10h_to_16h(int32_t sc_offset,uint32_t val)734 amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val)
735 {
736 bool minus49;
737
738 /*
739 * On Family 15h and higher, if CurTmpTjSel is 11b, the range is
740 * adjusted down by 49.0 degrees Celsius. (This adjustment is not
741 * documented in BKDGs prior to family 15h model 00h.)
742 */
743 minus49 = (CPUID_TO_FAMILY(cpu_id) >= 0x15 &&
744 ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) &
745 AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3);
746
747 return (amdtemp_decode_fam10h_to_17h(sc_offset,
748 val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
749 }
750
751 static uint32_t
amdtemp_decode_fam17h_tctl(int32_t sc_offset,uint32_t val)752 amdtemp_decode_fam17h_tctl(int32_t sc_offset, uint32_t val)
753 {
754 bool minus49;
755
756 minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0)
757 || ((val & AMDTEMP_17H_CUR_TMP_TJ_SEL) == AMDTEMP_17H_CUR_TMP_TJ_SEL);
758 return (amdtemp_decode_fam10h_to_17h(sc_offset,
759 val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
760 }
761
762 static int32_t
amdtemp_gettemp(device_t dev,amdsensor_t sensor)763 amdtemp_gettemp(device_t dev, amdsensor_t sensor)
764 {
765 struct amdtemp_softc *sc = device_get_softc(dev);
766 uint32_t temp;
767
768 temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4);
769 return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp));
770 }
771
772 static int32_t
amdtemp_gettemp15hm60h(device_t dev,amdsensor_t sensor)773 amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor)
774 {
775 struct amdtemp_softc *sc = device_get_softc(dev);
776 uint32_t val;
777 int error __diagused;
778
779 error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val);
780 KASSERT(error == 0, ("amdsmn_read"));
781 return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val));
782 }
783
784 static int32_t
amdtemp_gettemp17h(device_t dev,amdsensor_t sensor)785 amdtemp_gettemp17h(device_t dev, amdsensor_t sensor)
786 {
787 struct amdtemp_softc *sc = device_get_softc(dev);
788 uint32_t val;
789 int error __diagused;
790
791 switch (sensor) {
792 case CORE0_SENSOR0:
793 /* Tctl */
794 error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val);
795 KASSERT(error == 0, ("amdsmn_read"));
796 return (amdtemp_decode_fam17h_tctl(sc->sc_offset, val));
797 case CCD_BASE ... CCD_MAX:
798 /* Tccd<N> */
799 error = amdsmn_read(sc->sc_smn, sc->sc_temp_base +
800 (((int)sensor - CCD_BASE) * sizeof(val)), &val);
801 KASSERT(error == 0, ("amdsmn_read2"));
802 KASSERT((val & AMDTEMP_17H_CCD_TMP_VALID) != 0,
803 ("sensor %d: not valid", (int)sensor));
804 return (amdtemp_decode_fam10h_to_17h(sc->sc_offset, val, true));
805 default:
806 __assert_unreachable();
807 }
808 }
809
810 static void
amdtemp_probe_ccd_sensors(device_t dev,uint32_t maxreg)811 amdtemp_probe_ccd_sensors(device_t dev, uint32_t maxreg)
812 {
813 char sensor_name[16], sensor_descr[32];
814 struct amdtemp_softc *sc;
815 uint32_t i, val;
816 int error;
817
818 sc = device_get_softc(dev);
819 for (i = 0; i < maxreg; i++) {
820 error = amdsmn_read(sc->sc_smn, sc->sc_temp_base +
821 (i * sizeof(val)), &val);
822 if (error != 0)
823 continue;
824 if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0)
825 continue;
826
827 snprintf(sensor_name, sizeof(sensor_name), "ccd%u", i);
828 snprintf(sensor_descr, sizeof(sensor_descr),
829 "CCD %u temperature (Tccd%u)", i, i);
830
831 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
832 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
833 sensor_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
834 dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr);
835 }
836 }
837
838 static void
amdtemp_probe_ccd_sensors17h(device_t dev,uint32_t model)839 amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model)
840 {
841 uint32_t maxreg;
842
843 switch (model) {
844 case 0x00 ... 0x2f: /* Zen1, Zen+ */
845 maxreg = 4;
846 break;
847 case 0x30 ... 0x3f: /* Zen2 TR (Castle Peak)/EPYC (Rome) */
848 case 0x60 ... 0x7f: /* Zen2 Ryzen (Renoir APU, Matisse) */
849 case 0x90 ... 0x9f: /* Zen2 Ryzen (Van Gogh APU) */
850 maxreg = 8;
851 _Static_assert((int)NUM_CCDS >= 8, "");
852 break;
853 default:
854 device_printf(dev,
855 "Unrecognized Family 17h Model: %02xh\n", model);
856 return;
857 }
858
859 amdtemp_probe_ccd_sensors(dev, maxreg);
860 }
861
862 static void
amdtemp_probe_ccd_sensors19h(device_t dev,uint32_t model)863 amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model)
864 {
865 struct amdtemp_softc *sc = device_get_softc(dev);
866 uint32_t maxreg;
867
868 switch (model) {
869 case 0x00 ... 0x0f: /* Zen3 EPYC "Milan" */
870 case 0x20 ... 0x2f: /* Zen3 Ryzen "Vermeer" */
871 maxreg = 8;
872 _Static_assert((int)NUM_CCDS >= 8, "");
873 break;
874 case 0x10 ... 0x1f:
875 sc->sc_temp_base = AMDTEMP_ZEN4_10H_CCD_TMP_BASE;
876 maxreg = 12;
877 _Static_assert((int)NUM_CCDS >= 12, "");
878 break;
879 case 0x40 ... 0x4f: /* Zen3+ Ryzen "Rembrandt" */
880 case 0x60 ... 0x6f: /* Zen4 Ryzen "Raphael" */
881 case 0x70 ... 0x7f: /* Zen4 Ryzen "Phoenix" */
882 sc->sc_temp_base = AMDTEMP_ZEN4_CCD_TMP_BASE;
883 maxreg = 8;
884 _Static_assert((int)NUM_CCDS >= 8, "");
885 break;
886 default:
887 device_printf(dev,
888 "Unrecognized Family 19h Model: %02xh\n", model);
889 return;
890 }
891
892 amdtemp_probe_ccd_sensors(dev, maxreg);
893 }
894