xref: /illumos-gate/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c (revision 1bff1300cebf1ea8e11ce928b10e208097e67f24)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Robert Mustacchi
14  */
15 
16 /*
17  * AMD Northbridge CPU Temperature Driver
18  *
19  * The AMD northbridge CPU temperature driver supports the temperature sensor
20  * that was found on the AMD northbridge on AMD CPUs from approximately AMD
21  * Family 10h to Family 16h. For Zen and newer processors (Family 17h+) see the
22  * 'amdf17nbdf' driver.
23  *
24  * The temperature is stored on the 'miscellaneous' device on the northbridge.
25  * This is always found at PCI Device 18h, Function 3h. When there is more than
26  * one 'node' (see cpuid.c for the AMD parlance), then the node id is added to
27  * the device to create a unique device. This allows us to map the given PCI
28  * device we find back to the corresponding CPU.
29  *
30  * While all family 10h, 11h, 12h, 14h, and 16h CPUs are supported, not all
31  * family 15h CPUs are. Models 60h+ require the SMN interface, which this does
32  * not know how to consume.
33  */
34 
35 #include <sys/modctl.h>
36 #include <sys/conf.h>
37 #include <sys/devops.h>
38 #include <sys/types.h>
39 #include <sys/file.h>
40 #include <sys/open.h>
41 #include <sys/cred.h>
42 #include <sys/stat.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/cmn_err.h>
46 #include <sys/pci.h>
47 #include <sys/stddef.h>
48 #include <sys/cpuvar.h>
49 #include <sys/x86_archext.h>
50 #include <sys/list.h>
51 #include <sys/bitset.h>
52 #include <sys/sensors.h>
53 
/*
 * Offset, within PCI configuration space, of the register that reports the
 * current temperature of the device, together with accessors for the two
 * fields of that register that this driver consumes: the temperature reading
 * (bits 31:21) and the CurTmpTjSel selector (bits 17:16).
 */
#define	AMDNBTEMP_TEMPREG	0xa4
#define	AMDNBTEMP_TEMPREG_CURTMP(x)	BITX(x, 31, 21)
#define	AMDNBTEMP_TEMPREG_TJSEL(x)	BITX(x, 17, 16)

/*
 * The temperature field counts in units of 1/8th of a degree C. The shift is
 * the base-2 logarithm of that granularity, so the granularity is derived from
 * the shift to keep the two from drifting apart.
 */
#define	AMDNBTEMP_GSHIFT	3
#define	AMDNBTEMP_GRANULARITY	(1 << AMDNBTEMP_GSHIFT)

/*
 * When CurTmpTjSel reads as three, the range that the reading is in is shifted
 * by -49 degrees, so that amount (expressed in register units) has to be
 * subtracted from the raw value. In this mode, the bottom two bits of the
 * reading always read as zero.
 */
#define	AMDNBTEMP_TJSEL_ADJUST	0x3
#define	AMDNBTEMP_TEMP_ADJUST	(49 << AMDNBTEMP_GSHIFT)

/*
 * The number of northbridges in a system varies. The AMD BIOS and Kernel
 * Developer's Guide (BKDG) says that for these families the first node has a
 * PCI device number of 0x18, which puts node 7, the maximum, at device 0x1f.
 */
#define	AMDNBTEMP_FIRST_DEV	0x18
83 
/*
 * Flags recording which per-instance resources have been set up. Attach sets
 * each bit as the corresponding step succeeds and cleanup tears down exactly
 * the steps whose bits are set, clearing them as it goes.
 */
typedef enum amdnbtemp_state {
	AMDNBTEMP_S_CFGSPACE	= 1 << 0,	/* PCI config space is mapped */
	AMDNBTEMP_S_MUTEX	= 1 << 1,	/* at_mutex is initialized */
	AMDNBTEMP_S_MINOR	= 1 << 2,	/* minor node has been created */
	AMDNBTEMP_S_LIST	= 1 << 3	/* instance is on the global list */
} amdnbtemp_state_t;
90 
/*
 * Per-instance soft state. One of these is allocated for each device that the
 * driver attaches to.
 */
typedef struct amdnbtemp {
	/* Bitmask of amdnbtemp_state_t flags; drives teardown in cleanup. */
	amdnbtemp_state_t	at_state;
	/* Linkage for the global amdnbtemp_list. */
	list_node_t		at_link;
	/* The devinfo node that this instance was attached with. */
	dev_info_t		*at_dip;
	/* Handle used to read the device's PCI configuration space. */
	ddi_acc_handle_t	at_cfgspace;
	/* Location fields decoded from the first 'reg' entry during attach. */
	uint_t			at_bus;
	uint_t			at_dev;
	uint_t			at_func;
	/* Minor number: at_dev relative to AMDNBTEMP_FIRST_DEV. */
	minor_t			at_minor;
	/* Set in attach when CurTmpTjSel must be consulted on each read. */
	boolean_t		at_tjsel;
	/* Must be held when calling amdnbtemp_read(). */
	kmutex_t		at_mutex;
	/* Most recent raw value read from AMDNBTEMP_TEMPREG. */
	uint32_t		at_raw;
	/* Most recent temperature, in units of 1/AMDNBTEMP_GRANULARITY C. */
	int64_t			at_temp;
} amdnbtemp_t;
105 
/*
 * Driver-wide state: the soft state anchor handed to the ddi_soft_state
 * routines, the list of every attached instance, and the mutex that is held
 * whenever that list is walked or modified.
 */
static void *amdnbtemp_state;
static list_t amdnbtemp_list;
static kmutex_t amdnbtemp_mutex;
109 
110 static amdnbtemp_t *
111 amdnbtemp_find_by_dev(dev_t dev)
112 {
113 	minor_t m = getminor(dev);
114 	amdnbtemp_t *at;
115 
116 	mutex_enter(&amdnbtemp_mutex);
117 	for (at = list_head(&amdnbtemp_list); at != NULL;
118 	    at = list_next(&amdnbtemp_list, at)) {
119 		if (at->at_minor == m) {
120 			break;
121 		}
122 	}
123 	mutex_exit(&amdnbtemp_mutex);
124 
125 	return (at);
126 }
127 
128 static int
129 amdnbtemp_read(amdnbtemp_t *at)
130 {
131 	ASSERT(MUTEX_HELD(&at->at_mutex));
132 
133 	at->at_raw = pci_config_get32(at->at_cfgspace, AMDNBTEMP_TEMPREG);
134 	if (at->at_raw == PCI_EINVAL32) {
135 		return (EIO);
136 	}
137 
138 	at->at_temp = AMDNBTEMP_TEMPREG_CURTMP(at->at_raw);
139 	if (at->at_tjsel &&
140 	    AMDNBTEMP_TEMPREG_TJSEL(at->at_raw) == AMDNBTEMP_TJSEL_ADJUST) {
141 		at->at_temp -= AMDNBTEMP_TEMP_ADJUST;
142 	}
143 
144 	return (0);
145 }
146 
147 static int
148 amdnbtemp_open(dev_t *devp, int flags, int otype, cred_t *credp)
149 {
150 	amdnbtemp_t *at;
151 
152 	if (crgetzoneid(credp) != GLOBAL_ZONEID || drv_priv(credp) != 0) {
153 		return (EPERM);
154 	}
155 
156 	if ((flags & (FEXCL | FNDELAY | FWRITE)) != 0) {
157 		return (EINVAL);
158 	}
159 
160 	if (otype != OTYP_CHR) {
161 		return (EINVAL);
162 	}
163 
164 	at = amdnbtemp_find_by_dev(*devp);
165 	if (at == NULL) {
166 		return (ENXIO);
167 	}
168 
169 	return (0);
170 }
171 
172 static int
173 amdnbtemp_ioctl_kind(intptr_t arg, int mode)
174 {
175 	sensor_ioctl_kind_t kind;
176 
177 	bzero(&kind, sizeof (kind));
178 	kind.sik_kind = SENSOR_KIND_TEMPERATURE;
179 
180 	if (ddi_copyout(&kind, (void *)arg, sizeof (kind), mode & FKIOCTL) !=
181 	    0) {
182 		return (EFAULT);
183 	}
184 
185 	return (0);
186 }
187 
188 static int
189 amdnbtemp_ioctl_temp(amdnbtemp_t *at, intptr_t arg, int mode)
190 {
191 	int ret;
192 	sensor_ioctl_temperature_t temp;
193 
194 	bzero(&temp, sizeof (temp));
195 
196 	mutex_enter(&at->at_mutex);
197 	if ((ret = amdnbtemp_read(at)) != 0) {
198 		mutex_exit(&at->at_mutex);
199 		return (ret);
200 	}
201 
202 	temp.sit_unit = SENSOR_UNIT_CELSIUS;
203 	temp.sit_gran = AMDNBTEMP_GRANULARITY;
204 	temp.sit_temp = at->at_temp;
205 	mutex_exit(&at->at_mutex);
206 
207 	if (ddi_copyout(&temp, (void *)arg, sizeof (temp), mode & FKIOCTL) !=
208 	    0) {
209 		return (EFAULT);
210 	}
211 
212 	return (0);
213 }
214 
215 static int
216 amdnbtemp_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
217     int *rvalp)
218 {
219 	amdnbtemp_t *at;
220 
221 	at = amdnbtemp_find_by_dev(dev);
222 	if (at == NULL) {
223 		return (ENXIO);
224 	}
225 
226 	if ((mode & FREAD) == 0) {
227 		return (EINVAL);
228 	}
229 
230 	switch (cmd) {
231 	case SENSOR_IOCTL_TYPE:
232 		return (amdnbtemp_ioctl_kind(arg, mode));
233 	case SENSOR_IOCTL_TEMPERATURE:
234 		return (amdnbtemp_ioctl_temp(at, arg, mode));
235 	default:
236 		return (ENOTTY);
237 	}
238 }
239 
/*
 * close(9E) entry point. Open does not record any per-open state, so there is
 * nothing to release here; always returns 0.
 */
static int
amdnbtemp_close(dev_t dev, int flags, int otype, cred_t *credp)
{
	return (0);
}
245 
246 static void
247 amdnbtemp_cleanup(amdnbtemp_t *at)
248 {
249 	int inst;
250 	inst = ddi_get_instance(at->at_dip);
251 
252 	if ((at->at_state & AMDNBTEMP_S_LIST) != 0) {
253 		mutex_enter(&amdnbtemp_mutex);
254 		list_remove(&amdnbtemp_list, at);
255 		mutex_exit(&amdnbtemp_mutex);
256 		at->at_state &= ~AMDNBTEMP_S_LIST;
257 	}
258 
259 	if ((at->at_state & AMDNBTEMP_S_MINOR) != 0) {
260 		ddi_remove_minor_node(at->at_dip, NULL);
261 		at->at_state &= ~AMDNBTEMP_S_MINOR;
262 	}
263 
264 	if ((at->at_state & AMDNBTEMP_S_MUTEX) != 0) {
265 		mutex_destroy(&at->at_mutex);
266 		at->at_state &= ~AMDNBTEMP_S_MUTEX;
267 	}
268 
269 	if ((at->at_state & AMDNBTEMP_S_CFGSPACE) != 0) {
270 		pci_config_teardown(&at->at_cfgspace);
271 		at->at_state &= ~AMDNBTEMP_S_CFGSPACE;
272 	}
273 
274 	ASSERT0(at->at_state);
275 	ddi_soft_state_free(amdnbtemp_state, inst);
276 }
277 
278 /*
279  * For several family 10h processors, certain models have an erratum which says
280  * that temperature information is unreliable. If we're on a platform that is
281  * subject to this erratum, do not attach to the device.
282  */
283 static boolean_t
284 amdnbtemp_erratum_319(void)
285 {
286 	uint32_t socket;
287 
288 	if (cpuid_getfamily(CPU) != 0x10) {
289 		return (B_FALSE);
290 	}
291 
292 	/*
293 	 * All Family 10h socket F parts are impacted. Socket AM2 parts are all
294 	 * impacted. The family 10h socket bits in cpuid share the same bit for
295 	 * socket AM2 and AM3. If you look at the erratum description, they use
296 	 * information about the memory controller to do DDR2/DDR3
297 	 * disambiguation to determine whether it's socket AM2 or AM3. Our cpuid
298 	 * subroutines already do the DDR2/DDR3 disambiguation so we can just
299 	 * check the socket type as the disambiguation has already been done.
300 	 */
301 	socket = cpuid_getsockettype(CPU);
302 	if (socket == X86_SOCKET_F1207 || socket == X86_SOCKET_AM2R2) {
303 		return (B_TRUE);
304 	}
305 
306 	return (B_FALSE);
307 }
308 
309 static int
310 amdnbtemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
311 {
312 	int inst, *regs;
313 	amdnbtemp_t *at;
314 	uint_t nregs;
315 	char buf[128];
316 
317 	switch (cmd) {
318 	case DDI_RESUME:
319 		return (DDI_SUCCESS);
320 	case DDI_ATTACH:
321 		break;
322 	default:
323 		return (DDI_FAILURE);
324 	}
325 
326 	inst = ddi_get_instance(dip);
327 	if (ddi_soft_state_zalloc(amdnbtemp_state, inst) != DDI_SUCCESS) {
328 		dev_err(dip, CE_WARN, "failed to allocate soft state entry %d",
329 		    inst);
330 		return (DDI_FAILURE);
331 	}
332 
333 	at = ddi_get_soft_state(amdnbtemp_state, inst);
334 	if (at == NULL) {
335 		dev_err(dip, CE_WARN, "failed to retrieve soft state entry %d",
336 		    inst);
337 		return (DDI_FAILURE);
338 	}
339 
340 	at->at_dip = dip;
341 
342 	if (pci_config_setup(dip, &at->at_cfgspace) != DDI_SUCCESS) {
343 		dev_err(dip, CE_WARN, "failed to set up PCI config space");
344 		goto err;
345 	}
346 	at->at_state |= AMDNBTEMP_S_CFGSPACE;
347 
348 	if (amdnbtemp_erratum_319()) {
349 		dev_err(dip, CE_WARN, "!device subject to AMD Erratum 319, "
350 		    "not attaching to unreliable sensor");
351 		goto err;
352 	}
353 
354 	mutex_init(&at->at_mutex, NULL, MUTEX_DRIVER, NULL);
355 	at->at_state |= AMDNBTEMP_S_MUTEX;
356 
357 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, 0, "reg",
358 	    &regs, &nregs) != DDI_PROP_SUCCESS) {
359 		dev_err(dip, CE_WARN, "failed to get pci 'reg' property");
360 		goto err;
361 	}
362 
363 	if (nregs < 1) {
364 		dev_err(dip, CE_WARN, "'reg' property missing PCI b/d/f");
365 		ddi_prop_free(regs);
366 		goto err;
367 	}
368 
369 	at->at_bus = PCI_REG_BUS_G(regs[0]);
370 	at->at_dev = PCI_REG_DEV_G(regs[0]);
371 	at->at_func = PCI_REG_DEV_G(regs[0]);
372 	ddi_prop_free(regs);
373 
374 	if (at->at_dev < AMDNBTEMP_FIRST_DEV) {
375 		dev_err(dip, CE_WARN, "Invalid pci b/d/f device, found 0x%x",
376 		    at->at_dev);
377 		goto err;
378 	}
379 
380 	at->at_minor = at->at_dev - AMDNBTEMP_FIRST_DEV;
381 	if (snprintf(buf, sizeof (buf), "procnode.%u", at->at_minor) >=
382 	    sizeof (buf)) {
383 		dev_err(dip, CE_WARN, "unexpected buffer name overrun "
384 		    "constructing minor %u", at->at_minor);
385 		goto err;
386 	}
387 
388 	if (ddi_create_minor_node(dip, buf, S_IFCHR, at->at_minor,
389 	    DDI_NT_SENSOR_TEMP_CPU, 0) != DDI_SUCCESS) {
390 		dev_err(dip, CE_WARN, "failed to create minor node %s",
391 		    buf);
392 		goto err;
393 	}
394 	at->at_state |= AMDNBTEMP_S_MINOR;
395 
396 	mutex_enter(&amdnbtemp_mutex);
397 	list_insert_tail(&amdnbtemp_list, at);
398 	mutex_exit(&amdnbtemp_mutex);
399 	at->at_state |= AMDNBTEMP_S_LIST;
400 
401 	/*
402 	 * On families 15h and 16h the BKDG documents that the CurTmpTjSel bits
403 	 * of the temperature register dictate how the temperature reading
404 	 * should be interpreted. Capture that now.
405 	 */
406 	if (cpuid_getfamily(CPU) >= 0x15) {
407 		at->at_tjsel = B_TRUE;
408 	}
409 
410 	mutex_enter(&at->at_mutex);
411 	(void) amdnbtemp_read(at);
412 	mutex_exit(&at->at_mutex);
413 
414 	return (DDI_SUCCESS);
415 
416 err:
417 	amdnbtemp_cleanup(at);
418 	return (DDI_FAILURE);
419 }
420 
421 static int
422 amdnbtemp_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
423     void **resultp)
424 {
425 	amdnbtemp_t *at;
426 
427 	if (cmd != DDI_INFO_DEVT2DEVINFO && cmd != DDI_INFO_DEVT2INSTANCE) {
428 		return (DDI_FAILURE);
429 	}
430 
431 	at = amdnbtemp_find_by_dev((dev_t)arg);
432 	if (at == NULL) {
433 		return (DDI_FAILURE);
434 	}
435 
436 	if (cmd == DDI_INFO_DEVT2DEVINFO) {
437 		*resultp = at->at_dip;
438 	} else {
439 		*resultp = (void *)(uintptr_t)ddi_get_instance(at->at_dip);
440 	}
441 
442 	return (DDI_SUCCESS);
443 }
444 
445 static int
446 amdnbtemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
447 {
448 	int inst;
449 	amdnbtemp_t *at;
450 
451 	switch (cmd) {
452 	case DDI_DETACH:
453 		break;
454 	case DDI_SUSPEND:
455 		return (DDI_SUCCESS);
456 	default:
457 		return (DDI_FAILURE);
458 	}
459 
460 	inst = ddi_get_instance(dip);
461 	at = ddi_get_soft_state(amdnbtemp_state, inst);
462 	if (at == NULL) {
463 		dev_err(dip, CE_WARN, "asked to detach instance %d, but it is "
464 		    "missing from the soft state", inst);
465 		return (DDI_FAILURE);
466 	}
467 
468 	amdnbtemp_cleanup(at);
469 	return (DDI_SUCCESS);
470 }
471 
/*
 * Character device entry points. Only open, close, and ioctl are implemented
 * by this driver; every other data-path operation is routed to nodev (or
 * nochpoll for polling).
 */
static struct cb_ops amdnbtemp_cb_ops = {
	.cb_open = amdnbtemp_open,
	.cb_close = amdnbtemp_close,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_ioctl = amdnbtemp_ioctl,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_flag = D_MP,
	.cb_rev = CB_REV,
	.cb_aread = nodev,
	.cb_awrite = nodev
};
491 
/*
 * Device operations. The driver supplies its own getinfo, attach, and detach
 * routines and points at the character entry points above; the remaining
 * slots use the stock DDI routines named below.
 */
static struct dev_ops amdnbtemp_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = amdnbtemp_getinfo,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdnbtemp_attach,
	.devo_detach = amdnbtemp_detach,
	.devo_reset = nodev,
	.devo_power = ddi_power,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_cb_ops = &amdnbtemp_cb_ops
};
505 
/*
 * Module linkage for a device driver: ties the description string and the
 * dev_ops above to the driver module operations.
 */
static struct modldrv amdnbtemp_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD NB Temp Driver",
	.drv_dev_ops = &amdnbtemp_dev_ops
};
511 
/*
 * Top-level linkage structure handed to mod_install(), mod_info(), and
 * mod_remove(). The linkage array is NULL-terminated.
 */
static struct modlinkage amdnbtemp_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdnbtemp_modldrv, NULL }
};
516 
517 int
518 _init(void)
519 {
520 	int ret;
521 
522 	if (ddi_soft_state_init(&amdnbtemp_state, sizeof (amdnbtemp_t), 2) !=
523 	    DDI_SUCCESS) {
524 		return (ENOMEM);
525 	}
526 
527 	if ((ret = mod_install(&amdnbtemp_modlinkage)) != 0) {
528 		ddi_soft_state_fini(&amdnbtemp_state);
529 		return (ret);
530 	}
531 
532 	list_create(&amdnbtemp_list, sizeof (amdnbtemp_t),
533 	    offsetof(amdnbtemp_t, at_link));
534 	mutex_init(&amdnbtemp_mutex, NULL, MUTEX_DRIVER, NULL);
535 
536 	return (ret);
537 }
538 
/*
 * Module information entry point; simply reports on this module's linkage via
 * mod_info() and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&amdnbtemp_modlinkage, modinfop));
}
544 
545 int
546 _fini(void)
547 {
548 	int ret;
549 
550 	if ((ret = mod_remove(&amdnbtemp_modlinkage)) != 0) {
551 		return (ret);
552 	}
553 
554 	mutex_destroy(&amdnbtemp_mutex);
555 	list_destroy(&amdnbtemp_list);
556 	ddi_soft_state_fini(&amdnbtemp_state);
557 	return (ret);
558 }
559