1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Robert Mustacchi
14  * Copyright 2023 Oxide Computer Company
15  */
16 
17 /*
18  * AMD Northbridge CPU Temperature Driver
19  *
20  * The AMD northbridge CPU temperature driver supports the temperature sensor
21  * that was found on the AMD northbridge on AMD CPUs from approximately AMD
22  * Family 10h to Family 16h. For Zen and newer processors (Family 17h+) see the
23  * 'smntemp' driver.
24  *
25  * The temperature is stored on the 'miscellaneous' device on the northbridge.
26  * This is always found at PCI Device 18h, Function 3h. When there is more than
27  * one 'node' (see cpuid.c for the AMD parlance), then the node id is added to
28  * the device to create a unique device. This allows us to map the given PCI
29  * device we find back to the corresponding CPU.
30  *
31  * While all family 10h, 11h, 12h, 14h, and 16h CPUs are supported, not all
32  * family 15h CPUs are. Models 60h+ require the SMN interface, which this does
33  * not know how to consume.
34  */
35 
36 #include <sys/modctl.h>
37 #include <sys/conf.h>
38 #include <sys/devops.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/cmn_err.h>
44 #include <sys/pci.h>
45 #include <sys/stddef.h>
46 #include <sys/cpuvar.h>
47 #include <sys/x86_archext.h>
48 #include <sys/list.h>
49 #include <sys/bitset.h>
50 #include <sys/sensors.h>
51 
/*
 * PCI configuration space offset of the register that reports the current
 * temperature, and accessors for the two fields of it that we consume.
 */
#define	AMDNBTEMP_TEMPREG		0xa4
#define	AMDNBTEMP_TEMPREG_CURTMP(x)	BITX((x), 31, 21)
#define	AMDNBTEMP_TEMPREG_TJSEL(x)	BITX((x), 17, 16)

/*
 * The temperature field counts in units of 1/8th of a degree C, so the
 * granularity is eight steps per degree (a shift of three bits).
 */
#define	AMDNBTEMP_GSHIFT	3
#define	AMDNBTEMP_GRANULARITY	(1 << AMDNBTEMP_GSHIFT)

/*
 * When CurTmpTjSel reads as three, the reported range is shifted by -49
 * degrees, so 49 degrees' worth of steps must be subtracted from the raw
 * value. In this mode, the bottom two bits always read as zero.
 */
#define	AMDNBTEMP_TJSEL_ADJUST	0x3
#define	AMDNBTEMP_TEMP_ADJUST	(49 << AMDNBTEMP_GSHIFT)

/*
 * The number of northbridges in a system varies. The AMD BIOS and Kernel
 * Developer's Guide (BKDG) says that for these families, the first node has a
 * PCI device number of 0x18, which puts node 7, the maximum, at device 0x1f.
 */
#define	AMDNBTEMP_FIRST_DEV	0x18
81 
/*
 * Flags recording which attach-time resources have been set up for an
 * instance. amdnbtemp_cleanup() tears down exactly the ones that are set.
 *
 * Note: the enum tag and the KSENSOR constant are misspelled; the spellings
 * are kept as-is because the rest of the driver refers to them.
 */
typedef enum andnbtemp_state {
	AMDNBTEMP_S_CFGSPACE	= 0x1,	/* PCI config space handle is mapped */
	AMDNBTEMP_S_MUTEX	= 0x2,	/* at_mutex has been initialized */
	AMDNBTMEP_S_KSENSOR	= 0x4	/* ksensor has been created */
} amdnbtemp_state_t;
87 
88 typedef struct amdnbtemp {
89 	amdnbtemp_state_t	at_state;
90 	dev_info_t		*at_dip;
91 	ddi_acc_handle_t	at_cfgspace;
92 	uint_t			at_bus;
93 	uint_t			at_dev;
94 	uint_t			at_func;
95 	id_t			at_ksensor;
96 	minor_t			at_minor;
97 	boolean_t		at_tjsel;
98 	kmutex_t		at_mutex;
99 	uint32_t		at_raw;
100 	int64_t			at_temp;
101 } amdnbtemp_t;
102 
103 static void *amdnbtemp_state;
104 
105 static int
amdnbtemp_read(void * arg,sensor_ioctl_scalar_t * scalar)106 amdnbtemp_read(void *arg, sensor_ioctl_scalar_t *scalar)
107 {
108 	amdnbtemp_t *at = arg;
109 
110 	mutex_enter(&at->at_mutex);
111 	at->at_raw = pci_config_get32(at->at_cfgspace, AMDNBTEMP_TEMPREG);
112 	if (at->at_raw == PCI_EINVAL32) {
113 		mutex_exit(&at->at_mutex);
114 		return (EIO);
115 	}
116 
117 	at->at_temp = AMDNBTEMP_TEMPREG_CURTMP(at->at_raw);
118 	if (at->at_tjsel &&
119 	    AMDNBTEMP_TEMPREG_TJSEL(at->at_raw) == AMDNBTEMP_TJSEL_ADJUST) {
120 		at->at_temp -= AMDNBTEMP_TEMP_ADJUST;
121 	}
122 
123 	scalar->sis_unit = SENSOR_UNIT_CELSIUS;
124 	scalar->sis_gran = AMDNBTEMP_GRANULARITY;
125 	scalar->sis_value = at->at_temp;
126 	mutex_exit(&at->at_mutex);
127 
128 	return (0);
129 }
130 
131 static const ksensor_ops_t amdnbtemp_temp_ops = {
132 	.kso_kind = ksensor_kind_temperature,
133 	.kso_scalar = amdnbtemp_read
134 };
135 
136 static void
amdnbtemp_cleanup(amdnbtemp_t * at)137 amdnbtemp_cleanup(amdnbtemp_t *at)
138 {
139 	int inst;
140 	inst = ddi_get_instance(at->at_dip);
141 
142 	if ((at->at_state & AMDNBTMEP_S_KSENSOR) != 0) {
143 		(void) ksensor_remove(at->at_dip, KSENSOR_ALL_IDS);
144 		at->at_state &= ~AMDNBTMEP_S_KSENSOR;
145 	}
146 
147 	if ((at->at_state & AMDNBTEMP_S_MUTEX) != 0) {
148 		mutex_destroy(&at->at_mutex);
149 		at->at_state &= ~AMDNBTEMP_S_MUTEX;
150 	}
151 
152 	if ((at->at_state & AMDNBTEMP_S_CFGSPACE) != 0) {
153 		pci_config_teardown(&at->at_cfgspace);
154 		at->at_state &= ~AMDNBTEMP_S_CFGSPACE;
155 	}
156 
157 	ASSERT0(at->at_state);
158 	ddi_soft_state_free(amdnbtemp_state, inst);
159 }
160 
161 /*
162  * For several family 10h processors, certain models have an erratum which says
163  * that temperature information is unreliable. If we're on a platform that is
164  * subject to this erratum, do not attach to the device.
165  */
166 static boolean_t
amdnbtemp_erratum_319(void)167 amdnbtemp_erratum_319(void)
168 {
169 	uint32_t socket;
170 
171 	if (cpuid_getfamily(CPU) != 0x10) {
172 		return (B_FALSE);
173 	}
174 
175 	/*
176 	 * All Family 10h socket F parts are impacted. Socket AM2 parts are all
177 	 * impacted. The family 10h socket bits in cpuid share the same bit for
178 	 * socket AM2 and AM3. If you look at the erratum description, they use
179 	 * information about the memory controller to do DDR2/DDR3
180 	 * disambiguation to determine whether it's socket AM2 or AM3. Our cpuid
181 	 * subroutines already do the DDR2/DDR3 disambiguation so we can just
182 	 * check the socket type as the disambiguation has already been done.
183 	 */
184 	socket = cpuid_getsockettype(CPU);
185 	if (socket == X86_SOCKET_F1207 || socket == X86_SOCKET_AM2R2) {
186 		return (B_TRUE);
187 	}
188 
189 	return (B_FALSE);
190 }
191 
192 static int
amdnbtemp_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)193 amdnbtemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
194 {
195 	int inst, *regs, ret;
196 	amdnbtemp_t *at;
197 	uint_t nregs, id;
198 	char buf[128];
199 
200 	switch (cmd) {
201 	case DDI_RESUME:
202 		return (DDI_SUCCESS);
203 	case DDI_ATTACH:
204 		break;
205 	default:
206 		return (DDI_FAILURE);
207 	}
208 
209 	inst = ddi_get_instance(dip);
210 	if (ddi_soft_state_zalloc(amdnbtemp_state, inst) != DDI_SUCCESS) {
211 		dev_err(dip, CE_WARN, "failed to allocate soft state entry %d",
212 		    inst);
213 		return (DDI_FAILURE);
214 	}
215 
216 	at = ddi_get_soft_state(amdnbtemp_state, inst);
217 	if (at == NULL) {
218 		dev_err(dip, CE_WARN, "failed to retrieve soft state entry %d",
219 		    inst);
220 		return (DDI_FAILURE);
221 	}
222 
223 	at->at_dip = dip;
224 
225 	if (pci_config_setup(dip, &at->at_cfgspace) != DDI_SUCCESS) {
226 		dev_err(dip, CE_WARN, "failed to set up PCI config space");
227 		goto err;
228 	}
229 	at->at_state |= AMDNBTEMP_S_CFGSPACE;
230 
231 	if (amdnbtemp_erratum_319()) {
232 		dev_err(dip, CE_WARN, "!device subject to AMD Erratum 319, "
233 		    "not attaching to unreliable sensor");
234 		goto err;
235 	}
236 
237 	mutex_init(&at->at_mutex, NULL, MUTEX_DRIVER, NULL);
238 	at->at_state |= AMDNBTEMP_S_MUTEX;
239 
240 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, 0, "reg",
241 	    ®s, &nregs) != DDI_PROP_SUCCESS) {
242 		dev_err(dip, CE_WARN, "failed to get pci 'reg' property");
243 		goto err;
244 	}
245 
246 	if (nregs < 1) {
247 		dev_err(dip, CE_WARN, "'reg' property missing PCI b/d/f");
248 		ddi_prop_free(regs);
249 		goto err;
250 	}
251 
252 	at->at_bus = PCI_REG_BUS_G(regs[0]);
253 	at->at_dev = PCI_REG_DEV_G(regs[0]);
254 	at->at_func = PCI_REG_DEV_G(regs[0]);
255 	ddi_prop_free(regs);
256 
257 	if (at->at_dev < AMDNBTEMP_FIRST_DEV) {
258 		dev_err(dip, CE_WARN, "Invalid pci b/d/f device, found 0x%x",
259 		    at->at_dev);
260 		goto err;
261 	}
262 
263 	id = at->at_dev - AMDNBTEMP_FIRST_DEV;
264 	if (snprintf(buf, sizeof (buf), "procnode.%u", id) >= sizeof (buf)) {
265 		dev_err(dip, CE_WARN, "unexpected buffer name overrun "
266 		    "constructing sensor %u", id);
267 		goto err;
268 	}
269 
270 	/*
271 	 * On families 15h and 16h the BKDG documents that the CurTmpTjSel bits
272 	 * of the temperature register dictate how the temperature reading
273 	 * should be interpreted. Capture that now.
274 	 */
275 	if (cpuid_getfamily(CPU) >= 0x15) {
276 		at->at_tjsel = B_TRUE;
277 	}
278 
279 	if ((ret = ksensor_create(dip, &amdnbtemp_temp_ops, at, buf,
280 	    DDI_NT_SENSOR_TEMP_CPU, &at->at_ksensor)) != 0) {
281 		dev_err(dip, CE_WARN, "failed to create ksensor for %s: %d",
282 		    buf, ret);
283 		goto err;
284 	}
285 	at->at_state |= AMDNBTMEP_S_KSENSOR;
286 
287 	return (DDI_SUCCESS);
288 
289 err:
290 	amdnbtemp_cleanup(at);
291 	return (DDI_FAILURE);
292 }
293 
294 static int
amdnbtemp_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)295 amdnbtemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
296 {
297 	int inst;
298 	amdnbtemp_t *at;
299 
300 	switch (cmd) {
301 	case DDI_DETACH:
302 		break;
303 	case DDI_SUSPEND:
304 		return (DDI_SUCCESS);
305 	default:
306 		return (DDI_FAILURE);
307 	}
308 
309 	inst = ddi_get_instance(dip);
310 	at = ddi_get_soft_state(amdnbtemp_state, inst);
311 	if (at == NULL) {
312 		dev_err(dip, CE_WARN, "asked to detach instance %d, but it is "
313 		    "missing from the soft state", inst);
314 		return (DDI_FAILURE);
315 	}
316 
317 	amdnbtemp_cleanup(at);
318 	return (DDI_SUCCESS);
319 }
320 
321 static struct dev_ops amdnbtemp_dev_ops = {
322 	.devo_rev = DEVO_REV,
323 	.devo_refcnt = 0,
324 	.devo_getinfo = nodev,
325 	.devo_identify = nulldev,
326 	.devo_probe = nulldev,
327 	.devo_attach = amdnbtemp_attach,
328 	.devo_detach = amdnbtemp_detach,
329 	.devo_reset = nodev,
330 	.devo_quiesce = ddi_quiesce_not_needed
331 };
332 
333 static struct modldrv amdnbtemp_modldrv = {
334 	.drv_modops = &mod_driverops,
335 	.drv_linkinfo = "AMD NB Temp Driver",
336 	.drv_dev_ops = &amdnbtemp_dev_ops
337 };
338 
339 static struct modlinkage amdnbtemp_modlinkage = {
340 	.ml_rev = MODREV_1,
341 	.ml_linkage = { &amdnbtemp_modldrv, NULL }
342 };
343 
344 int
_init(void)345 _init(void)
346 {
347 	int ret;
348 
349 	if (ddi_soft_state_init(&amdnbtemp_state, sizeof (amdnbtemp_t), 2) !=
350 	    DDI_SUCCESS) {
351 		return (ENOMEM);
352 	}
353 
354 	if ((ret = mod_install(&amdnbtemp_modlinkage)) != 0) {
355 		ddi_soft_state_fini(&amdnbtemp_state);
356 		return (ret);
357 	}
358 
359 	return (ret);
360 }
361 
362 int
_info(struct modinfo * modinfop)363 _info(struct modinfo *modinfop)
364 {
365 	return (mod_info(&amdnbtemp_modlinkage, modinfop));
366 }
367 
368 int
_fini(void)369 _fini(void)
370 {
371 	int ret;
372 
373 	if ((ret = mod_remove(&amdnbtemp_modlinkage)) != 0) {
374 		return (ret);
375 	}
376 
377 	ddi_soft_state_fini(&amdnbtemp_state);
378 	return (ret);
379 }
380