1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2007, 2008 Rui Paulo <rpaulo@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 25 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Device driver for Intel's On Die thermal sensor via MSR. 31 * First introduced in Intel's Core line of processors. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/bus.h> 36 #include <sys/conf.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/module.h> 40 #include <sys/mutex.h> 41 #include <sys/proc.h> /* for curthread */ 42 #include <sys/smp.h> 43 #include <sys/sysctl.h> 44 #include <sys/systm.h> 45 46 #include <machine/specialreg.h> 47 #include <machine/cpufunc.h> 48 #include <machine/cputypes.h> 49 #include <machine/md_var.h> 50 51 #define TZ_ZEROC 2731 52 53 #define THERM_CRITICAL_STATUS_LOG 0x20 54 #define THERM_CRITICAL_STATUS 0x10 55 #define THERM_STATUS_LOG 0x02 56 #define THERM_STATUS 0x01 57 #define THERM_STATUS_TEMP_SHIFT 16 58 #define THERM_STATUS_TEMP_MASK 0x7f 59 #define THERM_STATUS_RES_SHIFT 27 60 #define THERM_STATUS_RES_MASK 0x0f 61 #define THERM_STATUS_VALID_SHIFT 31 62 #define THERM_STATUS_VALID_MASK 0x01 63 64 struct coretemp_softc { 65 device_t sc_dev; 66 int sc_tjmax; 67 unsigned int sc_throttle_log; 68 }; 69 70 /* 71 * Device methods. 72 */ 73 static void coretemp_identify(driver_t *driver, device_t parent); 74 static int coretemp_probe(device_t dev); 75 static int coretemp_attach(device_t dev); 76 static int coretemp_detach(device_t dev); 77 78 static uint64_t coretemp_get_thermal_msr(int cpu); 79 static void coretemp_clear_thermal_msr(int cpu); 80 static int coretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS); 81 static int coretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS); 82 83 static device_method_t coretemp_methods[] = { 84 /* Device interface */ 85 DEVMETHOD(device_identify, coretemp_identify), 86 DEVMETHOD(device_probe, coretemp_probe), 87 DEVMETHOD(device_attach, coretemp_attach), 88 DEVMETHOD(device_detach, coretemp_detach), 89 90 DEVMETHOD_END 91 }; 92 93 static driver_t coretemp_driver = { 94 "coretemp", 95 coretemp_methods, 96 sizeof(struct coretemp_softc), 97 }; 98 99 enum therm_info { 100 CORETEMP_TEMP, 101 CORETEMP_DELTA, 102 CORETEMP_RESOLUTION, 103 CORETEMP_TJMAX, 104 }; 105 106 DRIVER_MODULE(coretemp, cpu, coretemp_driver, NULL, NULL); 107 108 static void 109 coretemp_identify(driver_t *driver, device_t parent) 110 { 111 device_t child; 112 u_int regs[4]; 113 114 /* Make sure we're not being doubly invoked. */ 115 if (device_find_child(parent, "coretemp", -1) != NULL) 116 return; 117 118 /* Check that CPUID 0x06 is supported and the vendor is Intel.*/ 119 if (cpu_high < 6 || cpu_vendor_id != CPU_VENDOR_INTEL) 120 return; 121 /* 122 * CPUID 0x06 returns 1 if the processor has on-die thermal 123 * sensors. EBX[0:3] contains the number of sensors. 124 */ 125 do_cpuid(0x06, regs); 126 if ((regs[0] & 0x1) != 1) 127 return; 128 129 /* 130 * We add a child for each CPU since settings must be performed 131 * on each CPU in the SMP case. 132 */ 133 child = device_add_child(parent, "coretemp", device_get_unit(parent)); 134 if (child == NULL) 135 device_printf(parent, "add coretemp child failed\n"); 136 } 137 138 static int 139 coretemp_probe(device_t dev) 140 { 141 if (resource_disabled("coretemp", 0)) 142 return (ENXIO); 143 144 device_set_desc(dev, "CPU On-Die Thermal Sensors"); 145 146 if (!bootverbose && device_get_unit(dev) != 0) 147 device_quiet(dev); 148 149 return (BUS_PROBE_GENERIC); 150 } 151 152 static int 153 coretemp_attach(device_t dev) 154 { 155 struct coretemp_softc *sc = device_get_softc(dev); 156 device_t pdev; 157 uint64_t msr; 158 int cpu_model, cpu_stepping; 159 int ret, tjtarget; 160 struct sysctl_oid *oid; 161 struct sysctl_ctx_list *ctx; 162 163 sc->sc_dev = dev; 164 pdev = device_get_parent(dev); 165 cpu_model = CPUID_TO_MODEL(cpu_id); 166 cpu_stepping = CPUID_TO_STEPPING(cpu_id); 167 168 /* 169 * Some CPUs, namely the PIII, don't have thermal sensors, but 170 * report them when the CPUID check is performed in 171 * coretemp_identify(). This leads to a later GPF when the sensor 172 * is queried via a MSR, so we stop here. 173 */ 174 if (cpu_model < 0xe) 175 return (ENXIO); 176 177 #if 0 /* 178 * XXXrpaulo: I have this CPU model and when it returns from C3 179 * coretemp continues to function properly. 180 */ 181 182 /* 183 * Check for errata AE18. 184 * "Processor Digital Thermal Sensor (DTS) Readout stops 185 * updating upon returning from C3/C4 state." 186 * 187 * Adapted from the Linux coretemp driver. 188 */ 189 if (cpu_model == 0xe && cpu_stepping < 0xc) { 190 msr = rdmsr(MSR_BIOS_SIGN); 191 msr = msr >> 32; 192 if (msr < 0x39) { 193 device_printf(dev, "not supported (Intel errata " 194 "AE18), try updating your BIOS\n"); 195 return (ENXIO); 196 } 197 } 198 #endif 199 200 /* 201 * Use 100C as the initial value. 202 */ 203 sc->sc_tjmax = 100; 204 205 if ((cpu_model == 0xf && cpu_stepping >= 2) || cpu_model == 0xe) { 206 /* 207 * On some Core 2 CPUs, there's an undocumented MSR that 208 * can tell us if Tj(max) is 100 or 85. 209 * 210 * The if-clause for CPUs having the MSR_IA32_EXT_CONFIG was adapted 211 * from the Linux coretemp driver. 212 */ 213 msr = rdmsr(MSR_IA32_EXT_CONFIG); 214 if (msr & (1 << 30)) 215 sc->sc_tjmax = 85; 216 } else if (cpu_model == 0x17) { 217 switch (cpu_stepping) { 218 case 0x6: /* Mobile Core 2 Duo */ 219 sc->sc_tjmax = 105; 220 break; 221 default: /* Unknown stepping */ 222 break; 223 } 224 } else if (cpu_model == 0x1c) { 225 switch (cpu_stepping) { 226 case 0xa: /* 45nm Atom D400, N400 and D500 series */ 227 sc->sc_tjmax = 100; 228 break; 229 default: 230 sc->sc_tjmax = 90; 231 break; 232 } 233 } else { 234 /* 235 * Attempt to get Tj(max) from MSR IA32_TEMPERATURE_TARGET. 236 * 237 * This method is described in Intel white paper "CPU 238 * Monitoring With DTS/PECI". (#322683) 239 */ 240 ret = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &msr); 241 if (ret == 0) { 242 tjtarget = (msr >> 16) & 0xff; 243 244 /* 245 * On earlier generation of processors, the value 246 * obtained from IA32_TEMPERATURE_TARGET register is 247 * an offset that needs to be summed with a model 248 * specific base. It is however not clear what 249 * these numbers are, with the publicly available 250 * documents from Intel. 251 * 252 * For now, we consider [70, 110]C range, as 253 * described in #322683, as "reasonable" and accept 254 * these values whenever the MSR is available for 255 * read, regardless the CPU model. 256 */ 257 if (tjtarget >= 70 && tjtarget <= 110) 258 sc->sc_tjmax = tjtarget; 259 else 260 device_printf(dev, "Tj(target) value %d " 261 "does not seem right.\n", tjtarget); 262 } else 263 device_printf(dev, "Can not get Tj(target) " 264 "from your CPU, using 100C.\n"); 265 } 266 267 if (bootverbose) 268 device_printf(dev, "Setting TjMax=%d\n", sc->sc_tjmax); 269 270 ctx = device_get_sysctl_ctx(dev); 271 272 oid = SYSCTL_ADD_NODE(ctx, 273 SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)), OID_AUTO, 274 "coretemp", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 275 "Per-CPU thermal information"); 276 277 /* 278 * Add the MIBs to dev.cpu.N and dev.cpu.N.coretemp. 279 */ 280 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)), 281 OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 282 dev, CORETEMP_TEMP, coretemp_get_val_sysctl, "IK", 283 "Current temperature"); 284 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "delta", 285 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_DELTA, 286 coretemp_get_val_sysctl, "I", 287 "Delta between TCC activation and current temperature"); 288 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "resolution", 289 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_RESOLUTION, 290 coretemp_get_val_sysctl, "I", 291 "Resolution of CPU thermal sensor"); 292 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "tjmax", 293 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_TJMAX, 294 coretemp_get_val_sysctl, "IK", 295 "TCC activation temperature"); 296 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, 297 "throttle_log", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, 0, 298 coretemp_throttle_log_sysctl, "I", 299 "Set to 1 if the thermal sensor has tripped"); 300 301 return (0); 302 } 303 304 static int 305 coretemp_detach(device_t dev) 306 { 307 return (0); 308 } 309 310 struct coretemp_args { 311 u_int msr; 312 uint64_t val; 313 }; 314 315 /* 316 * The digital temperature reading is located at bit 16 317 * of MSR_THERM_STATUS. 318 * 319 * There is a bit on that MSR that indicates whether the 320 * temperature is valid or not. 321 * 322 * The temperature is computed by subtracting the temperature 323 * reading by Tj(max). 324 */ 325 static uint64_t 326 coretemp_get_thermal_msr(int cpu) 327 { 328 uint64_t res; 329 330 x86_msr_op(MSR_THERM_STATUS, MSR_OP_RENDEZVOUS_ONE | MSR_OP_READ | 331 MSR_OP_CPUID(cpu), 0, &res); 332 return (res); 333 } 334 335 static void 336 coretemp_clear_thermal_msr(int cpu) 337 { 338 x86_msr_op(MSR_THERM_STATUS, MSR_OP_RENDEZVOUS_ONE | MSR_OP_WRITE | 339 MSR_OP_CPUID(cpu), 0, NULL); 340 } 341 342 static int 343 coretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS) 344 { 345 device_t dev; 346 uint64_t msr; 347 int val, tmp; 348 struct coretemp_softc *sc; 349 enum therm_info type; 350 char stemp[16]; 351 352 dev = (device_t) arg1; 353 msr = coretemp_get_thermal_msr(device_get_unit(dev)); 354 sc = device_get_softc(dev); 355 type = arg2; 356 357 if (((msr >> THERM_STATUS_VALID_SHIFT) & THERM_STATUS_VALID_MASK) != 1) { 358 val = -1; 359 } else { 360 switch (type) { 361 case CORETEMP_TEMP: 362 tmp = (msr >> THERM_STATUS_TEMP_SHIFT) & 363 THERM_STATUS_TEMP_MASK; 364 val = (sc->sc_tjmax - tmp) * 10 + TZ_ZEROC; 365 break; 366 case CORETEMP_DELTA: 367 val = (msr >> THERM_STATUS_TEMP_SHIFT) & 368 THERM_STATUS_TEMP_MASK; 369 break; 370 case CORETEMP_RESOLUTION: 371 val = (msr >> THERM_STATUS_RES_SHIFT) & 372 THERM_STATUS_RES_MASK; 373 break; 374 case CORETEMP_TJMAX: 375 val = sc->sc_tjmax * 10 + TZ_ZEROC; 376 break; 377 } 378 } 379 380 if (msr & THERM_STATUS_LOG) { 381 coretemp_clear_thermal_msr(device_get_unit(dev)); 382 sc->sc_throttle_log = 1; 383 384 /* 385 * Check for Critical Temperature Status and Critical 386 * Temperature Log. It doesn't really matter if the 387 * current temperature is invalid because the "Critical 388 * Temperature Log" bit will tell us if the Critical 389 * Temperature has * been reached in past. It's not 390 * directly related to the current temperature. 391 * 392 * If we reach a critical level, allow devctl(4) 393 * to catch this and shutdown the system. 394 */ 395 if (msr & THERM_CRITICAL_STATUS) { 396 tmp = (msr >> THERM_STATUS_TEMP_SHIFT) & 397 THERM_STATUS_TEMP_MASK; 398 tmp = (sc->sc_tjmax - tmp) * 10 + TZ_ZEROC; 399 device_printf(dev, "critical temperature detected, " 400 "suggest system shutdown\n"); 401 snprintf(stemp, sizeof(stemp), "%d", tmp); 402 devctl_notify("coretemp", "Thermal", stemp, 403 "notify=0xcc"); 404 } 405 } 406 407 return (sysctl_handle_int(oidp, &val, 0, req)); 408 } 409 410 static int 411 coretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS) 412 { 413 device_t dev; 414 uint64_t msr; 415 int error, val; 416 struct coretemp_softc *sc; 417 418 dev = (device_t) arg1; 419 msr = coretemp_get_thermal_msr(device_get_unit(dev)); 420 sc = device_get_softc(dev); 421 422 if (msr & THERM_STATUS_LOG) { 423 coretemp_clear_thermal_msr(device_get_unit(dev)); 424 sc->sc_throttle_log = 1; 425 } 426 427 val = sc->sc_throttle_log; 428 429 error = sysctl_handle_int(oidp, &val, 0, req); 430 431 if (error || !req->newptr) 432 return (error); 433 else if (val != 0) 434 return (EINVAL); 435 436 coretemp_clear_thermal_msr(device_get_unit(dev)); 437 sc->sc_throttle_log = 0; 438 439 return (0); 440 } 441