xref: /freebsd/sys/dev/acpica/acpi_thermal.c (revision 657729a89dd578d8cfc70d6616f5c65a48a8b33a)
1 /*-
2  * Copyright (c) 2000, 2001 Michael Smith
3  * Copyright (c) 2000 BSDi
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_acpi.h"
32 #include <sys/param.h>
33 #include <sys/eventhandler.h>
34 #include <sys/kernel.h>
35 #include <sys/bus.h>
36 #include <sys/cpu.h>
37 #include <sys/kthread.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/proc.h>
41 #include <sys/reboot.h>
42 #include <sys/sysctl.h>
43 #include <sys/unistd.h>
44 #include <sys/power.h>
45 
46 #include "cpufreq_if.h"
47 
48 #include <contrib/dev/acpica/include/acpi.h>
49 #include <contrib/dev/acpica/include/accommon.h>
50 
51 #include <dev/acpica/acpivar.h>
52 
53 /* Hooks for the ACPI CA debugging infrastructure */
54 #define _COMPONENT	ACPI_THERMAL
55 ACPI_MODULE_NAME("THERMAL")
56 
/*
 * Temperatures in this driver are kept in tenths of a kelvin; TZ_ZEROC is
 * 0 degrees Celsius expressed on that scale.
 */
#define TZ_ZEROC	2731

/*
 * Expands to TWO comma-separated expressions: the whole-degree Celsius part
 * followed by the absolute value of the tenths digit.  It is meant to be
 * dropped into an argument list that feeds a "%d.%d" format pair.
 */
#define TZ_KELVTOC(x)	(((x) - TZ_ZEROC) / 10), abs(((x) - TZ_ZEROC) % 10)

/* Notify codes handled for a thermal zone. */
#define TZ_NOTIFY_TEMPERATURE	0x80	/* Temperature changed. */
#define TZ_NOTIFY_LEVELS	0x81	/* Cooling levels changed. */
#define TZ_NOTIFY_DEVICES	0x82	/* Device lists changed. */
/* Fake notify that _CRT/_HOT/_CR3 reached. */
#define TZ_NOTIFY_CRITICAL	0xcc

/* Default polling interval, in seconds, between temperature checks. */
#define TZ_POLLRATE	10

/*
 * Number of consecutive polls for which an over-limit temperature must be
 * reported before it is acted upon.
 */
#define TZ_VALIDCHECKS	3

/* Poll count at which the user is warned: one cycle before acting. */
#define TZ_NOTIFYCOUNT	(TZ_VALIDCHECKS - 1)

/* Number of active cooling levels (_AC0.._AC9); the ACPI spec defines this. */
#define TZ_NUMLEVELS	10
76 struct acpi_tz_zone {
77     int		ac[TZ_NUMLEVELS];
78     ACPI_BUFFER	al[TZ_NUMLEVELS];
79     int		crt;
80     int		hot;
81     int		cr3;
82     ACPI_BUFFER	psl;
83     int		psv;
84     int		tc1;
85     int		tc2;
86     int		tsp;
87     int		tzp;
88 };
89 
90 struct acpi_tz_softc {
91     device_t			tz_dev;
92     ACPI_HANDLE			tz_handle;	/*Thermal zone handle*/
93     int				tz_temperature;	/*Current temperature*/
94     int				tz_active;	/*Current active cooling*/
95 #define TZ_ACTIVE_NONE		-1
96 #define TZ_ACTIVE_UNKNOWN	-2
97     int				tz_requested;	/*Minimum active cooling*/
98     int				tz_thflags;	/*Current temp-related flags*/
99 #define TZ_THFLAG_NONE		0
100 #define TZ_THFLAG_PSV		(1<<0)
101 #define TZ_THFLAG_CR3		(1<<2)
102 #define TZ_THFLAG_HOT		(1<<3)
103 #define TZ_THFLAG_CRT		(1<<4)
104     int				tz_flags;
105 #define TZ_FLAG_NO_SCP		(1<<0)		/*No _SCP method*/
106 #define TZ_FLAG_GETPROFILE	(1<<1)		/*Get power_profile in timeout*/
107 #define TZ_FLAG_GETSETTINGS	(1<<2)		/*Get devs/setpoints*/
108     struct timespec		tz_cooling_started;
109 					/*Current cooling starting time*/
110 
111     struct sysctl_ctx_list	tz_sysctl_ctx;
112     struct sysctl_oid		*tz_sysctl_tree;
113     eventhandler_tag		tz_event;
114 
115     struct acpi_tz_zone 	tz_zone;	/*Thermal zone parameters*/
116     int				tz_validchecks;
117     int				tz_insane_tmp_notified;
118 
119     /* passive cooling */
120     struct proc			*tz_cooling_proc;
121     int				tz_cooling_proc_running;
122     int				tz_cooling_enabled;
123     int				tz_cooling_active;
124     int				tz_cooling_updated;
125     int				tz_cooling_saved_freq;
126 };
127 
128 #define	TZ_ACTIVE_LEVEL(act)	((act) >= 0 ? (act) : TZ_NUMLEVELS)
129 
130 #define CPUFREQ_MAX_LEVELS	64 /* XXX cpufreq should export this */
131 
132 static int	acpi_tz_probe(device_t dev);
133 static int	acpi_tz_attach(device_t dev);
134 static int	acpi_tz_establish(struct acpi_tz_softc *sc);
135 static void	acpi_tz_monitor(void *Context);
136 static void	acpi_tz_switch_cooler_off(ACPI_OBJECT *obj, void *arg);
137 static void	acpi_tz_switch_cooler_on(ACPI_OBJECT *obj, void *arg);
138 static void	acpi_tz_getparam(struct acpi_tz_softc *sc, char *node,
139 				 int *data);
140 static void	acpi_tz_sanity(struct acpi_tz_softc *sc, int *val, char *what);
141 static int	acpi_tz_active_sysctl(SYSCTL_HANDLER_ARGS);
142 static int	acpi_tz_cooling_sysctl(SYSCTL_HANDLER_ARGS);
143 static int	acpi_tz_temp_sysctl(SYSCTL_HANDLER_ARGS);
144 static int	acpi_tz_passive_sysctl(SYSCTL_HANDLER_ARGS);
145 static void	acpi_tz_notify_handler(ACPI_HANDLE h, UINT32 notify,
146 				       void *context);
147 static void	acpi_tz_signal(struct acpi_tz_softc *sc, int flags);
148 static void	acpi_tz_timeout(struct acpi_tz_softc *sc, int flags);
149 static void	acpi_tz_power_profile(void *arg);
150 static void	acpi_tz_thread(void *arg);
151 static int	acpi_tz_cooling_is_available(struct acpi_tz_softc *sc);
152 static int	acpi_tz_cooling_thread_start(struct acpi_tz_softc *sc);
153 
154 static device_method_t acpi_tz_methods[] = {
155     /* Device interface */
156     DEVMETHOD(device_probe,	acpi_tz_probe),
157     DEVMETHOD(device_attach,	acpi_tz_attach),
158 
159     DEVMETHOD_END
160 };
161 
162 static driver_t acpi_tz_driver = {
163     "acpi_tz",
164     acpi_tz_methods,
165     sizeof(struct acpi_tz_softc),
166 };
167 
168 static char *acpi_tz_tmp_name = "_TMP";
169 
170 DRIVER_MODULE(acpi_tz, acpi, acpi_tz_driver, 0, 0);
171 MODULE_DEPEND(acpi_tz, acpi, 1, 1, 1);
172 
173 static struct sysctl_ctx_list	acpi_tz_sysctl_ctx;
174 static struct sysctl_oid	*acpi_tz_sysctl_tree;
175 
176 /* Minimum cooling run time */
177 static int			acpi_tz_min_runtime;
178 static int			acpi_tz_polling_rate = TZ_POLLRATE;
179 static int			acpi_tz_override;
180 
181 /* Thermal zone polling thread */
182 static struct proc		*acpi_tz_proc;
183 ACPI_LOCK_DECL(thermal, "ACPI thermal zone");
184 
185 static int			acpi_tz_cooling_unit = -1;
186 
187 static int
188 acpi_tz_probe(device_t dev)
189 {
190     int		result;
191 
192     if (acpi_get_type(dev) == ACPI_TYPE_THERMAL && !acpi_disabled("thermal")) {
193 	device_set_desc(dev, "Thermal Zone");
194 	result = -10;
195     } else
196 	result = ENXIO;
197     return (result);
198 }
199 
200 static int
201 acpi_tz_attach(device_t dev)
202 {
203     struct acpi_tz_softc	*sc;
204     struct acpi_softc		*acpi_sc;
205     int				error;
206     char			oidname[16];
207 
208     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
209 
210     sc = device_get_softc(dev);
211     sc->tz_dev = dev;
212     sc->tz_handle = acpi_get_handle(dev);
213     sc->tz_requested = TZ_ACTIVE_NONE;
214     sc->tz_active = TZ_ACTIVE_UNKNOWN;
215     sc->tz_thflags = TZ_THFLAG_NONE;
216     sc->tz_cooling_proc = NULL;
217     sc->tz_cooling_proc_running = FALSE;
218     sc->tz_cooling_active = FALSE;
219     sc->tz_cooling_updated = FALSE;
220     sc->tz_cooling_enabled = FALSE;
221 
222     /*
223      * Parse the current state of the thermal zone and build control
224      * structures.  We don't need to worry about interference with the
225      * control thread since we haven't fully attached this device yet.
226      */
227     if ((error = acpi_tz_establish(sc)) != 0)
228 	return (error);
229 
230     /*
231      * Register for any Notify events sent to this zone.
232      */
233     AcpiInstallNotifyHandler(sc->tz_handle, ACPI_DEVICE_NOTIFY,
234 			     acpi_tz_notify_handler, sc);
235 
236     /*
237      * Create our sysctl nodes.
238      *
239      * XXX we need a mechanism for adding nodes under ACPI.
240      */
241     if (device_get_unit(dev) == 0) {
242 	acpi_sc = acpi_device_get_parent_softc(dev);
243 	sysctl_ctx_init(&acpi_tz_sysctl_ctx);
244 	acpi_tz_sysctl_tree = SYSCTL_ADD_NODE(&acpi_tz_sysctl_ctx,
245 	    SYSCTL_CHILDREN(acpi_sc->acpi_sysctl_tree), OID_AUTO, "thermal",
246 	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
247 	SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx,
248 		       SYSCTL_CHILDREN(acpi_tz_sysctl_tree),
249 		       OID_AUTO, "min_runtime", CTLFLAG_RW,
250 		       &acpi_tz_min_runtime, 0,
251 		       "minimum cooling run time in sec");
252 	SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx,
253 		       SYSCTL_CHILDREN(acpi_tz_sysctl_tree),
254 		       OID_AUTO, "polling_rate", CTLFLAG_RW,
255 		       &acpi_tz_polling_rate, 0, "monitor polling interval in seconds");
256 	SYSCTL_ADD_INT(&acpi_tz_sysctl_ctx,
257 		       SYSCTL_CHILDREN(acpi_tz_sysctl_tree), OID_AUTO,
258 		       "user_override", CTLFLAG_RW, &acpi_tz_override, 0,
259 		       "allow override of thermal settings");
260     }
261     sysctl_ctx_init(&sc->tz_sysctl_ctx);
262     snprintf(oidname, sizeof(oidname), "tz%d", device_get_unit(dev));
263     sc->tz_sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&sc->tz_sysctl_ctx,
264         SYSCTL_CHILDREN(acpi_tz_sysctl_tree), OID_AUTO, oidname,
265 	CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "", "thermal_zone");
266     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
267         OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
268 	&sc->tz_temperature, 0, sysctl_handle_int, "IK",
269 	"current thermal zone temperature");
270     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
271         OID_AUTO, "active", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
272 	0, acpi_tz_active_sysctl, "I", "cooling is active");
273     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
274         OID_AUTO, "passive_cooling",
275 	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
276 	acpi_tz_cooling_sysctl, "I",
277 	"enable passive (speed reduction) cooling");
278 
279     SYSCTL_ADD_INT(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
280 		   OID_AUTO, "thermal_flags", CTLFLAG_RD,
281 		   &sc->tz_thflags, 0, "thermal zone flags");
282     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
283         OID_AUTO, "_PSV", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
284 	offsetof(struct acpi_tz_softc, tz_zone.psv), acpi_tz_temp_sysctl, "IK",
285 	"passive cooling temp setpoint");
286     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
287         OID_AUTO, "_CR3", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
288 	offsetof(struct acpi_tz_softc, tz_zone.cr3), acpi_tz_temp_sysctl, "IK",
289 	"too warm temp setpoint (standby now)");
290     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
291         OID_AUTO, "_HOT", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
292 	offsetof(struct acpi_tz_softc, tz_zone.hot), acpi_tz_temp_sysctl, "IK",
293 	"too hot temp setpoint (suspend now)");
294     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
295         OID_AUTO, "_CRT", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
296 	offsetof(struct acpi_tz_softc, tz_zone.crt), acpi_tz_temp_sysctl, "IK",
297 	"critical temp setpoint (shutdown now)");
298     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
299         OID_AUTO, "_ACx", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
300 	&sc->tz_zone.ac, sizeof(sc->tz_zone.ac), sysctl_handle_opaque, "IK",
301 	"");
302     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
303         OID_AUTO, "_TC1", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
304 	offsetof(struct acpi_tz_softc, tz_zone.tc1), acpi_tz_passive_sysctl,
305 	"I", "thermal constant 1 for passive cooling");
306     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
307         OID_AUTO, "_TC2", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
308 	offsetof(struct acpi_tz_softc, tz_zone.tc2), acpi_tz_passive_sysctl,
309 	"I", "thermal constant 2 for passive cooling");
310     SYSCTL_ADD_PROC(&sc->tz_sysctl_ctx, SYSCTL_CHILDREN(sc->tz_sysctl_tree),
311         OID_AUTO, "_TSP", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
312 	offsetof(struct acpi_tz_softc, tz_zone.tsp), acpi_tz_passive_sysctl,
313 	"I", "thermal sampling period for passive cooling");
314 
315     /*
316      * Register our power profile event handler.
317      */
318     sc->tz_event = EVENTHANDLER_REGISTER(power_profile_change,
319 	acpi_tz_power_profile, sc, 0);
320 
321     /*
322      * Flag the event handler for a manual invocation by our timeout.
323      * We defer it like this so that the rest of the subsystem has time
324      * to come up.  Don't bother evaluating/printing the temperature at
325      * this point; on many systems it'll be bogus until the EC is running.
326      */
327     sc->tz_flags |= TZ_FLAG_GETPROFILE;
328 
329     return_VALUE (0);
330 }
331 
332 static void
333 acpi_tz_startup(void *arg __unused)
334 {
335     struct acpi_tz_softc *sc;
336     device_t *devs;
337     int devcount, error, i;
338 
339     devclass_get_devices(devclass_find("acpi_tz"), &devs, &devcount);
340     if (devcount == 0) {
341 	free(devs, M_TEMP);
342 	return;
343     }
344 
345     /*
346      * Create thread to service all of the thermal zones.
347      */
348     error = kproc_create(acpi_tz_thread, NULL, &acpi_tz_proc, RFHIGHPID, 0,
349 	"acpi_thermal");
350     if (error != 0)
351 	printf("acpi_tz: could not create thread - %d", error);
352 
353     /*
354      * Create a thread to handle passive cooling for 1st zone which
355      * has _PSV, _TSP, _TC1 and _TC2.  Users can enable it for other
356      * zones manually for now.
357      *
358      * XXX We enable only one zone to avoid multiple zones conflict
359      * with each other since cpufreq currently sets all CPUs to the
360      * given frequency whereas it's possible for different thermal
361      * zones to specify independent settings for multiple CPUs.
362      */
363     for (i = 0; i < devcount; i++) {
364 	sc = device_get_softc(devs[i]);
365 	if (acpi_tz_cooling_is_available(sc)) {
366 	    sc->tz_cooling_enabled = TRUE;
367 	    error = acpi_tz_cooling_thread_start(sc);
368 	    if (error != 0) {
369 		sc->tz_cooling_enabled = FALSE;
370 		break;
371 	    }
372 	    acpi_tz_cooling_unit = device_get_unit(devs[i]);
373 	    break;
374 	}
375     }
376     free(devs, M_TEMP);
377 }
378 SYSINIT(acpi_tz, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, acpi_tz_startup, NULL);
379 
380 /*
381  * Parse the current state of this thermal zone and set up to use it.
382  *
383  * Note that we may have previous state, which will have to be discarded.
384  */
385 static int
386 acpi_tz_establish(struct acpi_tz_softc *sc)
387 {
388     ACPI_OBJECT	*obj;
389     int		i;
390     char	nbuf[8];
391 
392     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
393 
394     /* Erase any existing state. */
395     for (i = 0; i < TZ_NUMLEVELS; i++)
396 	if (sc->tz_zone.al[i].Pointer != NULL)
397 	    AcpiOsFree(sc->tz_zone.al[i].Pointer);
398     if (sc->tz_zone.psl.Pointer != NULL)
399 	AcpiOsFree(sc->tz_zone.psl.Pointer);
400 
401     /*
402      * XXX: We initialize only ACPI_BUFFER to avoid race condition
403      * with passive cooling thread which refers psv, tc1, tc2 and tsp.
404      */
405     bzero(sc->tz_zone.ac, sizeof(sc->tz_zone.ac));
406     bzero(sc->tz_zone.al, sizeof(sc->tz_zone.al));
407     bzero(&sc->tz_zone.psl, sizeof(sc->tz_zone.psl));
408 
409     /* Evaluate thermal zone parameters. */
410     for (i = 0; i < TZ_NUMLEVELS; i++) {
411 	sprintf(nbuf, "_AC%d", i);
412 	acpi_tz_getparam(sc, nbuf, &sc->tz_zone.ac[i]);
413 	sprintf(nbuf, "_AL%d", i);
414 	sc->tz_zone.al[i].Length = ACPI_ALLOCATE_BUFFER;
415 	sc->tz_zone.al[i].Pointer = NULL;
416 	AcpiEvaluateObject(sc->tz_handle, nbuf, NULL, &sc->tz_zone.al[i]);
417 	obj = (ACPI_OBJECT *)sc->tz_zone.al[i].Pointer;
418 	if (obj != NULL) {
419 	    /* Should be a package containing a list of power objects */
420 	    if (obj->Type != ACPI_TYPE_PACKAGE) {
421 		device_printf(sc->tz_dev, "%s has unknown type %d, rejecting\n",
422 			      nbuf, obj->Type);
423 		return_VALUE (ENXIO);
424 	    }
425 	}
426     }
427     acpi_tz_getparam(sc, "_CRT", &sc->tz_zone.crt);
428     acpi_tz_getparam(sc, "_HOT", &sc->tz_zone.hot);
429     acpi_tz_getparam(sc, "_CR3", &sc->tz_zone.cr3);
430     sc->tz_zone.psl.Length = ACPI_ALLOCATE_BUFFER;
431     sc->tz_zone.psl.Pointer = NULL;
432     AcpiEvaluateObject(sc->tz_handle, "_PSL", NULL, &sc->tz_zone.psl);
433     acpi_tz_getparam(sc, "_PSV", &sc->tz_zone.psv);
434     acpi_tz_getparam(sc, "_TC1", &sc->tz_zone.tc1);
435     acpi_tz_getparam(sc, "_TC2", &sc->tz_zone.tc2);
436     acpi_tz_getparam(sc, "_TSP", &sc->tz_zone.tsp);
437     acpi_tz_getparam(sc, "_TZP", &sc->tz_zone.tzp);
438 
439     /*
440      * Sanity-check the values we've been given.
441      *
442      * XXX what do we do about systems that give us the same value for
443      *     more than one of these setpoints?
444      */
445     acpi_tz_sanity(sc, &sc->tz_zone.crt, "_CRT");
446     acpi_tz_sanity(sc, &sc->tz_zone.hot, "_HOT");
447     acpi_tz_sanity(sc, &sc->tz_zone.cr3, "_CR3");
448     acpi_tz_sanity(sc, &sc->tz_zone.psv, "_PSV");
449     for (i = 0; i < TZ_NUMLEVELS; i++)
450 	acpi_tz_sanity(sc, &sc->tz_zone.ac[i], "_ACx");
451 
452     return_VALUE (0);
453 }
454 
/*
 * Printable names for the active cooling states.  Slot 0 is "no level";
 * slot N + 1 names level N.  The table is read-only.
 */
static const char * const aclevel_string[] = {
    "NONE", "_AC0", "_AC1", "_AC2", "_AC3", "_AC4",
    "_AC5", "_AC6", "_AC7", "_AC8", "_AC9"
};

/*
 * Return the printable name of an active cooling state.
 *
 * active: a level number, or -1 for "none".
 *
 * Any value outside the table (including TZ_ACTIVE_UNKNOWN) yields "NONE".
 * The upper bound is taken from the table itself so the lookup cannot run
 * past its end.
 */
static __inline const char *
acpi_tz_aclevel_string(int active)
{
    const int	nlevels =
	(int)(sizeof(aclevel_string) / sizeof(aclevel_string[0])) - 1;

    if (active < -1 || active >= nlevels)
	return (aclevel_string[0]);

    return (aclevel_string[active + 1]);
}
468 
469 /*
470  * Get the current temperature.
471  */
472 static int
473 acpi_tz_get_temperature(struct acpi_tz_softc *sc)
474 {
475     int		temp;
476     ACPI_STATUS	status;
477 
478     ACPI_FUNCTION_NAME ("acpi_tz_get_temperature");
479 
480     /* Evaluate the thermal zone's _TMP method. */
481     status = acpi_GetInteger(sc->tz_handle, acpi_tz_tmp_name, &temp);
482     if (ACPI_FAILURE(status)) {
483 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
484 	    "error fetching current temperature -- %s\n",
485 	     AcpiFormatException(status));
486 	return (FALSE);
487     }
488 
489     /* Check it for validity. */
490     acpi_tz_sanity(sc, &temp, acpi_tz_tmp_name);
491     if (temp == -1)
492 	return (FALSE);
493 
494     ACPI_DEBUG_PRINT((ACPI_DB_VALUES, "got %d.%dC\n", TZ_KELVTOC(temp)));
495     sc->tz_temperature = temp;
496     return (TRUE);
497 }
498 
499 /*
500  * Evaluate the condition of a thermal zone, take appropriate actions.
501  */
502 static void
503 acpi_tz_monitor(void *Context)
504 {
505     struct acpi_softc	 *acpi_sc;
506     struct acpi_tz_softc *sc;
507     struct	timespec curtime;
508     int		temp;
509     int		i;
510     int		newactive, newflags;
511 
512     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
513 
514     sc = (struct acpi_tz_softc *)Context;
515 
516     /* Get the current temperature. */
517     if (!acpi_tz_get_temperature(sc)) {
518 	/* XXX disable zone? go to max cooling? */
519 	return_VOID;
520     }
521     temp = sc->tz_temperature;
522 
523     /*
524      * Work out what we ought to be doing right now.
525      *
526      * Note that the _ACx levels sort from hot to cold.
527      */
528     newactive = TZ_ACTIVE_NONE;
529     for (i = TZ_NUMLEVELS - 1; i >= 0; i--) {
530 	if (sc->tz_zone.ac[i] != -1 && temp >= sc->tz_zone.ac[i])
531 	    newactive = i;
532     }
533 
534     /*
535      * We are going to get _ACx level down (colder side), but give a guaranteed
536      * minimum cooling run time if requested.
537      */
538     if (acpi_tz_min_runtime > 0 && sc->tz_active != TZ_ACTIVE_NONE &&
539 	sc->tz_active != TZ_ACTIVE_UNKNOWN &&
540 	(newactive == TZ_ACTIVE_NONE || newactive > sc->tz_active)) {
541 	getnanotime(&curtime);
542 	timespecsub(&curtime, &sc->tz_cooling_started, &curtime);
543 	if (curtime.tv_sec < acpi_tz_min_runtime)
544 	    newactive = sc->tz_active;
545     }
546 
547     /* Handle user override of active mode */
548     if (sc->tz_requested != TZ_ACTIVE_NONE && (newactive == TZ_ACTIVE_NONE
549         || sc->tz_requested < newactive))
550 	newactive = sc->tz_requested;
551 
552     /* update temperature-related flags */
553     newflags = TZ_THFLAG_NONE;
554     if (sc->tz_zone.psv != -1 && temp >= sc->tz_zone.psv)
555 	newflags |= TZ_THFLAG_PSV;
556     if (sc->tz_zone.cr3 != -1 && temp >= sc->tz_zone.cr3)
557 	newflags |= TZ_THFLAG_CR3;
558     if (sc->tz_zone.hot != -1 && temp >= sc->tz_zone.hot)
559 	newflags |= TZ_THFLAG_HOT;
560     if (sc->tz_zone.crt != -1 && temp >= sc->tz_zone.crt)
561 	newflags |= TZ_THFLAG_CRT;
562 
563     /* If the active cooling state has changed, we have to switch things. */
564     if (sc->tz_active == TZ_ACTIVE_UNKNOWN) {
565 	/*
566 	 * We don't know which cooling device is on or off,
567 	 * so stop them all, because we now know which
568 	 * should be on (if any).
569 	 */
570 	for (i = 0; i < TZ_NUMLEVELS; i++) {
571 	    if (sc->tz_zone.al[i].Pointer != NULL) {
572 		acpi_ForeachPackageObject(
573 		    (ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
574 		    acpi_tz_switch_cooler_off, sc);
575 	    }
576 	}
577 	/* now we know that all devices are off */
578 	sc->tz_active = TZ_ACTIVE_NONE;
579     }
580 
581     if (newactive != sc->tz_active) {
582 	/* Turn off unneeded cooling devices that are on, if any are */
583 	for (i = TZ_ACTIVE_LEVEL(sc->tz_active);
584 	     i < TZ_ACTIVE_LEVEL(newactive); i++) {
585 	    acpi_ForeachPackageObject(
586 		(ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
587 		acpi_tz_switch_cooler_off, sc);
588 	}
589 	/* Turn on cooling devices that are required, if any are */
590 	for (i = TZ_ACTIVE_LEVEL(sc->tz_active) - 1;
591 	     i >= TZ_ACTIVE_LEVEL(newactive); i--) {
592 	    acpi_ForeachPackageObject(
593 		(ACPI_OBJECT *)sc->tz_zone.al[i].Pointer,
594 		acpi_tz_switch_cooler_on, sc);
595 	}
596 
597 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
598 		    "switched from %s to %s: %d.%dC\n",
599 		    acpi_tz_aclevel_string(sc->tz_active),
600 		    acpi_tz_aclevel_string(newactive), TZ_KELVTOC(temp));
601 	sc->tz_active = newactive;
602 	getnanotime(&sc->tz_cooling_started);
603     }
604 
605     /* XXX (de)activate any passive cooling that may be required. */
606 
607     /*
608      * If the temperature is at _HOT or _CRT, increment our event count.
609      * If it has occurred enough times, shutdown the system.  This is
610      * needed because some systems will report an invalid high temperature
611      * for one poll cycle.  It is suspected this is due to the embedded
612      * controller timing out.  A typical value is 138C for one cycle on
613      * a system that is otherwise 65C.
614      *
615      * If we're almost at that threshold, notify the user through devd(8).
616      */
617     if ((newflags & (TZ_THFLAG_CR3 | TZ_THFLAG_HOT | TZ_THFLAG_CRT)) != 0) {
618 	sc->tz_validchecks++;
619 	if (sc->tz_validchecks == TZ_VALIDCHECKS) {
620 	    device_printf(sc->tz_dev,
621 		"WARNING - current temperature (%d.%dC) exceeds safe limits\n",
622 		TZ_KELVTOC(sc->tz_temperature));
623 	    if ((newflags & (TZ_THFLAG_HOT | TZ_THFLAG_CRT)) != 0)
624 		shutdown_nice(RB_POWEROFF);
625 	    else {
626 		acpi_sc = acpi_device_get_parent_softc(sc->tz_dev);
627 		acpi_ReqSleepState(acpi_sc, ACPI_STATE_S3);
628 	    }
629 	} else if (sc->tz_validchecks == TZ_NOTIFYCOUNT)
630 	    acpi_UserNotify("Thermal", sc->tz_handle, TZ_NOTIFY_CRITICAL);
631     } else {
632 	sc->tz_validchecks = 0;
633     }
634     sc->tz_thflags = newflags;
635 
636     return_VOID;
637 }
638 
639 /*
640  * Given an object, verify that it's a reference to a device of some sort,
641  * and try to switch it off.
642  */
643 static void
644 acpi_tz_switch_cooler_off(ACPI_OBJECT *obj, void *arg)
645 {
646     ACPI_HANDLE			cooler;
647 
648     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
649 
650     cooler = acpi_GetReference(NULL, obj);
651     if (cooler == NULL) {
652 	ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "can't get handle\n"));
653 	return_VOID;
654     }
655 
656     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "called to turn %s off\n",
657 		     acpi_name(cooler)));
658     acpi_pwr_switch_consumer(cooler, ACPI_STATE_D3);
659 
660     return_VOID;
661 }
662 
663 /*
664  * Given an object, verify that it's a reference to a device of some sort,
665  * and try to switch it on.
666  *
667  * XXX replication of off/on function code is bad.
668  */
669 static void
670 acpi_tz_switch_cooler_on(ACPI_OBJECT *obj, void *arg)
671 {
672     struct acpi_tz_softc	*sc = (struct acpi_tz_softc *)arg;
673     ACPI_HANDLE			cooler;
674     ACPI_STATUS			status;
675 
676     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
677 
678     cooler = acpi_GetReference(NULL, obj);
679     if (cooler == NULL) {
680 	ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "can't get handle\n"));
681 	return_VOID;
682     }
683 
684     ACPI_DEBUG_PRINT((ACPI_DB_OBJECTS, "called to turn %s on\n",
685 		     acpi_name(cooler)));
686     status = acpi_pwr_switch_consumer(cooler, ACPI_STATE_D0);
687     if (ACPI_FAILURE(status)) {
688 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
689 		    "failed to activate %s - %s\n", acpi_name(cooler),
690 		    AcpiFormatException(status));
691     }
692 
693     return_VOID;
694 }
695 
696 /*
697  * Read/debug-print a parameter, default it to -1.
698  */
699 static void
700 acpi_tz_getparam(struct acpi_tz_softc *sc, char *node, int *data)
701 {
702 
703     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
704 
705     if (ACPI_FAILURE(acpi_GetInteger(sc->tz_handle, node, data))) {
706 	*data = -1;
707     } else {
708 	ACPI_DEBUG_PRINT((ACPI_DB_VALUES, "%s.%s = %d\n",
709 			 acpi_name(sc->tz_handle), node, *data));
710     }
711 
712     return_VOID;
713 }
714 
715 /*
716  * Sanity-check a temperature value.  Assume that setpoints
717  * should be between 0C and 200C.
718  */
719 static void
720 acpi_tz_sanity(struct acpi_tz_softc *sc, int *val, char *what)
721 {
722     if (*val != -1 && (*val < TZ_ZEROC || *val > TZ_ZEROC + 2000)) {
723 	/*
724 	 * If the value we are checking is _TMP, warn the user only
725 	 * once. This avoids spamming messages if, for instance, the
726 	 * sensor is broken and always returns an invalid temperature.
727 	 *
728 	 * This is only done for _TMP; other values always emit a
729 	 * warning.
730 	 */
731 	if (what != acpi_tz_tmp_name || !sc->tz_insane_tmp_notified) {
732 	    device_printf(sc->tz_dev, "%s value is absurd, ignored (%d.%dC)\n",
733 			  what, TZ_KELVTOC(*val));
734 
735 	    /* Don't warn the user again if the read value doesn't improve. */
736 	    if (what == acpi_tz_tmp_name)
737 		sc->tz_insane_tmp_notified = 1;
738 	}
739 	*val = -1;
740 	return;
741     }
742 
743     /* This value is correct. Warn if it's incorrect again. */
744     if (what == acpi_tz_tmp_name)
745 	sc->tz_insane_tmp_notified = 0;
746 }
747 
748 /*
749  * Respond to a sysctl on the active state node.
750  */
751 static int
752 acpi_tz_active_sysctl(SYSCTL_HANDLER_ARGS)
753 {
754     struct acpi_tz_softc	*sc;
755     int				active;
756     int		 		error;
757 
758     sc = (struct acpi_tz_softc *)oidp->oid_arg1;
759     active = sc->tz_active;
760     error = sysctl_handle_int(oidp, &active, 0, req);
761 
762     /* Error or no new value */
763     if (error != 0 || req->newptr == NULL)
764 	return (error);
765     if (active < -1 || active >= TZ_NUMLEVELS)
766 	return (EINVAL);
767 
768     /* Set new preferred level and re-switch */
769     sc->tz_requested = active;
770     acpi_tz_signal(sc, 0);
771     return (0);
772 }
773 
774 static int
775 acpi_tz_cooling_sysctl(SYSCTL_HANDLER_ARGS)
776 {
777     struct acpi_tz_softc *sc;
778     int enabled, error;
779 
780     sc = (struct acpi_tz_softc *)oidp->oid_arg1;
781     enabled = sc->tz_cooling_enabled;
782     error = sysctl_handle_int(oidp, &enabled, 0, req);
783 
784     /* Error or no new value */
785     if (error != 0 || req->newptr == NULL)
786 	return (error);
787     if (enabled != TRUE && enabled != FALSE)
788 	return (EINVAL);
789 
790     if (enabled) {
791 	if (acpi_tz_cooling_is_available(sc))
792 	    error = acpi_tz_cooling_thread_start(sc);
793 	else
794 	    error = ENODEV;
795 	if (error)
796 	    enabled = FALSE;
797     }
798     sc->tz_cooling_enabled = enabled;
799     return (error);
800 }
801 
802 static int
803 acpi_tz_temp_sysctl(SYSCTL_HANDLER_ARGS)
804 {
805     struct acpi_tz_softc	*sc;
806     int				temp, *temp_ptr;
807     int		 		error;
808 
809     sc = oidp->oid_arg1;
810     temp_ptr = (int *)(void *)(uintptr_t)((uintptr_t)sc + oidp->oid_arg2);
811     temp = *temp_ptr;
812     error = sysctl_handle_int(oidp, &temp, 0, req);
813 
814     /* Error or no new value */
815     if (error != 0 || req->newptr == NULL)
816 	return (error);
817 
818     /* Only allow changing settings if override is set. */
819     if (!acpi_tz_override)
820 	return (EPERM);
821 
822     /* Check user-supplied value for sanity. */
823     acpi_tz_sanity(sc, &temp, "user-supplied temp");
824     if (temp == -1)
825 	return (EINVAL);
826 
827     *temp_ptr = temp;
828     return (0);
829 }
830 
831 static int
832 acpi_tz_passive_sysctl(SYSCTL_HANDLER_ARGS)
833 {
834     struct acpi_tz_softc	*sc;
835     int				val, *val_ptr;
836     int				error;
837 
838     sc = oidp->oid_arg1;
839     val_ptr = (int *)(void *)(uintptr_t)((uintptr_t)sc + oidp->oid_arg2);
840     val = *val_ptr;
841     error = sysctl_handle_int(oidp, &val, 0, req);
842 
843     /* Error or no new value */
844     if (error != 0 || req->newptr == NULL)
845 	return (error);
846 
847     /* Only allow changing settings if override is set. */
848     if (!acpi_tz_override)
849 	return (EPERM);
850 
851     *val_ptr = val;
852     return (0);
853 }
854 
855 static void
856 acpi_tz_notify_handler(ACPI_HANDLE h, UINT32 notify, void *context)
857 {
858     struct acpi_tz_softc	*sc = (struct acpi_tz_softc *)context;
859 
860     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
861 
862     switch (notify) {
863     case TZ_NOTIFY_TEMPERATURE:
864 	/* Temperature change occurred */
865 	acpi_tz_signal(sc, 0);
866 	break;
867     case TZ_NOTIFY_DEVICES:
868     case TZ_NOTIFY_LEVELS:
869 	/* Zone devices/setpoints changed */
870 	acpi_tz_signal(sc, TZ_FLAG_GETSETTINGS);
871 	break;
872     default:
873 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
874 		    "unknown Notify event 0x%x\n", notify);
875 	break;
876     }
877 
878     acpi_UserNotify("Thermal", h, notify);
879 
880     return_VOID;
881 }
882 
883 static void
884 acpi_tz_signal(struct acpi_tz_softc *sc, int flags)
885 {
886     ACPI_LOCK(thermal);
887     sc->tz_flags |= flags;
888     ACPI_UNLOCK(thermal);
889     wakeup(&acpi_tz_proc);
890 }
891 
892 /*
893  * Notifies can be generated asynchronously but have also been seen to be
894  * triggered by other thermal methods.  One system generates a notify of
895  * 0x81 when the fan is turned on or off.  Another generates it when _SCP
896  * is called.  To handle these situations, we check the zone via
897  * acpi_tz_monitor() before evaluating changes to setpoints or the cooling
898  * policy.
899  */
900 static void
901 acpi_tz_timeout(struct acpi_tz_softc *sc, int flags)
902 {
903 
904     /* Check the current temperature and take action based on it */
905     acpi_tz_monitor(sc);
906 
907     /* If requested, get the power profile settings. */
908     if (flags & TZ_FLAG_GETPROFILE)
909 	acpi_tz_power_profile(sc);
910 
911     /*
912      * If requested, check for new devices/setpoints.  After finding them,
913      * check if we need to switch fans based on the new values.
914      */
915     if (flags & TZ_FLAG_GETSETTINGS) {
916 	acpi_tz_establish(sc);
917 	acpi_tz_monitor(sc);
918     }
919 
920     /* XXX passive cooling actions? */
921 }
922 
923 /*
924  * System power profile may have changed; fetch and notify the
925  * thermal zone accordingly.
926  *
927  * Since this can be called from an arbitrary eventhandler, it needs
928  * to get the ACPI lock itself.
929  */
930 static void
931 acpi_tz_power_profile(void *arg)
932 {
933     ACPI_STATUS			status;
934     struct acpi_tz_softc	*sc = (struct acpi_tz_softc *)arg;
935     int				state;
936 
937     state = power_profile_get_state();
938     if (state != POWER_PROFILE_PERFORMANCE && state != POWER_PROFILE_ECONOMY)
939 	return;
940 
941     /* check that we haven't decided there's no _SCP method */
942     if ((sc->tz_flags & TZ_FLAG_NO_SCP) == 0) {
943 	/* Call _SCP to set the new profile */
944 	status = acpi_SetInteger(sc->tz_handle, "_SCP",
945 	    (state == POWER_PROFILE_PERFORMANCE) ? 0 : 1);
946 	if (ACPI_FAILURE(status)) {
947 	    if (status != AE_NOT_FOUND)
948 		ACPI_VPRINT(sc->tz_dev,
949 			    acpi_device_get_parent_softc(sc->tz_dev),
950 			    "can't evaluate %s._SCP - %s\n",
951 			    acpi_name(sc->tz_handle),
952 			    AcpiFormatException(status));
953 	    sc->tz_flags |= TZ_FLAG_NO_SCP;
954 	} else {
955 	    /* We have to re-evaluate the entire zone now */
956 	    acpi_tz_signal(sc, TZ_FLAG_GETSETTINGS);
957 	}
958     }
959 }
960 
961 /*
962  * Thermal zone monitor thread.
963  */
964 static void
965 acpi_tz_thread(void *arg)
966 {
967     devclass_t	acpi_tz_devclass;
968     device_t	*devs;
969     int		devcount, i;
970     int		flags;
971     struct acpi_tz_softc **sc;
972 
973     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
974 
975     acpi_tz_devclass = devclass_find("acpi_tz");
976     devs = NULL;
977     devcount = 0;
978     sc = NULL;
979 
980     for (;;) {
981 	/* If the number of devices has changed, re-evaluate. */
982 	if (devclass_get_count(acpi_tz_devclass) != devcount) {
983 	    if (devs != NULL) {
984 		free(devs, M_TEMP);
985 		free(sc, M_TEMP);
986 	    }
987 	    devclass_get_devices(acpi_tz_devclass, &devs, &devcount);
988 	    sc = malloc(sizeof(struct acpi_tz_softc *) * devcount, M_TEMP,
989 			M_WAITOK | M_ZERO);
990 	    for (i = 0; i < devcount; i++)
991 		sc[i] = device_get_softc(devs[i]);
992 	}
993 
994 	/* Check for temperature events and act on them. */
995 	for (i = 0; i < devcount; i++) {
996 	    ACPI_LOCK(thermal);
997 	    flags = sc[i]->tz_flags;
998 	    sc[i]->tz_flags &= TZ_FLAG_NO_SCP;
999 	    ACPI_UNLOCK(thermal);
1000 	    acpi_tz_timeout(sc[i], flags);
1001 	}
1002 
1003 	/* If more work to do, don't go to sleep yet. */
1004 	ACPI_LOCK(thermal);
1005 	for (i = 0; i < devcount; i++) {
1006 	    if (sc[i]->tz_flags & ~TZ_FLAG_NO_SCP)
1007 		break;
1008 	}
1009 
1010 	/*
1011 	 * If we have no more work, sleep for a while, setting PDROP so that
1012 	 * the mutex will not be reacquired.  Otherwise, drop the mutex and
1013 	 * loop to handle more events.
1014 	 */
1015 	if (i == devcount)
1016 	    msleep(&acpi_tz_proc, &thermal_mutex, PZERO | PDROP, "tzpoll",
1017 		hz * acpi_tz_polling_rate);
1018 	else
1019 	    ACPI_UNLOCK(thermal);
1020     }
1021 }
1022 
1023 static int
1024 acpi_tz_cpufreq_restore(struct acpi_tz_softc *sc)
1025 {
1026     device_t dev;
1027     int error;
1028 
1029     if (!sc->tz_cooling_updated)
1030 	return (0);
1031     if ((dev = devclass_get_device(devclass_find("cpufreq"), 0)) == NULL)
1032 	return (ENXIO);
1033     ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
1034 	"temperature %d.%dC: resuming previous clock speed (%d MHz)\n",
1035 	TZ_KELVTOC(sc->tz_temperature), sc->tz_cooling_saved_freq);
1036     error = CPUFREQ_SET(dev, NULL, CPUFREQ_PRIO_KERN);
1037     if (error == 0)
1038 	sc->tz_cooling_updated = FALSE;
1039     return (error);
1040 }
1041 
1042 static int
1043 acpi_tz_cpufreq_update(struct acpi_tz_softc *sc, int req)
1044 {
1045     device_t dev;
1046     struct cf_level *levels;
1047     int num_levels, error, freq, desired_freq, perf, i;
1048 
1049     levels = malloc(CPUFREQ_MAX_LEVELS * sizeof(*levels), M_TEMP, M_NOWAIT);
1050     if (levels == NULL)
1051 	return (ENOMEM);
1052 
1053     /*
1054      * Find the main device, cpufreq0.  We don't yet support independent
1055      * CPU frequency control on SMP.
1056      */
1057     if ((dev = devclass_get_device(devclass_find("cpufreq"), 0)) == NULL) {
1058 	error = ENXIO;
1059 	goto out;
1060     }
1061 
1062     /* Get the current frequency. */
1063     error = CPUFREQ_GET(dev, &levels[0]);
1064     if (error)
1065 	goto out;
1066     freq = levels[0].total_set.freq;
1067 
1068     /* Get the current available frequency levels. */
1069     num_levels = CPUFREQ_MAX_LEVELS;
1070     error = CPUFREQ_LEVELS(dev, levels, &num_levels);
1071     if (error) {
1072 	if (error == E2BIG)
1073 	    printf("cpufreq: need to increase CPUFREQ_MAX_LEVELS\n");
1074 	goto out;
1075     }
1076 
1077     /* Calculate the desired frequency as a percent of the max frequency. */
1078     perf = 100 * freq / levels[0].total_set.freq - req;
1079     if (perf < 0)
1080 	perf = 0;
1081     else if (perf > 100)
1082 	perf = 100;
1083     desired_freq = levels[0].total_set.freq * perf / 100;
1084 
1085     if (desired_freq < freq) {
1086 	/* Find the closest available frequency, rounding down. */
1087 	for (i = 0; i < num_levels; i++)
1088 	    if (levels[i].total_set.freq <= desired_freq)
1089 		break;
1090 
1091 	/* If we didn't find a relevant setting, use the lowest. */
1092 	if (i == num_levels)
1093 	    i--;
1094     } else {
1095 	/* If we didn't decrease frequency yet, don't increase it. */
1096 	if (!sc->tz_cooling_updated) {
1097 	    sc->tz_cooling_active = FALSE;
1098 	    goto out;
1099 	}
1100 
1101 	/* Use saved cpu frequency as maximum value. */
1102 	if (desired_freq > sc->tz_cooling_saved_freq)
1103 	    desired_freq = sc->tz_cooling_saved_freq;
1104 
1105 	/* Find the closest available frequency, rounding up. */
1106 	for (i = num_levels - 1; i >= 0; i--)
1107 	    if (levels[i].total_set.freq >= desired_freq)
1108 		break;
1109 
1110 	/* If we didn't find a relevant setting, use the highest. */
1111 	if (i == -1)
1112 	    i++;
1113 
1114 	/* If we're going to the highest frequency, restore the old setting. */
1115 	if (i == 0 || desired_freq == sc->tz_cooling_saved_freq) {
1116 	    error = acpi_tz_cpufreq_restore(sc);
1117 	    if (error == 0)
1118 		sc->tz_cooling_active = FALSE;
1119 	    goto out;
1120 	}
1121     }
1122 
1123     /* If we are going to a new frequency, activate it. */
1124     if (levels[i].total_set.freq != freq) {
1125 	ACPI_VPRINT(sc->tz_dev, acpi_device_get_parent_softc(sc->tz_dev),
1126 	    "temperature %d.%dC: %screasing clock speed "
1127 	    "from %d MHz to %d MHz\n",
1128 	    TZ_KELVTOC(sc->tz_temperature),
1129 	    (freq > levels[i].total_set.freq) ? "de" : "in",
1130 	    freq, levels[i].total_set.freq);
1131 	error = CPUFREQ_SET(dev, &levels[i], CPUFREQ_PRIO_KERN);
1132 	if (error == 0 && !sc->tz_cooling_updated) {
1133 	    sc->tz_cooling_saved_freq = freq;
1134 	    sc->tz_cooling_updated = TRUE;
1135 	}
1136     }
1137 
1138 out:
1139     if (levels)
1140 	free(levels, M_TEMP);
1141     return (error);
1142 }
1143 
1144 /*
1145  * Passive cooling thread; monitors current temperature according to the
1146  * cooling interval and calculates whether to scale back CPU frequency.
1147  */
1148 static void
1149 acpi_tz_cooling_thread(void *arg)
1150 {
1151     struct acpi_tz_softc *sc;
1152     int error, perf, curr_temp, prev_temp;
1153 
1154     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
1155 
1156     sc = (struct acpi_tz_softc *)arg;
1157 
1158     prev_temp = sc->tz_temperature;
1159     while (sc->tz_cooling_enabled) {
1160 	if (sc->tz_cooling_active)
1161 	    (void)acpi_tz_get_temperature(sc);
1162 	curr_temp = sc->tz_temperature;
1163 	if (curr_temp >= sc->tz_zone.psv)
1164 	    sc->tz_cooling_active = TRUE;
1165 	if (sc->tz_cooling_active) {
1166 	    perf = sc->tz_zone.tc1 * (curr_temp - prev_temp) +
1167 		   sc->tz_zone.tc2 * (curr_temp - sc->tz_zone.psv);
1168 	    perf /= 10;
1169 
1170 	    if (perf != 0) {
1171 		error = acpi_tz_cpufreq_update(sc, perf);
1172 
1173 		/*
1174 		 * If error and not simply a higher priority setting was
1175 		 * active, disable cooling.
1176 		 */
1177 		if (error != 0 && error != EPERM) {
1178 		    device_printf(sc->tz_dev,
1179 			"failed to set new freq, disabling passive cooling\n");
1180 		    sc->tz_cooling_enabled = FALSE;
1181 		}
1182 	    }
1183 	}
1184 	prev_temp = curr_temp;
1185 	tsleep(&sc->tz_cooling_proc, PZERO, "cooling",
1186 	    hz * sc->tz_zone.tsp / 10);
1187     }
1188     if (sc->tz_cooling_active) {
1189 	acpi_tz_cpufreq_restore(sc);
1190 	sc->tz_cooling_active = FALSE;
1191     }
1192     sc->tz_cooling_proc = NULL;
1193     ACPI_LOCK(thermal);
1194     sc->tz_cooling_proc_running = FALSE;
1195     ACPI_UNLOCK(thermal);
1196     kproc_exit(0);
1197 }
1198 
1199 /*
1200  * TODO: We ignore _PSL (list of cooling devices) since cpufreq enumerates
1201  * all CPUs for us.  However, it's possible in the future _PSL will
1202  * reference non-CPU devices so we may want to support it then.
1203  */
1204 static int
1205 acpi_tz_cooling_is_available(struct acpi_tz_softc *sc)
1206 {
1207     return (sc->tz_zone.tc1 != -1 && sc->tz_zone.tc2 != -1 &&
1208 	sc->tz_zone.tsp != -1 && sc->tz_zone.tsp != 0 &&
1209 	sc->tz_zone.psv != -1);
1210 }
1211 
1212 static int
1213 acpi_tz_cooling_thread_start(struct acpi_tz_softc *sc)
1214 {
1215     int error;
1216 
1217     ACPI_LOCK(thermal);
1218     if (sc->tz_cooling_proc_running) {
1219 	ACPI_UNLOCK(thermal);
1220 	return (0);
1221     }
1222     sc->tz_cooling_proc_running = TRUE;
1223     ACPI_UNLOCK(thermal);
1224     error = 0;
1225     if (sc->tz_cooling_proc == NULL) {
1226 	error = kproc_create(acpi_tz_cooling_thread, sc,
1227 	    &sc->tz_cooling_proc, RFHIGHPID, 0, "acpi_cooling%d",
1228 	    device_get_unit(sc->tz_dev));
1229 	if (error != 0) {
1230 	    device_printf(sc->tz_dev, "could not create thread - %d", error);
1231 	    ACPI_LOCK(thermal);
1232 	    sc->tz_cooling_proc_running = FALSE;
1233 	    ACPI_UNLOCK(thermal);
1234 	}
1235     }
1236     return (error);
1237 }
1238