xref: /freebsd/sys/kern/kern_cpu.c (revision 6af83ee0d2941d18880b6aaa2b4facd1d30c6106)
1 /*-
2  * Copyright (c) 2004-2005 Nate Lawson (SDG)
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/bus.h>
32 #include <sys/cpu.h>
33 #include <sys/eventhandler.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/proc.h>
38 #include <sys/queue.h>
39 #include <sys/sched.h>
40 #include <sys/sysctl.h>
41 #include <sys/systm.h>
42 #include <sys/sbuf.h>
43 #include <sys/timetc.h>
44 
45 #include "cpufreq_if.h"
46 
47 /*
48  * Common CPU frequency glue code.  Drivers for specific hardware can
49  * attach this interface to allow users to get/set the CPU frequency.
50  */
51 
52 /*
53  * Number of levels we can handle.  Levels are synthesized from settings
54  * so for N settings there may be N^2 levels.
55  */
56 #define CF_MAX_LEVELS	32
57 
/*
 * Per-device state for the cpufreq control device.
 *
 * curr_level	Last level recorded by cf_set_method()/cf_get_method(); its
 *		total_set.freq is CPUFREQ_VAL_UNKNOWN until one is known.
 * priority	Priority passed to the most recent successful cf_set_method().
 * all_count	Number of levels counted while building all_levels.
 * all_levels	Working list of levels; rebuilt and emptied again on every
 *		cf_levels_method() call.
 * dev		Back pointer to the cpufreq device itself.
 * sysctl_ctx	Context owning the sysctls added in cpufreq_attach().
 */
58 struct cpufreq_softc {
59 	struct cf_level			curr_level;
60 	int				priority;
61 	int				all_count;
62 	struct cf_level_lst		all_levels;
63 	device_t			dev;
64 	struct sysctl_ctx_list		sysctl_ctx;
65 };
66 
/*
 * One driver's batch of settings, kept on a temporary list while
 * cf_levels_method() collects relative settings from every driver.
 * "count" is the number of valid entries in "sets".
 */
67 struct cf_setting_array {
68 	struct cf_setting		sets[MAX_SETTINGS];
69 	int				count;
70 	TAILQ_ENTRY(cf_setting_array)	link;
71 };
72 
/* List head type for a queue of cf_setting_array entries. */
73 TAILQ_HEAD(cf_setting_lst, cf_setting_array);
74 
/*
 * Forward declarations: newbus entry points, the cpufreq interface
 * methods, the level-list helpers and the two sysctl handlers.
 */
75 static int	cpufreq_attach(device_t dev);
76 static int	cpufreq_detach(device_t dev);
77 static void	cpufreq_evaluate(void *arg);
78 static int	cf_set_method(device_t dev, const struct cf_level *level,
79 		    int priority);
80 static int	cf_get_method(device_t dev, struct cf_level *level);
81 static int	cf_levels_method(device_t dev, struct cf_level *levels,
82 		    int *count);
83 static int	cpufreq_insert_abs(struct cpufreq_softc *sc,
84 		    struct cf_setting *sets, int count);
85 static int	cpufreq_expand_set(struct cpufreq_softc *sc,
86 		    struct cf_setting_array *set_arr);
87 static struct cf_level *cpufreq_dup_set(struct cpufreq_softc *sc,
88 		    struct cf_level *dup, struct cf_setting *set);
89 static int	cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS);
90 static int	cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS);
91 
/*
 * Method table for the "cpufreq" driver: generic probe, our attach/detach,
 * and the cpufreq_set/get/levels interface methods implemented below.
 */
92 static device_method_t cpufreq_methods[] = {
93 	DEVMETHOD(device_probe,		bus_generic_probe),
94 	DEVMETHOD(device_attach,	cpufreq_attach),
95 	DEVMETHOD(device_detach,	cpufreq_detach),
96 
97         DEVMETHOD(cpufreq_set,		cf_set_method),
98         DEVMETHOD(cpufreq_get,		cf_get_method),
99         DEVMETHOD(cpufreq_levels,	cf_levels_method),
100 	{0, 0}
101 };
/* Driver description; the softc is a struct cpufreq_softc. */
102 static driver_t cpufreq_driver = {
103 	"cpufreq", cpufreq_methods, sizeof(struct cpufreq_softc)
104 };
/* Devclass shared by all cpufreq devices; registered under the "cpu" bus. */
105 static devclass_t cpufreq_dc;
106 DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0);
107 
/* Tag returned by EVENTHANDLER_REGISTER in attach, used to deregister. */
108 static eventhandler_tag cf_ev_tag;
109 
110 static int
111 cpufreq_attach(device_t dev)
112 {
113 	struct cpufreq_softc *sc;
114 	device_t parent;
115 	int numdevs;
116 
117 	sc = device_get_softc(dev);
118 	parent = device_get_parent(dev);
119 	sc->dev = dev;
120 	sysctl_ctx_init(&sc->sysctl_ctx);
121 	TAILQ_INIT(&sc->all_levels);
122 	sc->curr_level.total_set.freq = CPUFREQ_VAL_UNKNOWN;
123 
124 	/*
125 	 * Only initialize one set of sysctls for all CPUs.  In the future,
126 	 * if multiple CPUs can have different settings, we can move these
127 	 * sysctls to be under every CPU instead of just the first one.
128 	 */
129 	numdevs = devclass_get_count(cpufreq_dc);
130 	if (numdevs > 1)
131 		return (0);
132 
133 	SYSCTL_ADD_PROC(&sc->sysctl_ctx,
134 	    SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
135 	    OID_AUTO, "freq", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
136 	    cpufreq_curr_sysctl, "I", "Current CPU frequency");
137 	SYSCTL_ADD_PROC(&sc->sysctl_ctx,
138 	    SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
139 	    OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
140 	    cpufreq_levels_sysctl, "A", "CPU frequency levels");
141 	cf_ev_tag = EVENTHANDLER_REGISTER(cpufreq_changed, cpufreq_evaluate,
142 	    NULL, EVENTHANDLER_PRI_ANY);
143 
144 	return (0);
145 }
146 
147 static int
148 cpufreq_detach(device_t dev)
149 {
150 	struct cpufreq_softc *sc;
151 	int numdevs;
152 
153 	sc = device_get_softc(dev);
154 	sysctl_ctx_free(&sc->sysctl_ctx);
155 
156 	/* Only clean up these resources when the last device is detaching. */
157 	numdevs = devclass_get_count(cpufreq_dc);
158 	if (numdevs == 1)
159 		EVENTHANDLER_DEREGISTER(cpufreq_changed, cf_ev_tag);
160 
161 	return (0);
162 }
163 
/*
 * Handler registered for the cpufreq_changed event.  Currently a stub
 * that ignores its argument.
 */
static void
cpufreq_evaluate(void *arg)
{
	/* TODO: Re-evaluate when notified of changes to drivers. */
}
169 
170 static int
171 cf_set_method(device_t dev, const struct cf_level *level, int priority)
172 {
173 	struct cpufreq_softc *sc;
174 	const struct cf_setting *set;
175 	struct pcpu *pc;
176 	int cpu_id, error, i;
177 
178 	sc = device_get_softc(dev);
179 
180 	/*
181 	 * Check that the TSC isn't being used as a timecounter.
182 	 * If it is, then return EBUSY and refuse to change the
183 	 * clock speed.
184 	 */
185 	if (strcmp(timecounter->tc_name, "TSC") == 0)
186 		return (EBUSY);
187 
188 	/* If already at this level, just return. */
189 	if (CPUFREQ_CMP(sc->curr_level.total_set.freq, level->total_set.freq))
190 		return (0);
191 
192 	/* If the setting is for a different CPU, switch to it. */
193 	cpu_id = PCPU_GET(cpuid);
194 	pc = cpu_get_pcpu(dev);
195 	KASSERT(pc, ("NULL pcpu for dev %p", dev));
196 	if (cpu_id != pc->pc_cpuid) {
197 		mtx_lock_spin(&sched_lock);
198 		sched_bind(curthread, pc->pc_cpuid);
199 		mtx_unlock_spin(&sched_lock);
200 	}
201 
202 	/* First, set the absolute frequency via its driver. */
203 	set = &level->abs_set;
204 	if (set->dev) {
205 		if (!device_is_attached(set->dev)) {
206 			error = ENXIO;
207 			goto out;
208 		}
209 		error = CPUFREQ_DRV_SET(set->dev, set);
210 		if (error) {
211 			goto out;
212 		}
213 	}
214 
215 	/* Next, set any/all relative frequencies via their drivers. */
216 	for (i = 0; i < level->rel_count; i++) {
217 		set = &level->rel_set[i];
218 		if (!device_is_attached(set->dev)) {
219 			error = ENXIO;
220 			goto out;
221 		}
222 		error = CPUFREQ_DRV_SET(set->dev, set);
223 		if (error) {
224 			/* XXX Back out any successful setting? */
225 			goto out;
226 		}
227 	}
228 
229 	/* Record the current level. */
230 	sc->curr_level = *level;
231 	sc->priority = priority;
232 	error = 0;
233 
234 out:
235 	/* If we switched to another CPU, switch back before exiting. */
236 	if (cpu_id != pc->pc_cpuid) {
237 		mtx_lock_spin(&sched_lock);
238 		sched_unbind(curthread);
239 		mtx_unlock_spin(&sched_lock);
240 	}
241 	if (error)
242 		device_printf(set->dev, "set freq failed, err %d\n", error);
243 	return (error);
244 }
245 
246 static int
247 cf_get_method(device_t dev, struct cf_level *level)
248 {
249 	struct cpufreq_softc *sc;
250 	struct cf_level *levels;
251 	struct cf_setting *curr_set, set;
252 	struct pcpu *pc;
253 	device_t *devs;
254 	int count, error, i, numdevs;
255 	uint64_t rate;
256 
257 	sc = device_get_softc(dev);
258 	curr_set = &sc->curr_level.total_set;
259 	levels = NULL;
260 
261 	/* If we already know the current frequency, we're done. */
262 	if (curr_set->freq != CPUFREQ_VAL_UNKNOWN)
263 		goto out;
264 
265 	/*
266 	 * We need to figure out the current level.  Loop through every
267 	 * driver, getting the current setting.  Then, attempt to get a best
268 	 * match of settings against each level.
269 	 */
270 	count = CF_MAX_LEVELS;
271 	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
272 	if (levels == NULL)
273 		return (ENOMEM);
274 	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
275 	if (error)
276 		goto out;
277 	error = device_get_children(device_get_parent(dev), &devs, &numdevs);
278 	if (error)
279 		goto out;
280 	for (i = 0; i < numdevs && curr_set->freq == CPUFREQ_VAL_UNKNOWN; i++) {
281 		if (!device_is_attached(devs[i]))
282 			continue;
283 		error = CPUFREQ_DRV_GET(devs[i], &set);
284 		if (error)
285 			continue;
286 		for (i = 0; i < count; i++) {
287 			if (CPUFREQ_CMP(set.freq, levels[i].total_set.freq)) {
288 				sc->curr_level = levels[i];
289 				break;
290 			}
291 		}
292 	}
293 	free(devs, M_TEMP);
294 	if (curr_set->freq != CPUFREQ_VAL_UNKNOWN)
295 		goto out;
296 
297 	/*
298 	 * We couldn't find an exact match, so attempt to estimate and then
299 	 * match against a level.
300 	 */
301 	pc = cpu_get_pcpu(dev);
302 	if (pc == NULL) {
303 		error = ENXIO;
304 		goto out;
305 	}
306 	cpu_est_clockrate(pc->pc_cpuid, &rate);
307 	rate /= 1000000;
308 	for (i = 0; i < count; i++) {
309 		if (CPUFREQ_CMP(rate, levels[i].total_set.freq)) {
310 			sc->curr_level = levels[i];
311 			break;
312 		}
313 	}
314 
315 out:
316 	if (levels)
317 		free(levels, M_TEMP);
318 	*level = sc->curr_level;
319 	return (0);
320 }
321 
322 static int
323 cf_levels_method(device_t dev, struct cf_level *levels, int *count)
324 {
325 	struct cf_setting_array *set_arr;
326 	struct cf_setting_lst rel_sets;
327 	struct cpufreq_softc *sc;
328 	struct cf_level *lev;
329 	struct cf_setting *sets;
330 	struct pcpu *pc;
331 	device_t *devs;
332 	int error, i, numdevs, set_count, type;
333 	uint64_t rate;
334 
335 	if (levels == NULL || count == NULL)
336 		return (EINVAL);
337 
338 	TAILQ_INIT(&rel_sets);
339 	sc = device_get_softc(dev);
340 	error = device_get_children(device_get_parent(dev), &devs, &numdevs);
341 	if (error)
342 		return (error);
343 	sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
344 	if (sets == NULL) {
345 		free(devs, M_TEMP);
346 		return (ENOMEM);
347 	}
348 
349 	/* Get settings from all cpufreq drivers. */
350 	for (i = 0; i < numdevs; i++) {
351 		/* Skip devices that aren't ready. */
352 		if (!device_is_attached(devs[i]))
353 			continue;
354 
355 		/*
356 		 * Get settings, skipping drivers that offer no settings or
357 		 * provide settings for informational purposes only.
358 		 */
359 		set_count = MAX_SETTINGS;
360 		error = CPUFREQ_DRV_SETTINGS(devs[i], sets, &set_count, &type);
361 		if (error || set_count == 0 || (type & CPUFREQ_FLAG_INFO_ONLY))
362 			continue;
363 
364 		/* Add the settings to our absolute/relative lists. */
365 		switch (type & CPUFREQ_TYPE_MASK) {
366 		case CPUFREQ_TYPE_ABSOLUTE:
367 			error = cpufreq_insert_abs(sc, sets, set_count);
368 			break;
369 		case CPUFREQ_TYPE_RELATIVE:
370 			set_arr = malloc(sizeof(*set_arr), M_TEMP, M_NOWAIT);
371 			if (set_arr == NULL) {
372 				error = ENOMEM;
373 				goto out;
374 			}
375 			bcopy(sets, set_arr->sets, set_count * sizeof(*sets));
376 			set_arr->count = set_count;
377 			TAILQ_INSERT_TAIL(&rel_sets, set_arr, link);
378 			break;
379 		default:
380 			error = EINVAL;
381 			break;
382 		}
383 		if (error)
384 			goto out;
385 	}
386 
387 	/* If there are no absolute levels, create a fake one at 100%. */
388 	if (TAILQ_EMPTY(&sc->all_levels)) {
389 		bzero(&sets[0], sizeof(*sets));
390 		pc = cpu_get_pcpu(dev);
391 		if (pc == NULL) {
392 			error = ENXIO;
393 			goto out;
394 		}
395 		cpu_est_clockrate(pc->pc_cpuid, &rate);
396 		sets[0].freq = rate / 1000000;
397 		error = cpufreq_insert_abs(sc, sets, 1);
398 		if (error)
399 			goto out;
400 	}
401 
402 	/* Create a combined list of absolute + relative levels. */
403 	TAILQ_FOREACH(set_arr, &rel_sets, link)
404 		cpufreq_expand_set(sc, set_arr);
405 
406 	/* If the caller doesn't have enough space, return the actual count. */
407 	if (sc->all_count > *count) {
408 		*count = sc->all_count;
409 		error = E2BIG;
410 		goto out;
411 	}
412 
413 	/* Finally, output the list of levels. */
414 	i = 0;
415 	TAILQ_FOREACH(lev, &sc->all_levels, link) {
416 		levels[i] = *lev;
417 		i++;
418 	}
419 	*count = sc->all_count;
420 	error = 0;
421 
422 out:
423 	/* Clear all levels since we regenerate them each time. */
424 	while ((lev = TAILQ_FIRST(&sc->all_levels)) != NULL) {
425 		TAILQ_REMOVE(&sc->all_levels, lev, link);
426 		free(lev, M_TEMP);
427 	}
428 	while ((set_arr = TAILQ_FIRST(&rel_sets)) != NULL) {
429 		TAILQ_REMOVE(&rel_sets, set_arr, link);
430 		free(set_arr, M_TEMP);
431 	}
432 	sc->all_count = 0;
433 	free(devs, M_TEMP);
434 	free(sets, M_TEMP);
435 	return (error);
436 }
437 
438 /*
439  * Create levels for an array of absolute settings and insert them in
440  * sorted order in the specified list.
441  */
442 static int
443 cpufreq_insert_abs(struct cpufreq_softc *sc, struct cf_setting *sets,
444     int count)
445 {
446 	struct cf_level_lst *list;
447 	struct cf_level *level, *search;
448 	int i;
449 
450 	list = &sc->all_levels;
451 	for (i = 0; i < count; i++) {
452 		level = malloc(sizeof(*level), M_TEMP, M_NOWAIT | M_ZERO);
453 		if (level == NULL)
454 			return (ENOMEM);
455 		level->abs_set = sets[i];
456 		level->total_set = sets[i];
457 		level->total_set.dev = NULL;
458 		sc->all_count++;
459 
460 		if (TAILQ_EMPTY(list)) {
461 			TAILQ_INSERT_HEAD(list, level, link);
462 			continue;
463 		}
464 
465 		TAILQ_FOREACH_REVERSE(search, list, cf_level_lst, link) {
466 			if (sets[i].freq <= search->total_set.freq) {
467 				TAILQ_INSERT_AFTER(list, search, level, link);
468 				break;
469 			}
470 		}
471 	}
472 	return (0);
473 }
474 
475 /*
476  * Expand a group of relative settings, creating derived levels from them.
477  */
478 static int
479 cpufreq_expand_set(struct cpufreq_softc *sc, struct cf_setting_array *set_arr)
480 {
481 	struct cf_level *fill, *search;
482 	struct cf_setting *set;
483 	int i;
484 
485 	TAILQ_FOREACH(search, &sc->all_levels, link) {
486 		/* Skip this level if we've already modified it. */
487 		for (i = 0; i < search->rel_count; i++) {
488 			if (search->rel_set[i].dev == set_arr->sets[0].dev)
489 				break;
490 		}
491 		if (i != search->rel_count)
492 			continue;
493 
494 		/* Add each setting to the level, duplicating if necessary. */
495 		for (i = 0; i < set_arr->count; i++) {
496 			set = &set_arr->sets[i];
497 
498 			/*
499 			 * If this setting is less than 100%, split the level
500 			 * into two and add this setting to the new level.
501 			 */
502 			fill = search;
503 			if (set->freq < 10000)
504 				fill = cpufreq_dup_set(sc, search, set);
505 
506 			/*
507 			 * The new level was a duplicate of an existing level
508 			 * so we freed it.  Go to the next setting.
509 			 */
510 			if (fill == NULL)
511 				continue;
512 
513 			/* Add this setting to the existing or new level. */
514 			KASSERT(fill->rel_count < MAX_SETTINGS,
515 			    ("cpufreq: too many relative drivers (%d)",
516 			    MAX_SETTINGS));
517 			fill->rel_set[fill->rel_count] = *set;
518 			fill->rel_count++;
519 		}
520 	}
521 
522 	return (0);
523 }
524 
525 static struct cf_level *
526 cpufreq_dup_set(struct cpufreq_softc *sc, struct cf_level *dup,
527     struct cf_setting *set)
528 {
529 	struct cf_level_lst *list;
530 	struct cf_level *fill, *itr;
531 	struct cf_setting *fill_set, *itr_set;
532 	int i;
533 
534 	/*
535 	 * Create a new level, copy it from the old one, and update the
536 	 * total frequency and power by the percentage specified in the
537 	 * relative setting.
538 	 */
539 	fill = malloc(sizeof(*fill), M_TEMP, M_NOWAIT);
540 	if (fill == NULL)
541 		return (NULL);
542 	*fill = *dup;
543 	fill_set = &fill->total_set;
544 	fill_set->freq =
545 	    ((uint64_t)fill_set->freq * set->freq) / 10000;
546 	if (fill_set->power != CPUFREQ_VAL_UNKNOWN) {
547 		fill_set->power = ((uint64_t)fill_set->power * set->freq)
548 		    / 10000;
549 	}
550 	if (set->lat != CPUFREQ_VAL_UNKNOWN) {
551 		if (fill_set->lat != CPUFREQ_VAL_UNKNOWN)
552 			fill_set->lat += set->lat;
553 		else
554 			fill_set->lat = set->lat;
555 	}
556 
557 	/*
558 	 * If we copied an old level that we already modified (say, at 100%),
559 	 * we need to remove that setting before adding this one.  Since we
560 	 * process each setting array in order, we know any settings for this
561 	 * driver will be found at the end.
562 	 */
563 	for (i = fill->rel_count; i != 0; i--) {
564 		if (fill->rel_set[i - 1].dev != set->dev)
565 			break;
566 		fill->rel_count--;
567 	}
568 
569 	/*
570 	 * Insert the new level in sorted order.  If we find a duplicate,
571 	 * free the new level.  We can do this since any existing level will
572 	 * be guaranteed to have the same or less settings and thus consume
573 	 * less power.  For example, a level with one absolute setting of
574 	 * 800 Mhz uses less power than one composed of an absolute setting
575 	 * of 1600 Mhz and a relative setting at 50%.
576 	 */
577 	list = &sc->all_levels;
578 	if (TAILQ_EMPTY(list)) {
579 		TAILQ_INSERT_HEAD(list, fill, link);
580 	} else {
581 		TAILQ_FOREACH_REVERSE(itr, list, cf_level_lst, link) {
582 			itr_set = &itr->total_set;
583 			if (CPUFREQ_CMP(fill_set->freq, itr_set->freq)) {
584 				free(fill, M_TEMP);
585 				fill = NULL;
586 				break;
587 			} else if (fill_set->freq < itr_set->freq) {
588 				TAILQ_INSERT_AFTER(list, itr, fill, link);
589 				sc->all_count++;
590 				break;
591 			}
592 		}
593 	}
594 
595 	return (fill);
596 }
597 
598 static int
599 cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS)
600 {
601 	struct cpufreq_softc *sc;
602 	struct cf_level *levels;
603 	int count, devcount, error, freq, i, n;
604 	device_t *devs;
605 
606 	devs = NULL;
607 	sc = oidp->oid_arg1;
608 	levels = malloc(CF_MAX_LEVELS * sizeof(*levels), M_TEMP, M_NOWAIT);
609 	if (levels == NULL)
610 		return (ENOMEM);
611 
612 	error = CPUFREQ_GET(sc->dev, &levels[0]);
613 	if (error)
614 		goto out;
615 	freq = levels[0].total_set.freq;
616 	error = sysctl_handle_int(oidp, &freq, 0, req);
617 	if (error != 0 || req->newptr == NULL)
618 		goto out;
619 
620 	/*
621 	 * While we only call cpufreq_get() on one device (assuming all
622 	 * CPUs have equal levels), we call cpufreq_set() on all CPUs.
623 	 * This is needed for some MP systems.
624 	 */
625 	error = devclass_get_devices(cpufreq_dc, &devs, &devcount);
626 	if (error)
627 		goto out;
628 	for (n = 0; n < devcount; n++) {
629 		count = CF_MAX_LEVELS;
630 		error = CPUFREQ_LEVELS(devs[n], levels, &count);
631 		if (error)
632 			break;
633 		for (i = 0; i < count; i++) {
634 			if (CPUFREQ_CMP(levels[i].total_set.freq, freq)) {
635 				error = CPUFREQ_SET(devs[n], &levels[i],
636 				    CPUFREQ_PRIO_USER);
637 				break;
638 			}
639 		}
640 		if (i == count) {
641 			error = EINVAL;
642 			break;
643 		}
644 	}
645 
646 out:
647 	if (devs)
648 		free(devs, M_TEMP);
649 	if (levels)
650 		free(levels, M_TEMP);
651 	return (error);
652 }
653 
654 static int
655 cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS)
656 {
657 	struct cpufreq_softc *sc;
658 	struct cf_level *levels;
659 	struct cf_setting *set;
660 	struct sbuf sb;
661 	int count, error, i;
662 
663 	sc = oidp->oid_arg1;
664 	sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
665 
666 	/* Get settings from the device and generate the output string. */
667 	count = CF_MAX_LEVELS;
668 	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
669 	if (levels == NULL)
670 		return (ENOMEM);
671 	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
672 	if (error)
673 		goto out;
674 	if (count) {
675 		for (i = 0; i < count; i++) {
676 			set = &levels[i].total_set;
677 			sbuf_printf(&sb, "%d/%d ", set->freq, set->power);
678 		}
679 	} else
680 		sbuf_cpy(&sb, "0");
681 	sbuf_trim(&sb);
682 	sbuf_finish(&sb);
683 	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
684 
685 out:
686 	free(levels, M_TEMP);
687 	sbuf_delete(&sb);
688 	return (error);
689 }
690 
691 int
692 cpufreq_register(device_t dev)
693 {
694 	device_t cf_dev, cpu_dev;
695 
696 	/*
697 	 * Add only one cpufreq device to each CPU.  Currently, all CPUs
698 	 * must offer the same levels and be switched at the same time.
699 	 */
700 	cpu_dev = device_get_parent(dev);
701 	KASSERT(cpu_dev != NULL, ("no parent for %p", dev));
702 	if (device_find_child(cpu_dev, "cpufreq", -1))
703 		return (0);
704 
705 	/* Add the child device and possibly sysctls. */
706 	cf_dev = BUS_ADD_CHILD(cpu_dev, 0, "cpufreq", -1);
707 	if (cf_dev == NULL)
708 		return (ENOMEM);
709 	device_quiet(cf_dev);
710 
711 	return (device_probe_and_attach(cf_dev));
712 }
713 
714 int
715 cpufreq_unregister(device_t dev)
716 {
717 	device_t cf_dev, *devs;
718 	int cfcount, count, devcount, error, i, type;
719 	struct cf_setting set;
720 
721 	/*
722 	 * If this is the last cpufreq child device, remove the control
723 	 * device as well.  We identify cpufreq children by calling a method
724 	 * they support.
725 	 */
726 	error = device_get_children(device_get_parent(dev), &devs, &devcount);
727 	if (error)
728 		return (error);
729 	cf_dev = devclass_get_device(cpufreq_dc, 0);
730 	KASSERT(cf_dev != NULL, ("unregister with no cpufreq dev"));
731 	cfcount = 0;
732 	for (i = 0; i < devcount; i++) {
733 		if (!device_is_attached(devs[i]))
734 			continue;
735 		count = 1;
736 		if (CPUFREQ_DRV_SETTINGS(devs[i], &set, &count, &type) == 0)
737 			cfcount++;
738 	}
739 	if (cfcount <= 1)
740 		device_delete_child(device_get_parent(cf_dev), cf_dev);
741 	free(devs, M_TEMP);
742 
743 	return (0);
744 }
745