xref: /freebsd/sys/kern/kern_cpu.c (revision e22cd41c01f173af255596d1a051d61dd45ef1c3)
173347b07SNate Lawson /*-
273347b07SNate Lawson  * Copyright (c) 2004-2005 Nate Lawson (SDG)
373347b07SNate Lawson  * All rights reserved.
473347b07SNate Lawson  *
573347b07SNate Lawson  * Redistribution and use in source and binary forms, with or without
673347b07SNate Lawson  * modification, are permitted provided that the following conditions
773347b07SNate Lawson  * are met:
873347b07SNate Lawson  * 1. Redistributions of source code must retain the above copyright
973347b07SNate Lawson  *    notice, this list of conditions and the following disclaimer.
1073347b07SNate Lawson  * 2. Redistributions in binary form must reproduce the above copyright
1173347b07SNate Lawson  *    notice, this list of conditions and the following disclaimer in the
1273347b07SNate Lawson  *    documentation and/or other materials provided with the distribution.
1373347b07SNate Lawson  *
1473347b07SNate Lawson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1573347b07SNate Lawson  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1673347b07SNate Lawson  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1773347b07SNate Lawson  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1873347b07SNate Lawson  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1973347b07SNate Lawson  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2073347b07SNate Lawson  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2173347b07SNate Lawson  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2273347b07SNate Lawson  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2373347b07SNate Lawson  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2473347b07SNate Lawson  * SUCH DAMAGE.
2573347b07SNate Lawson  */
2673347b07SNate Lawson 
2773347b07SNate Lawson #include <sys/cdefs.h>
2873347b07SNate Lawson __FBSDID("$FreeBSD$");
2973347b07SNate Lawson 
3073347b07SNate Lawson #include <sys/param.h>
3173347b07SNate Lawson #include <sys/bus.h>
3273347b07SNate Lawson #include <sys/cpu.h>
3373347b07SNate Lawson #include <sys/eventhandler.h>
3473347b07SNate Lawson #include <sys/kernel.h>
3573347b07SNate Lawson #include <sys/malloc.h>
3673347b07SNate Lawson #include <sys/module.h>
3773347b07SNate Lawson #include <sys/proc.h>
3873347b07SNate Lawson #include <sys/queue.h>
3973347b07SNate Lawson #include <sys/sched.h>
4073347b07SNate Lawson #include <sys/sysctl.h>
4173347b07SNate Lawson #include <sys/systm.h>
4273347b07SNate Lawson #include <sys/sbuf.h>
430325089dSNate Lawson #include <sys/timetc.h>
4473347b07SNate Lawson 
4573347b07SNate Lawson #include "cpufreq_if.h"
4673347b07SNate Lawson 
4773347b07SNate Lawson /*
4873347b07SNate Lawson  * Common CPU frequency glue code.  Drivers for specific hardware can
4973347b07SNate Lawson  * attach this interface to allow users to get/set the CPU frequency.
5073347b07SNate Lawson  */
5173347b07SNate Lawson 
5273347b07SNate Lawson /*
5373347b07SNate Lawson  * Number of levels we can handle.  Levels are synthesized from settings
5473347b07SNate Lawson  * so for N settings there may be N^2 levels.
5573347b07SNate Lawson  */
5673347b07SNate Lawson #define CF_MAX_LEVELS	32
5773347b07SNate Lawson 
5873347b07SNate Lawson struct cpufreq_softc {
5973347b07SNate Lawson 	struct cf_level			curr_level;
6073347b07SNate Lawson 	int				priority;
6188c9b54cSNate Lawson 	int				all_count;
6273347b07SNate Lawson 	struct cf_level_lst		all_levels;
6373347b07SNate Lawson 	device_t			dev;
6473347b07SNate Lawson 	struct sysctl_ctx_list		sysctl_ctx;
6573347b07SNate Lawson };
6673347b07SNate Lawson 
6773347b07SNate Lawson struct cf_setting_array {
6873347b07SNate Lawson 	struct cf_setting		sets[MAX_SETTINGS];
6973347b07SNate Lawson 	int				count;
7073347b07SNate Lawson 	TAILQ_ENTRY(cf_setting_array)	link;
7173347b07SNate Lawson };
7273347b07SNate Lawson 
7373347b07SNate Lawson TAILQ_HEAD(cf_setting_lst, cf_setting_array);
7473347b07SNate Lawson 
7573347b07SNate Lawson static int	cpufreq_attach(device_t dev);
7673347b07SNate Lawson static int	cpufreq_detach(device_t dev);
7773347b07SNate Lawson static void	cpufreq_evaluate(void *arg);
7873347b07SNate Lawson static int	cf_set_method(device_t dev, const struct cf_level *level,
7973347b07SNate Lawson 		    int priority);
8073347b07SNate Lawson static int	cf_get_method(device_t dev, struct cf_level *level);
8173347b07SNate Lawson static int	cf_levels_method(device_t dev, struct cf_level *levels,
8273347b07SNate Lawson 		    int *count);
8388c9b54cSNate Lawson static int	cpufreq_insert_abs(struct cpufreq_softc *sc,
8473347b07SNate Lawson 		    struct cf_setting *sets, int count);
8588c9b54cSNate Lawson static int	cpufreq_expand_set(struct cpufreq_softc *sc,
8688c9b54cSNate Lawson 		    struct cf_setting_array *set_arr);
8788c9b54cSNate Lawson static struct cf_level *cpufreq_dup_set(struct cpufreq_softc *sc,
8888c9b54cSNate Lawson 		    struct cf_level *dup, struct cf_setting *set);
8973347b07SNate Lawson static int	cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS);
9073347b07SNate Lawson static int	cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS);
9173347b07SNate Lawson 
9273347b07SNate Lawson static device_method_t cpufreq_methods[] = {
9373347b07SNate Lawson 	DEVMETHOD(device_probe,		bus_generic_probe),
9473347b07SNate Lawson 	DEVMETHOD(device_attach,	cpufreq_attach),
9573347b07SNate Lawson 	DEVMETHOD(device_detach,	cpufreq_detach),
9673347b07SNate Lawson 
9773347b07SNate Lawson         DEVMETHOD(cpufreq_set,		cf_set_method),
9873347b07SNate Lawson         DEVMETHOD(cpufreq_get,		cf_get_method),
9973347b07SNate Lawson         DEVMETHOD(cpufreq_levels,	cf_levels_method),
10073347b07SNate Lawson 	{0, 0}
10173347b07SNate Lawson };
10273347b07SNate Lawson static driver_t cpufreq_driver = {
10373347b07SNate Lawson 	"cpufreq", cpufreq_methods, sizeof(struct cpufreq_softc)
10473347b07SNate Lawson };
10573347b07SNate Lawson static devclass_t cpufreq_dc;
10673347b07SNate Lawson DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0);
10773347b07SNate Lawson 
10873347b07SNate Lawson static eventhandler_tag cf_ev_tag;
10973347b07SNate Lawson 
11073347b07SNate Lawson static int
11173347b07SNate Lawson cpufreq_attach(device_t dev)
11273347b07SNate Lawson {
11373347b07SNate Lawson 	struct cpufreq_softc *sc;
11473347b07SNate Lawson 	device_t parent;
11573347b07SNate Lawson 	int numdevs;
11673347b07SNate Lawson 
11773347b07SNate Lawson 	sc = device_get_softc(dev);
11873347b07SNate Lawson 	parent = device_get_parent(dev);
11973347b07SNate Lawson 	sc->dev = dev;
12073347b07SNate Lawson 	sysctl_ctx_init(&sc->sysctl_ctx);
12173347b07SNate Lawson 	TAILQ_INIT(&sc->all_levels);
12273347b07SNate Lawson 	sc->curr_level.total_set.freq = CPUFREQ_VAL_UNKNOWN;
12373347b07SNate Lawson 
12473347b07SNate Lawson 	/*
12573347b07SNate Lawson 	 * Only initialize one set of sysctls for all CPUs.  In the future,
12673347b07SNate Lawson 	 * if multiple CPUs can have different settings, we can move these
12773347b07SNate Lawson 	 * sysctls to be under every CPU instead of just the first one.
12873347b07SNate Lawson 	 */
12973347b07SNate Lawson 	numdevs = devclass_get_count(cpufreq_dc);
13073347b07SNate Lawson 	if (numdevs > 1)
13173347b07SNate Lawson 		return (0);
13273347b07SNate Lawson 
13373347b07SNate Lawson 	SYSCTL_ADD_PROC(&sc->sysctl_ctx,
13473347b07SNate Lawson 	    SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
13573347b07SNate Lawson 	    OID_AUTO, "freq", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
13673347b07SNate Lawson 	    cpufreq_curr_sysctl, "I", "Current CPU frequency");
13773347b07SNate Lawson 	SYSCTL_ADD_PROC(&sc->sysctl_ctx,
13873347b07SNate Lawson 	    SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
13973347b07SNate Lawson 	    OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
14073347b07SNate Lawson 	    cpufreq_levels_sysctl, "A", "CPU frequency levels");
14173347b07SNate Lawson 	cf_ev_tag = EVENTHANDLER_REGISTER(cpufreq_changed, cpufreq_evaluate,
14273347b07SNate Lawson 	    NULL, EVENTHANDLER_PRI_ANY);
14373347b07SNate Lawson 
14473347b07SNate Lawson 	return (0);
14573347b07SNate Lawson }
14673347b07SNate Lawson 
14773347b07SNate Lawson static int
14873347b07SNate Lawson cpufreq_detach(device_t dev)
14973347b07SNate Lawson {
15073347b07SNate Lawson 	struct cpufreq_softc *sc;
15173347b07SNate Lawson 	int numdevs;
15273347b07SNate Lawson 
15373347b07SNate Lawson 	sc = device_get_softc(dev);
15473347b07SNate Lawson 	sysctl_ctx_free(&sc->sysctl_ctx);
15573347b07SNate Lawson 
15673347b07SNate Lawson 	/* Only clean up these resources when the last device is detaching. */
15773347b07SNate Lawson 	numdevs = devclass_get_count(cpufreq_dc);
15873347b07SNate Lawson 	if (numdevs == 1)
15973347b07SNate Lawson 		EVENTHANDLER_DEREGISTER(cpufreq_changed, cf_ev_tag);
16073347b07SNate Lawson 
16173347b07SNate Lawson 	return (0);
16273347b07SNate Lawson }
16373347b07SNate Lawson 
/*
 * Handler for the cpufreq_changed event.  Currently a stub that ignores
 * its argument and does nothing.
 */
static void
cpufreq_evaluate(void *arg)
{

	/* TODO: Re-evaluate when notified of changes to drivers. */
}
16973347b07SNate Lawson 
17073347b07SNate Lawson static int
17173347b07SNate Lawson cf_set_method(device_t dev, const struct cf_level *level, int priority)
17273347b07SNate Lawson {
17373347b07SNate Lawson 	struct cpufreq_softc *sc;
17473347b07SNate Lawson 	const struct cf_setting *set;
1750325089dSNate Lawson 	struct pcpu *pc;
1760325089dSNate Lawson 	int cpu_id, error, i;
17773347b07SNate Lawson 
17873347b07SNate Lawson 	sc = device_get_softc(dev);
17973347b07SNate Lawson 
1800325089dSNate Lawson 	/*
1810325089dSNate Lawson 	 * Check that the TSC isn't being used as a timecounter.
1820325089dSNate Lawson 	 * If it is, then return EBUSY and refuse to change the
1830325089dSNate Lawson 	 * clock speed.
1840325089dSNate Lawson 	 */
1850325089dSNate Lawson 	if (strcmp(timecounter->tc_name, "TSC") == 0)
1860325089dSNate Lawson 		return (EBUSY);
1870325089dSNate Lawson 
18873347b07SNate Lawson 	/* If already at this level, just return. */
18973347b07SNate Lawson 	if (CPUFREQ_CMP(sc->curr_level.total_set.freq, level->total_set.freq))
19073347b07SNate Lawson 		return (0);
19173347b07SNate Lawson 
1920325089dSNate Lawson 	/* If the setting is for a different CPU, switch to it. */
1930325089dSNate Lawson 	cpu_id = PCPU_GET(cpuid);
1940325089dSNate Lawson 	pc = cpu_get_pcpu(dev);
1950325089dSNate Lawson 	KASSERT(pc, ("NULL pcpu for dev %p", dev));
1960325089dSNate Lawson 	if (cpu_id != pc->pc_cpuid) {
1970325089dSNate Lawson 		mtx_lock_spin(&sched_lock);
1980325089dSNate Lawson 		sched_bind(curthread, pc->pc_cpuid);
1990325089dSNate Lawson 		mtx_unlock_spin(&sched_lock);
2000325089dSNate Lawson 	}
2010325089dSNate Lawson 
20273347b07SNate Lawson 	/* First, set the absolute frequency via its driver. */
20373347b07SNate Lawson 	set = &level->abs_set;
20473347b07SNate Lawson 	if (set->dev) {
20573347b07SNate Lawson 		if (!device_is_attached(set->dev)) {
20673347b07SNate Lawson 			error = ENXIO;
20773347b07SNate Lawson 			goto out;
20873347b07SNate Lawson 		}
20973347b07SNate Lawson 		error = CPUFREQ_DRV_SET(set->dev, set);
21073347b07SNate Lawson 		if (error) {
21173347b07SNate Lawson 			goto out;
21273347b07SNate Lawson 		}
21373347b07SNate Lawson 	}
21473347b07SNate Lawson 
21588c9b54cSNate Lawson 	/* Next, set any/all relative frequencies via their drivers. */
21688c9b54cSNate Lawson 	for (i = 0; i < level->rel_count; i++) {
21788c9b54cSNate Lawson 		set = &level->rel_set[i];
21888c9b54cSNate Lawson 		if (!device_is_attached(set->dev)) {
21988c9b54cSNate Lawson 			error = ENXIO;
22088c9b54cSNate Lawson 			goto out;
22188c9b54cSNate Lawson 		}
22288c9b54cSNate Lawson 		error = CPUFREQ_DRV_SET(set->dev, set);
22388c9b54cSNate Lawson 		if (error) {
22488c9b54cSNate Lawson 			/* XXX Back out any successful setting? */
22588c9b54cSNate Lawson 			goto out;
22688c9b54cSNate Lawson 		}
22788c9b54cSNate Lawson 	}
22873347b07SNate Lawson 
22973347b07SNate Lawson 	/* Record the current level. */
23073347b07SNate Lawson 	sc->curr_level = *level;
23173347b07SNate Lawson 	sc->priority = priority;
23273347b07SNate Lawson 	error = 0;
23373347b07SNate Lawson 
23473347b07SNate Lawson out:
2350325089dSNate Lawson 	/* If we switched to another CPU, switch back before exiting. */
2360325089dSNate Lawson 	if (cpu_id != pc->pc_cpuid) {
2370325089dSNate Lawson 		mtx_lock_spin(&sched_lock);
2380325089dSNate Lawson 		sched_unbind(curthread);
2390325089dSNate Lawson 		mtx_unlock_spin(&sched_lock);
2400325089dSNate Lawson 	}
24173347b07SNate Lawson 	if (error)
24273347b07SNate Lawson 		device_printf(set->dev, "set freq failed, err %d\n", error);
24373347b07SNate Lawson 	return (error);
24473347b07SNate Lawson }
24573347b07SNate Lawson 
24673347b07SNate Lawson static int
24773347b07SNate Lawson cf_get_method(device_t dev, struct cf_level *level)
24873347b07SNate Lawson {
24973347b07SNate Lawson 	struct cpufreq_softc *sc;
25073347b07SNate Lawson 	struct cf_level *levels;
25173347b07SNate Lawson 	struct cf_setting *curr_set, set;
25273347b07SNate Lawson 	struct pcpu *pc;
25373347b07SNate Lawson 	device_t *devs;
25473347b07SNate Lawson 	int count, error, i, numdevs;
25573347b07SNate Lawson 	uint64_t rate;
25673347b07SNate Lawson 
25773347b07SNate Lawson 	sc = device_get_softc(dev);
25873347b07SNate Lawson 	curr_set = &sc->curr_level.total_set;
25973347b07SNate Lawson 	levels = NULL;
26073347b07SNate Lawson 
26173347b07SNate Lawson 	/* If we already know the current frequency, we're done. */
26273347b07SNate Lawson 	if (curr_set->freq != CPUFREQ_VAL_UNKNOWN)
26373347b07SNate Lawson 		goto out;
26473347b07SNate Lawson 
26573347b07SNate Lawson 	/*
26673347b07SNate Lawson 	 * We need to figure out the current level.  Loop through every
26773347b07SNate Lawson 	 * driver, getting the current setting.  Then, attempt to get a best
26873347b07SNate Lawson 	 * match of settings against each level.
26973347b07SNate Lawson 	 */
27073347b07SNate Lawson 	count = CF_MAX_LEVELS;
27173347b07SNate Lawson 	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
27273347b07SNate Lawson 	if (levels == NULL)
27373347b07SNate Lawson 		return (ENOMEM);
27473347b07SNate Lawson 	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
27573347b07SNate Lawson 	if (error)
27673347b07SNate Lawson 		goto out;
27773347b07SNate Lawson 	error = device_get_children(device_get_parent(dev), &devs, &numdevs);
27873347b07SNate Lawson 	if (error)
27973347b07SNate Lawson 		goto out;
28073347b07SNate Lawson 	for (i = 0; i < numdevs && curr_set->freq == CPUFREQ_VAL_UNKNOWN; i++) {
28173347b07SNate Lawson 		if (!device_is_attached(devs[i]))
28273347b07SNate Lawson 			continue;
28373347b07SNate Lawson 		error = CPUFREQ_DRV_GET(devs[i], &set);
28473347b07SNate Lawson 		if (error)
28573347b07SNate Lawson 			continue;
28673347b07SNate Lawson 		for (i = 0; i < count; i++) {
28788c9b54cSNate Lawson 			if (CPUFREQ_CMP(set.freq, levels[i].total_set.freq)) {
28873347b07SNate Lawson 				sc->curr_level = levels[i];
28973347b07SNate Lawson 				break;
29073347b07SNate Lawson 			}
29173347b07SNate Lawson 		}
29273347b07SNate Lawson 	}
29373347b07SNate Lawson 	free(devs, M_TEMP);
29473347b07SNate Lawson 	if (curr_set->freq != CPUFREQ_VAL_UNKNOWN)
29573347b07SNate Lawson 		goto out;
29673347b07SNate Lawson 
29773347b07SNate Lawson 	/*
29873347b07SNate Lawson 	 * We couldn't find an exact match, so attempt to estimate and then
29973347b07SNate Lawson 	 * match against a level.
30073347b07SNate Lawson 	 */
30173347b07SNate Lawson 	pc = cpu_get_pcpu(dev);
30273347b07SNate Lawson 	if (pc == NULL) {
30373347b07SNate Lawson 		error = ENXIO;
30473347b07SNate Lawson 		goto out;
30573347b07SNate Lawson 	}
30673347b07SNate Lawson 	cpu_est_clockrate(pc->pc_cpuid, &rate);
30773347b07SNate Lawson 	rate /= 1000000;
30873347b07SNate Lawson 	for (i = 0; i < count; i++) {
30973347b07SNate Lawson 		if (CPUFREQ_CMP(rate, levels[i].total_set.freq)) {
31073347b07SNate Lawson 			sc->curr_level = levels[i];
31173347b07SNate Lawson 			break;
31273347b07SNate Lawson 		}
31373347b07SNate Lawson 	}
31473347b07SNate Lawson 
31573347b07SNate Lawson out:
31673347b07SNate Lawson 	if (levels)
31773347b07SNate Lawson 		free(levels, M_TEMP);
31873347b07SNate Lawson 	*level = sc->curr_level;
31973347b07SNate Lawson 	return (0);
32073347b07SNate Lawson }
32173347b07SNate Lawson 
32273347b07SNate Lawson static int
32373347b07SNate Lawson cf_levels_method(device_t dev, struct cf_level *levels, int *count)
32473347b07SNate Lawson {
32588c9b54cSNate Lawson 	struct cf_setting_array *set_arr;
32673347b07SNate Lawson 	struct cf_setting_lst rel_sets;
32773347b07SNate Lawson 	struct cpufreq_softc *sc;
32873347b07SNate Lawson 	struct cf_level *lev;
32973347b07SNate Lawson 	struct cf_setting *sets;
33073347b07SNate Lawson 	struct pcpu *pc;
33173347b07SNate Lawson 	device_t *devs;
33288c9b54cSNate Lawson 	int error, i, numdevs, set_count, type;
33373347b07SNate Lawson 	uint64_t rate;
33473347b07SNate Lawson 
33573347b07SNate Lawson 	if (levels == NULL || count == NULL)
33673347b07SNate Lawson 		return (EINVAL);
33773347b07SNate Lawson 
33873347b07SNate Lawson 	TAILQ_INIT(&rel_sets);
33973347b07SNate Lawson 	sc = device_get_softc(dev);
34073347b07SNate Lawson 	error = device_get_children(device_get_parent(dev), &devs, &numdevs);
34173347b07SNate Lawson 	if (error)
34273347b07SNate Lawson 		return (error);
34373347b07SNate Lawson 	sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
34473347b07SNate Lawson 	if (sets == NULL) {
34573347b07SNate Lawson 		free(devs, M_TEMP);
34673347b07SNate Lawson 		return (ENOMEM);
34773347b07SNate Lawson 	}
34873347b07SNate Lawson 
34973347b07SNate Lawson 	/* Get settings from all cpufreq drivers. */
35073347b07SNate Lawson 	for (i = 0; i < numdevs; i++) {
351e22cd41cSNate Lawson 		/* Skip devices that aren't ready. */
35273347b07SNate Lawson 		if (!device_is_attached(devs[i]))
35373347b07SNate Lawson 			continue;
354e22cd41cSNate Lawson 
355e22cd41cSNate Lawson 		/*
356e22cd41cSNate Lawson 		 * Get settings, skipping drivers that offer no settings or
357e22cd41cSNate Lawson 		 * provide settings for informational purposes only.
358e22cd41cSNate Lawson 		 */
35973347b07SNate Lawson 		set_count = MAX_SETTINGS;
36073347b07SNate Lawson 		error = CPUFREQ_DRV_SETTINGS(devs[i], sets, &set_count, &type);
361e22cd41cSNate Lawson 		if (error || set_count == 0 || (type & CPUFREQ_FLAG_INFO_ONLY))
36273347b07SNate Lawson 			continue;
36373347b07SNate Lawson 
364e22cd41cSNate Lawson 		/* Add the settings to our absolute/relative lists. */
3650325089dSNate Lawson 		switch (type & CPUFREQ_TYPE_MASK) {
36688c9b54cSNate Lawson 		case CPUFREQ_TYPE_ABSOLUTE:
36788c9b54cSNate Lawson 			error = cpufreq_insert_abs(sc, sets, set_count);
36888c9b54cSNate Lawson 			break;
36988c9b54cSNate Lawson 		case CPUFREQ_TYPE_RELATIVE:
37088c9b54cSNate Lawson 			set_arr = malloc(sizeof(*set_arr), M_TEMP, M_NOWAIT);
37188c9b54cSNate Lawson 			if (set_arr == NULL) {
37288c9b54cSNate Lawson 				error = ENOMEM;
37388c9b54cSNate Lawson 				goto out;
37488c9b54cSNate Lawson 			}
37588c9b54cSNate Lawson 			bcopy(sets, set_arr->sets, set_count * sizeof(*sets));
37688c9b54cSNate Lawson 			set_arr->count = set_count;
37788c9b54cSNate Lawson 			TAILQ_INSERT_TAIL(&rel_sets, set_arr, link);
37888c9b54cSNate Lawson 			break;
37988c9b54cSNate Lawson 		default:
38088c9b54cSNate Lawson 			error = EINVAL;
38188c9b54cSNate Lawson 			break;
38288c9b54cSNate Lawson 		}
38388c9b54cSNate Lawson 		if (error)
38473347b07SNate Lawson 			goto out;
38573347b07SNate Lawson 	}
38673347b07SNate Lawson 
38773347b07SNate Lawson 	/* If there are no absolute levels, create a fake one at 100%. */
38873347b07SNate Lawson 	if (TAILQ_EMPTY(&sc->all_levels)) {
38973347b07SNate Lawson 		bzero(&sets[0], sizeof(*sets));
39073347b07SNate Lawson 		pc = cpu_get_pcpu(dev);
39173347b07SNate Lawson 		if (pc == NULL) {
39273347b07SNate Lawson 			error = ENXIO;
39373347b07SNate Lawson 			goto out;
39473347b07SNate Lawson 		}
39573347b07SNate Lawson 		cpu_est_clockrate(pc->pc_cpuid, &rate);
39673347b07SNate Lawson 		sets[0].freq = rate / 1000000;
39788c9b54cSNate Lawson 		error = cpufreq_insert_abs(sc, sets, 1);
39873347b07SNate Lawson 		if (error)
39973347b07SNate Lawson 			goto out;
40073347b07SNate Lawson 	}
40173347b07SNate Lawson 
40288c9b54cSNate Lawson 	/* Create a combined list of absolute + relative levels. */
40388c9b54cSNate Lawson 	TAILQ_FOREACH(set_arr, &rel_sets, link)
40488c9b54cSNate Lawson 		cpufreq_expand_set(sc, set_arr);
40588c9b54cSNate Lawson 
40688c9b54cSNate Lawson 	/* If the caller doesn't have enough space, return the actual count. */
40788c9b54cSNate Lawson 	if (sc->all_count > *count) {
40888c9b54cSNate Lawson 		*count = sc->all_count;
40988c9b54cSNate Lawson 		error = E2BIG;
41088c9b54cSNate Lawson 		goto out;
41188c9b54cSNate Lawson 	}
41288c9b54cSNate Lawson 
41388c9b54cSNate Lawson 	/* Finally, output the list of levels. */
41473347b07SNate Lawson 	i = 0;
41573347b07SNate Lawson 	TAILQ_FOREACH(lev, &sc->all_levels, link) {
41673347b07SNate Lawson 		levels[i] = *lev;
41773347b07SNate Lawson 		i++;
41873347b07SNate Lawson 	}
41988c9b54cSNate Lawson 	*count = sc->all_count;
42073347b07SNate Lawson 	error = 0;
42173347b07SNate Lawson 
42273347b07SNate Lawson out:
42373347b07SNate Lawson 	/* Clear all levels since we regenerate them each time. */
42473347b07SNate Lawson 	while ((lev = TAILQ_FIRST(&sc->all_levels)) != NULL) {
42573347b07SNate Lawson 		TAILQ_REMOVE(&sc->all_levels, lev, link);
42673347b07SNate Lawson 		free(lev, M_TEMP);
42773347b07SNate Lawson 	}
42888c9b54cSNate Lawson 	while ((set_arr = TAILQ_FIRST(&rel_sets)) != NULL) {
42988c9b54cSNate Lawson 		TAILQ_REMOVE(&rel_sets, set_arr, link);
43088c9b54cSNate Lawson 		free(set_arr, M_TEMP);
43188c9b54cSNate Lawson 	}
43288c9b54cSNate Lawson 	sc->all_count = 0;
43373347b07SNate Lawson 	free(devs, M_TEMP);
43473347b07SNate Lawson 	free(sets, M_TEMP);
43573347b07SNate Lawson 	return (error);
43673347b07SNate Lawson }
43773347b07SNate Lawson 
43873347b07SNate Lawson /*
43973347b07SNate Lawson  * Create levels for an array of absolute settings and insert them in
44073347b07SNate Lawson  * sorted order in the specified list.
44173347b07SNate Lawson  */
44273347b07SNate Lawson static int
44388c9b54cSNate Lawson cpufreq_insert_abs(struct cpufreq_softc *sc, struct cf_setting *sets,
44473347b07SNate Lawson     int count)
44573347b07SNate Lawson {
44688c9b54cSNate Lawson 	struct cf_level_lst *list;
44773347b07SNate Lawson 	struct cf_level *level, *search;
44873347b07SNate Lawson 	int i;
44973347b07SNate Lawson 
45088c9b54cSNate Lawson 	list = &sc->all_levels;
45173347b07SNate Lawson 	for (i = 0; i < count; i++) {
45273347b07SNate Lawson 		level = malloc(sizeof(*level), M_TEMP, M_NOWAIT | M_ZERO);
45373347b07SNate Lawson 		if (level == NULL)
45473347b07SNate Lawson 			return (ENOMEM);
45573347b07SNate Lawson 		level->abs_set = sets[i];
45688c9b54cSNate Lawson 		level->total_set = sets[i];
45788c9b54cSNate Lawson 		level->total_set.dev = NULL;
45888c9b54cSNate Lawson 		sc->all_count++;
45973347b07SNate Lawson 
46073347b07SNate Lawson 		if (TAILQ_EMPTY(list)) {
46173347b07SNate Lawson 			TAILQ_INSERT_HEAD(list, level, link);
46273347b07SNate Lawson 			continue;
46373347b07SNate Lawson 		}
46473347b07SNate Lawson 
46573347b07SNate Lawson 		TAILQ_FOREACH_REVERSE(search, list, cf_level_lst, link) {
46688c9b54cSNate Lawson 			if (sets[i].freq <= search->total_set.freq) {
46773347b07SNate Lawson 				TAILQ_INSERT_AFTER(list, search, level, link);
46873347b07SNate Lawson 				break;
46973347b07SNate Lawson 			}
47073347b07SNate Lawson 		}
47173347b07SNate Lawson 	}
47273347b07SNate Lawson 	return (0);
47373347b07SNate Lawson }
47473347b07SNate Lawson 
47588c9b54cSNate Lawson /*
47688c9b54cSNate Lawson  * Expand a group of relative settings, creating derived levels from them.
47788c9b54cSNate Lawson  */
47888c9b54cSNate Lawson static int
47988c9b54cSNate Lawson cpufreq_expand_set(struct cpufreq_softc *sc, struct cf_setting_array *set_arr)
48088c9b54cSNate Lawson {
48188c9b54cSNate Lawson 	struct cf_level *fill, *search;
48288c9b54cSNate Lawson 	struct cf_setting *set;
48388c9b54cSNate Lawson 	int i;
48488c9b54cSNate Lawson 
48588c9b54cSNate Lawson 	TAILQ_FOREACH(search, &sc->all_levels, link) {
48688c9b54cSNate Lawson 		/* Skip this level if we've already modified it. */
48788c9b54cSNate Lawson 		for (i = 0; i < search->rel_count; i++) {
48888c9b54cSNate Lawson 			if (search->rel_set[i].dev == set_arr->sets[0].dev)
48988c9b54cSNate Lawson 				break;
49088c9b54cSNate Lawson 		}
49188c9b54cSNate Lawson 		if (i != search->rel_count)
49288c9b54cSNate Lawson 			continue;
49388c9b54cSNate Lawson 
49488c9b54cSNate Lawson 		/* Add each setting to the level, duplicating if necessary. */
49588c9b54cSNate Lawson 		for (i = 0; i < set_arr->count; i++) {
49688c9b54cSNate Lawson 			set = &set_arr->sets[i];
49788c9b54cSNate Lawson 
49888c9b54cSNate Lawson 			/*
49988c9b54cSNate Lawson 			 * If this setting is less than 100%, split the level
50088c9b54cSNate Lawson 			 * into two and add this setting to the new level.
50188c9b54cSNate Lawson 			 */
50288c9b54cSNate Lawson 			fill = search;
50388c9b54cSNate Lawson 			if (set->freq < 10000)
50488c9b54cSNate Lawson 				fill = cpufreq_dup_set(sc, search, set);
50588c9b54cSNate Lawson 
50688c9b54cSNate Lawson 			/*
50788c9b54cSNate Lawson 			 * The new level was a duplicate of an existing level
50888c9b54cSNate Lawson 			 * so we freed it.  Go to the next setting.
50988c9b54cSNate Lawson 			 */
51088c9b54cSNate Lawson 			if (fill == NULL)
51188c9b54cSNate Lawson 				continue;
51288c9b54cSNate Lawson 
51388c9b54cSNate Lawson 			/* Add this setting to the existing or new level. */
51488c9b54cSNate Lawson 			KASSERT(fill->rel_count < MAX_SETTINGS,
51588c9b54cSNate Lawson 			    ("cpufreq: too many relative drivers (%d)",
51688c9b54cSNate Lawson 			    MAX_SETTINGS));
51788c9b54cSNate Lawson 			fill->rel_set[fill->rel_count] = *set;
51888c9b54cSNate Lawson 			fill->rel_count++;
51988c9b54cSNate Lawson 		}
52088c9b54cSNate Lawson 	}
52188c9b54cSNate Lawson 
52288c9b54cSNate Lawson 	return (0);
52388c9b54cSNate Lawson }
52488c9b54cSNate Lawson 
52588c9b54cSNate Lawson static struct cf_level *
52688c9b54cSNate Lawson cpufreq_dup_set(struct cpufreq_softc *sc, struct cf_level *dup,
52788c9b54cSNate Lawson     struct cf_setting *set)
52888c9b54cSNate Lawson {
52988c9b54cSNate Lawson 	struct cf_level_lst *list;
53088c9b54cSNate Lawson 	struct cf_level *fill, *itr;
53188c9b54cSNate Lawson 	struct cf_setting *fill_set, *itr_set;
53288c9b54cSNate Lawson 	int i;
53388c9b54cSNate Lawson 
53488c9b54cSNate Lawson 	/*
53588c9b54cSNate Lawson 	 * Create a new level, copy it from the old one, and update the
53688c9b54cSNate Lawson 	 * total frequency and power by the percentage specified in the
53788c9b54cSNate Lawson 	 * relative setting.
53888c9b54cSNate Lawson 	 */
53988c9b54cSNate Lawson 	fill = malloc(sizeof(*fill), M_TEMP, M_NOWAIT);
54088c9b54cSNate Lawson 	if (fill == NULL)
54188c9b54cSNate Lawson 		return (NULL);
54288c9b54cSNate Lawson 	*fill = *dup;
54388c9b54cSNate Lawson 	fill_set = &fill->total_set;
54488c9b54cSNate Lawson 	fill_set->freq =
54588c9b54cSNate Lawson 	    ((uint64_t)fill_set->freq * set->freq) / 10000;
54688c9b54cSNate Lawson 	if (fill_set->power != CPUFREQ_VAL_UNKNOWN) {
54788c9b54cSNate Lawson 		fill_set->power = ((uint64_t)fill_set->power * set->freq)
54888c9b54cSNate Lawson 		    / 10000;
54988c9b54cSNate Lawson 	}
55088c9b54cSNate Lawson 	if (set->lat != CPUFREQ_VAL_UNKNOWN) {
55188c9b54cSNate Lawson 		if (fill_set->lat != CPUFREQ_VAL_UNKNOWN)
55288c9b54cSNate Lawson 			fill_set->lat += set->lat;
55388c9b54cSNate Lawson 		else
55488c9b54cSNate Lawson 			fill_set->lat = set->lat;
55588c9b54cSNate Lawson 	}
55688c9b54cSNate Lawson 
55788c9b54cSNate Lawson 	/*
55888c9b54cSNate Lawson 	 * If we copied an old level that we already modified (say, at 100%),
55988c9b54cSNate Lawson 	 * we need to remove that setting before adding this one.  Since we
56088c9b54cSNate Lawson 	 * process each setting array in order, we know any settings for this
56188c9b54cSNate Lawson 	 * driver will be found at the end.
56288c9b54cSNate Lawson 	 */
56388c9b54cSNate Lawson 	for (i = fill->rel_count; i != 0; i--) {
56488c9b54cSNate Lawson 		if (fill->rel_set[i - 1].dev != set->dev)
56588c9b54cSNate Lawson 			break;
56688c9b54cSNate Lawson 		fill->rel_count--;
56788c9b54cSNate Lawson 	}
56888c9b54cSNate Lawson 
56988c9b54cSNate Lawson 	/*
57088c9b54cSNate Lawson 	 * Insert the new level in sorted order.  If we find a duplicate,
57188c9b54cSNate Lawson 	 * free the new level.  We can do this since any existing level will
57288c9b54cSNate Lawson 	 * be guaranteed to have the same or less settings and thus consume
57388c9b54cSNate Lawson 	 * less power.  For example, a level with one absolute setting of
57488c9b54cSNate Lawson 	 * 800 Mhz uses less power than one composed of an absolute setting
57588c9b54cSNate Lawson 	 * of 1600 Mhz and a relative setting at 50%.
57688c9b54cSNate Lawson 	 */
57788c9b54cSNate Lawson 	list = &sc->all_levels;
57888c9b54cSNate Lawson 	if (TAILQ_EMPTY(list)) {
57988c9b54cSNate Lawson 		TAILQ_INSERT_HEAD(list, fill, link);
58088c9b54cSNate Lawson 	} else {
58188c9b54cSNate Lawson 		TAILQ_FOREACH_REVERSE(itr, list, cf_level_lst, link) {
58288c9b54cSNate Lawson 			itr_set = &itr->total_set;
58388c9b54cSNate Lawson 			if (CPUFREQ_CMP(fill_set->freq, itr_set->freq)) {
58488c9b54cSNate Lawson 				free(fill, M_TEMP);
58588c9b54cSNate Lawson 				fill = NULL;
58688c9b54cSNate Lawson 				break;
58788c9b54cSNate Lawson 			} else if (fill_set->freq < itr_set->freq) {
58888c9b54cSNate Lawson 				TAILQ_INSERT_AFTER(list, itr, fill, link);
58988c9b54cSNate Lawson 				sc->all_count++;
59088c9b54cSNate Lawson 				break;
59188c9b54cSNate Lawson 			}
59288c9b54cSNate Lawson 		}
59388c9b54cSNate Lawson 	}
59488c9b54cSNate Lawson 
59588c9b54cSNate Lawson 	return (fill);
59688c9b54cSNate Lawson }
59788c9b54cSNate Lawson 
59873347b07SNate Lawson static int
59973347b07SNate Lawson cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS)
60073347b07SNate Lawson {
60173347b07SNate Lawson 	struct cpufreq_softc *sc;
60273347b07SNate Lawson 	struct cf_level *levels;
6030325089dSNate Lawson 	int count, devcount, error, freq, i, n;
6040325089dSNate Lawson 	device_t *devs;
60573347b07SNate Lawson 
6060325089dSNate Lawson 	devs = NULL;
60773347b07SNate Lawson 	sc = oidp->oid_arg1;
6080325089dSNate Lawson 	levels = malloc(CF_MAX_LEVELS * sizeof(*levels), M_TEMP, M_NOWAIT);
60973347b07SNate Lawson 	if (levels == NULL)
61073347b07SNate Lawson 		return (ENOMEM);
61173347b07SNate Lawson 
61273347b07SNate Lawson 	error = CPUFREQ_GET(sc->dev, &levels[0]);
61373347b07SNate Lawson 	if (error)
61473347b07SNate Lawson 		goto out;
61573347b07SNate Lawson 	freq = levels[0].total_set.freq;
61673347b07SNate Lawson 	error = sysctl_handle_int(oidp, &freq, 0, req);
61773347b07SNate Lawson 	if (error != 0 || req->newptr == NULL)
61873347b07SNate Lawson 		goto out;
61973347b07SNate Lawson 
6200325089dSNate Lawson 	/*
6210325089dSNate Lawson 	 * While we only call cpufreq_get() on one device (assuming all
6220325089dSNate Lawson 	 * CPUs have equal levels), we call cpufreq_set() on all CPUs.
6230325089dSNate Lawson 	 * This is needed for some MP systems.
6240325089dSNate Lawson 	 */
6250325089dSNate Lawson 	error = devclass_get_devices(cpufreq_dc, &devs, &devcount);
62673347b07SNate Lawson 	if (error)
62773347b07SNate Lawson 		goto out;
6280325089dSNate Lawson 	for (n = 0; n < devcount; n++) {
6290325089dSNate Lawson 		count = CF_MAX_LEVELS;
6300325089dSNate Lawson 		error = CPUFREQ_LEVELS(devs[n], levels, &count);
6310325089dSNate Lawson 		if (error)
6320325089dSNate Lawson 			break;
63373347b07SNate Lawson 		for (i = 0; i < count; i++) {
63473347b07SNate Lawson 			if (CPUFREQ_CMP(levels[i].total_set.freq, freq)) {
6350325089dSNate Lawson 				error = CPUFREQ_SET(devs[n], &levels[i],
63673347b07SNate Lawson 				    CPUFREQ_PRIO_USER);
63773347b07SNate Lawson 				break;
63873347b07SNate Lawson 			}
63973347b07SNate Lawson 		}
6400325089dSNate Lawson 		if (i == count) {
64173347b07SNate Lawson 			error = EINVAL;
6420325089dSNate Lawson 			break;
6430325089dSNate Lawson 		}
6440325089dSNate Lawson 	}
64573347b07SNate Lawson 
64673347b07SNate Lawson out:
6470325089dSNate Lawson 	if (devs)
6480325089dSNate Lawson 		free(devs, M_TEMP);
64973347b07SNate Lawson 	if (levels)
65073347b07SNate Lawson 		free(levels, M_TEMP);
65173347b07SNate Lawson 	return (error);
65273347b07SNate Lawson }
65373347b07SNate Lawson 
65473347b07SNate Lawson static int
65573347b07SNate Lawson cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS)
65673347b07SNate Lawson {
65773347b07SNate Lawson 	struct cpufreq_softc *sc;
65873347b07SNate Lawson 	struct cf_level *levels;
65973347b07SNate Lawson 	struct cf_setting *set;
66073347b07SNate Lawson 	struct sbuf sb;
66173347b07SNate Lawson 	int count, error, i;
66273347b07SNate Lawson 
66373347b07SNate Lawson 	sc = oidp->oid_arg1;
66473347b07SNate Lawson 	sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
66573347b07SNate Lawson 
66673347b07SNate Lawson 	/* Get settings from the device and generate the output string. */
66773347b07SNate Lawson 	count = CF_MAX_LEVELS;
66873347b07SNate Lawson 	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
66973347b07SNate Lawson 	if (levels == NULL)
67073347b07SNate Lawson 		return (ENOMEM);
67173347b07SNate Lawson 	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
67273347b07SNate Lawson 	if (error)
67373347b07SNate Lawson 		goto out;
67473347b07SNate Lawson 	if (count) {
67573347b07SNate Lawson 		for (i = 0; i < count; i++) {
67673347b07SNate Lawson 			set = &levels[i].total_set;
67773347b07SNate Lawson 			sbuf_printf(&sb, "%d/%d ", set->freq, set->power);
67873347b07SNate Lawson 		}
67973347b07SNate Lawson 	} else
68073347b07SNate Lawson 		sbuf_cpy(&sb, "0");
68173347b07SNate Lawson 	sbuf_trim(&sb);
68273347b07SNate Lawson 	sbuf_finish(&sb);
68373347b07SNate Lawson 	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
68473347b07SNate Lawson 
68573347b07SNate Lawson out:
68673347b07SNate Lawson 	free(levels, M_TEMP);
68773347b07SNate Lawson 	sbuf_delete(&sb);
68873347b07SNate Lawson 	return (error);
68973347b07SNate Lawson }
69073347b07SNate Lawson 
69173347b07SNate Lawson int
69273347b07SNate Lawson cpufreq_register(device_t dev)
69373347b07SNate Lawson {
69473347b07SNate Lawson 	device_t cf_dev, cpu_dev;
69573347b07SNate Lawson 
69673347b07SNate Lawson 	/*
6970325089dSNate Lawson 	 * Add only one cpufreq device to each CPU.  Currently, all CPUs
6980325089dSNate Lawson 	 * must offer the same levels and be switched at the same time.
69973347b07SNate Lawson 	 */
7000325089dSNate Lawson 	cpu_dev = device_get_parent(dev);
7010325089dSNate Lawson 	KASSERT(cpu_dev != NULL, ("no parent for %p", dev));
7020325089dSNate Lawson 	if (device_find_child(cpu_dev, "cpufreq", -1))
70373347b07SNate Lawson 		return (0);
70473347b07SNate Lawson 
7050325089dSNate Lawson 	/* Add the child device and possibly sysctls. */
7060325089dSNate Lawson 	cf_dev = BUS_ADD_CHILD(cpu_dev, 0, "cpufreq", -1);
70773347b07SNate Lawson 	if (cf_dev == NULL)
70873347b07SNate Lawson 		return (ENOMEM);
70973347b07SNate Lawson 	device_quiet(cf_dev);
71073347b07SNate Lawson 
71173347b07SNate Lawson 	return (device_probe_and_attach(cf_dev));
71273347b07SNate Lawson }
71373347b07SNate Lawson 
71473347b07SNate Lawson int
71573347b07SNate Lawson cpufreq_unregister(device_t dev)
71673347b07SNate Lawson {
71773347b07SNate Lawson 	device_t cf_dev, *devs;
71873347b07SNate Lawson 	int cfcount, count, devcount, error, i, type;
71973347b07SNate Lawson 	struct cf_setting set;
72073347b07SNate Lawson 
72173347b07SNate Lawson 	/*
72273347b07SNate Lawson 	 * If this is the last cpufreq child device, remove the control
72373347b07SNate Lawson 	 * device as well.  We identify cpufreq children by calling a method
72473347b07SNate Lawson 	 * they support.
72573347b07SNate Lawson 	 */
72673347b07SNate Lawson 	error = device_get_children(device_get_parent(dev), &devs, &devcount);
72773347b07SNate Lawson 	if (error)
72873347b07SNate Lawson 		return (error);
72973347b07SNate Lawson 	cf_dev = devclass_get_device(cpufreq_dc, 0);
73073347b07SNate Lawson 	KASSERT(cf_dev != NULL, ("unregister with no cpufreq dev"));
73173347b07SNate Lawson 	cfcount = 0;
73273347b07SNate Lawson 	for (i = 0; i < devcount; i++) {
73373347b07SNate Lawson 		if (!device_is_attached(devs[i]))
73473347b07SNate Lawson 			continue;
73573347b07SNate Lawson 		count = 1;
73673347b07SNate Lawson 		if (CPUFREQ_DRV_SETTINGS(devs[i], &set, &count, &type) == 0)
73773347b07SNate Lawson 			cfcount++;
73873347b07SNate Lawson 	}
7390325089dSNate Lawson 	if (cfcount <= 1)
74073347b07SNate Lawson 		device_delete_child(device_get_parent(cf_dev), cf_dev);
74173347b07SNate Lawson 	free(devs, M_TEMP);
74273347b07SNate Lawson 
74373347b07SNate Lawson 	return (0);
74473347b07SNate Lawson }
745