xref: /titanic_50/usr/src/uts/common/sys/cpudrv.h (revision 62a24de03df1f2399ceda704cb3874dabc98bbbd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef _SYS_CPUDRV_H
27 #define	_SYS_CPUDRV_H
28 
29 #include <sys/promif.h>
30 #include <sys/cpuvar.h>
31 #include <sys/taskq.h>
32 
33 #ifdef	__cplusplus
34 extern "C" {
35 #endif
36 
37 #ifdef _KERNEL
38 
39 /*
40  * CPU power management data
41  */
42 /*
43  * Data related to a particular speed.
44  *
45  * All per speed data nodes for a CPU are linked together using down_spd.
46  * The link list is ordered with first node containing data for
47  * normal (maximum) speed. up_spd points to the next speed up. Currently
48  * all up_spd's point to the normal speed but this can be changed in future.
49  * quant_cnt is the number of ticks when monitoring system will be called
50  * next. There are different quant_cnt for different speeds.
51  *
52  * Note that 'speed' has different meaning depending upon the platform.
53  * On SPARC, the speed is really a divisor of the maximum speed (e.g., a speed
54  * of 2 means that it's 1/2 the maximum speed). On x86, speed is a processor
55  * frequency.
56  */
57 typedef struct cpudrv_pm_spd {
58 	uint_t			speed;		/* platform dependent notion */
59 	uint_t			quant_cnt;	/* quantum count in ticks */
60 	struct cpudrv_pm_spd	*down_spd;	/* ptr to next speed down */
61 	struct cpudrv_pm_spd	*up_spd;	/* ptr to next speed up */
62 	uint_t			idle_hwm;	/* down if idle thread >= hwm */
63 	uint_t			idle_lwm;	/* up if idle thread < lwm */
64 	uint_t			idle_bhwm_cnt;	/* # of iters idle is < hwm */
65 	uint_t			idle_blwm_cnt;	/* # of iters idle is < lwm */
66 	uint_t			user_hwm;	/* up if user thread > hwm */
67 	int			user_lwm;	/* down if user thread <= lwm */
68 	int			pm_level;	/* power level for framework */
69 } cpudrv_pm_spd_t;
70 
71 /*
72  * Power management data
73  */
74 typedef struct cpudrv_pm {
75 	cpudrv_pm_spd_t	*head_spd;	/* ptr to head of speed */
76 	cpudrv_pm_spd_t	*cur_spd;	/* ptr to current speed */
77 	cpudrv_pm_spd_t	*targ_spd;	/* target speed when cur_spd */
78 					/* is unknown (i.e. NULL) */
79 	uint_t		num_spd;	/* number of speeds */
80 	hrtime_t	lastquan_mstate[NCMSTATES]; /* last quantum's mstate */
81 	clock_t		lastquan_lbolt;	/* last quantum's lbolt */
82 	int		pm_busycnt;	/* pm_busy_component() count  */
83 	taskq_t		*tq;		/* taskq handler for CPU monitor */
84 	timeout_id_t	timeout_id;	/* cpudrv_pm_monitor()'s timeout_id */
85 	int		timeout_count;	/* count dispatched timeouts */
86 	kmutex_t	timeout_lock;	/* protect timeout_count */
87 	kcondvar_t	timeout_cv;	/* wait on timeout_count change */
88 #if defined(__x86)
89 	kthread_t	*pm_governor_thread; /* governor thread */
90 #endif
91 	boolean_t	pm_started;	/* PM really started */
92 } cpudrv_pm_t;
93 
/*
 * Idle & user thread water marks, in percent.
 *
 * The idle marks and the iteration limits are tuned per platform; the user
 * high water mark and the buffer zone are common to all platforms.
 */
#if defined(__x86)
#define	CPUDRV_PM_IDLE_HWM		85	/* idle high water mark */
#define	CPUDRV_PM_IDLE_LWM		70	/* idle low water mark */
#define	CPUDRV_PM_IDLE_BLWM_CNT_MAX	1    /* # of iters idle can be < lwm */
#define	CPUDRV_PM_IDLE_BHWM_CNT_MAX	1    /* # of iters idle can be < hwm */
#else
#define	CPUDRV_PM_IDLE_HWM		98	/* idle high water mark */
#define	CPUDRV_PM_IDLE_LWM		8	/* idle low water mark */
#define	CPUDRV_PM_IDLE_BLWM_CNT_MAX	2    /* # of iters idle can be < lwm */
#define	CPUDRV_PM_IDLE_BHWM_CNT_MAX	2    /* # of iters idle can be < hwm */
#endif
#define	CPUDRV_PM_USER_HWM		20	/* user high water mark */
#define	CPUDRV_PM_IDLE_BUF_ZONE		4    /* buffer zone when going down */
110 
111 
/*
 * Maximums for creating 'pm-components' property.
 *
 * CPUDRV_PM_COMP_MAX_VAL is the largest number that fits in
 * CPUDRV_PM_COMP_MAX_DIG decimal digits; keep the two in sync.
 */
#define	CPUDRV_PM_COMP_MAX_DIG	4	/* max digits in power level */
					/* or divisor */
#define	CPUDRV_PM_COMP_MAX_VAL	9999	/* max value in above digits */
118 
/*
 * Component number for calls to PM framework
 */
#define	CPUDRV_PM_COMP_NUM	0	/* first component is 0 */
123 
/*
 * Quantum counts for normal and other clock speeds in terms of ticks.
 *
 * In determining the quantum count, we need to balance two opposing factors:
 *
 *	1) Minimal delay when a user starts using a CPU that is in low
 *	power mode -- requires that we monitor more frequently,
 *
 *	2) Extra code executed because of frequent monitoring -- requires
 *	that we monitor less frequently.
 *
 * We reach a tradeoff between these two requirements by monitoring
 * more frequently when we are in low speed mode (CPUDRV_PM_QUANT_CNT_OTHR)
 * so we can bring the CPU up without the user noticing it. Moreover, at
 * low speed we are not using the CPU much, so the extra code execution
 * should be fine. Since we are in no hurry to bring the CPU down, and at
 * normal speed we might really be using the CPU fully, we monitor less
 * frequently (CPUDRV_PM_QUANT_CNT_NORMAL).
 *
 * On x86 both counts are currently one second. Both macros expand to an
 * expression in terms of 'hz', so they are only usable where 'hz' is in
 * scope.
 */
#if defined(__x86)
#define	CPUDRV_PM_QUANT_CNT_NORMAL	(hz * 1)	/* 1 sec */
#else
#define	CPUDRV_PM_QUANT_CNT_NORMAL	(hz * 5)	/* 5 sec */
#endif
#define	CPUDRV_PM_QUANT_CNT_OTHR	(hz * 1)	/* 1 sec */
149 
/*
 * Taskq parameters for the CPU monitor
 */
#define	CPUDRV_PM_TASKQ_THREADS		1    /* # threads to run CPU monitor */
#define	CPUDRV_PM_TASKQ_MIN		2	/* min # of taskq entries */
#define	CPUDRV_PM_TASKQ_MAX		2	/* max # of taskq entries */
156 
157 
158 /*
159  * Device driver state structure
160  */
161 typedef struct cpudrv_devstate {
162 	dev_info_t	*dip;		/* devinfo handle */
163 	processorid_t	cpu_id;		/* CPU number for this node */
164 	cpudrv_pm_t	cpudrv_pm;	/* power management data */
165 	kmutex_t	lock;		/* protects state struct */
166 	void		*mach_state; /* machine specific state */
167 } cpudrv_devstate_t;
168 
169 extern void	*cpudrv_state;
170 
/*
 * Debugging definitions
 *
 * In DEBUG builds, DPRINTF(flag, args) prints via _PRINTF when any bit of
 * 'flag' is set in cpudrv_debug. 'args' is a parenthesized printf-style
 * argument list, e.g.:
 *
 *	DPRINTF(D_ATTACH, ("attach: instance %d\n", instance));
 *
 * In both build flavours DPRINTF() expands to exactly one statement that
 * must be terminated with a semicolon by the caller, so it is safe to use
 * as the unbraced body of an if/else. 'flag' is parenthesized so that an
 * expression such as (D_POWER | D_PM_INIT) is tested as a whole.
 */
#ifdef	DEBUG
#define	D_INIT			0x00000001
#define	D_FINI			0x00000002
#define	D_ATTACH		0x00000004
#define	D_DETACH		0x00000008
#define	D_POWER			0x00000010
#define	D_PM_INIT		0x00000020
#define	D_PM_FREE		0x00000040
#define	D_PM_COMP_CREATE	0x00000080
#define	D_PM_MONITOR		0x00000100
#define	D_PM_MONITOR_VERBOSE	0x00000200
#define	D_PM_MONITOR_DELAY	0x00000400

extern uint_t	cpudrv_debug;

#define	_PRINTF prom_printf
#define	DPRINTF(flag, args)					\
	do {							\
		if (cpudrv_debug & (flag))			\
			_PRINTF args;				\
	} while (0)
#else
#define	DPRINTF(flag, args)	((void)0)
#endif /* DEBUG */
194 
195 extern int cpudrv_pm_change_speed(cpudrv_devstate_t *, cpudrv_pm_spd_t *);
196 extern boolean_t cpudrv_pm_get_cpu_id(dev_info_t *, processorid_t *);
197 extern boolean_t cpudrv_pm_power_ready(void);
198 extern boolean_t cpudrv_pm_is_governor_thread(cpudrv_pm_t *);
199 extern boolean_t cpudrv_mach_pm_init(cpudrv_devstate_t *);
200 extern void cpudrv_mach_pm_free(cpudrv_devstate_t *);
201 
202 #endif /* _KERNEL */
203 
204 #ifdef	__cplusplus
205 }
206 #endif
207 
208 #endif /* _SYS_CPUDRV_H */
209