xref: /linux/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c (revision face6a3615a649456eb4549f6d474221d877d604)
// SPDX-License-Identifier: MIT
#include <linux/clk.h>
#include <linux/devfreq.h>
#include <linux/math64.h>
#include <linux/platform_device.h>
#include <linux/pm_opp.h>

#include <drm/drm_managed.h>

#include <subdev/clk.h>

#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "priv.h"
#include "gk20a_devfreq.h"
#include "gk20a.h"
#include "gp10b.h"

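/*
 * Busy cycles are normalized against total cycles: a norm of
 * PMU_BUSY_CYCLES_NORM_MAX means the GPU was busy for the entire
 * sampling window.
 */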
#define PMU_BUSY_CYCLES_NORM_MAX		1000U

#define PWR_PMU_IDLE_COUNTER_TOTAL		0U
#define PWR_PMU_IDLE_COUNTER_BUSY		4U

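/*
 * The PMU exposes banks of idle-counter registers. Each *_REG_OFFSET
 * below is the base of a bank and each *_REG_SIZE the per-counter
 * stride in bytes, so counter N lives at REG_OFFSET + N * REG_SIZE.
 */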
#define PWR_PMU_IDLE_COUNT_REG_OFFSET		0x0010A508U
#define PWR_PMU_IDLE_COUNT_REG_SIZE		16U
#define PWR_PMU_IDLE_COUNT_MASK			0x7FFFFFFFU
#define PWR_PMU_IDLE_COUNT_RESET_VALUE		(0x1U << 31U)

#define PWR_PMU_IDLE_INTR_REG_OFFSET		0x0010A9E8U
#define PWR_PMU_IDLE_INTR_ENABLE_VALUE		0U

#define PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET	0x0010A9ECU
#define PWR_PMU_IDLE_INTR_STATUS_MASK		0x00000001U
#define PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE	0x1U

#define PWR_PMU_IDLE_THRESHOLD_REG_OFFSET	0x0010A8A0U
#define PWR_PMU_IDLE_THRESHOLD_REG_SIZE		4U
#define PWR_PMU_IDLE_THRESHOLD_MAX_VALUE	0x7FFFFFFFU

#define PWR_PMU_IDLE_CTRL_REG_OFFSET		0x0010A50CU
#define PWR_PMU_IDLE_CTRL_REG_SIZE		16U
#define PWR_PMU_IDLE_CTRL_VALUE_MASK		0x3U
#define PWR_PMU_IDLE_CTRL_VALUE_BUSY		0x2U
#define PWR_PMU_IDLE_CTRL_VALUE_ALWAYS		0x3U
#define PWR_PMU_IDLE_CTRL_FILTER_MASK		(0x1U << 2)
#define PWR_PMU_IDLE_CTRL_FILTER_DISABLED	0x0U

#define PWR_PMU_IDLE_MASK_REG_OFFSET		0x0010A504U
#define PWR_PMU_IDLE_MASK_REG_SIZE		16U
#define PWR_PMU_IDLE_MASK_GR_ENABLED		0x1U
#define PWR_PMU_IDLE_MASK_CE_2_ENABLED		0x200000U

/**
 * struct gk20a_devfreq - Device frequency management
 */
struct gk20a_devfreq {
	/** @devfreq: devfreq device. */
	struct devfreq *devfreq;

	/** @regs: Device registers. */
	void __iomem *regs;

	/** @gov_data: Governor data. */
	struct devfreq_simple_ondemand_data gov_data;

	/** @busy_time: GPU busy time accumulated over the current sampling window. */
	ktime_t busy_time;

	/** @total_time: Total wall time of the current sampling window. */
	ktime_t total_time;

	/** @time_last_update: Timestamp of the last utilization update. */
	ktime_t time_last_update;
};

static struct gk20a_devfreq *dev_to_gk20a_devfreq(struct device *dev)
{
	struct nouveau_drm *drm = dev_get_drvdata(dev);
	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
	struct nvkm_clk *base = nvkm_clk(subdev);

	switch (drm->nvkm->chipset) {
	case 0x13b: return gp10b_clk(base)->devfreq;
	default: return gk20a_clk(base)->devfreq;
	}
}

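/*
 * Program two PMU idle counters: the TOTAL counter counts every cycle
 * (CTRL_VALUE_ALWAYS), while the BUSY counter only counts cycles on
 * which the engines selected in its idle mask (GR and CE2) are busy.
 * The threshold on the TOTAL counter latches the idle interrupt status
 * bit on overflow, which gk20a_devfreq_update_utilization() uses to
 * detect a wrapped sampling window.
 */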
static void gk20a_pmu_init_perfmon_counter(struct gk20a_devfreq *gdevfreq)
{
	u32 data;

	// Set pmu idle intr status bit on total counter overflow
	writel(PWR_PMU_IDLE_INTR_ENABLE_VALUE,
	       gdevfreq->regs + PWR_PMU_IDLE_INTR_REG_OFFSET);

	writel(PWR_PMU_IDLE_THRESHOLD_MAX_VALUE,
	       gdevfreq->regs + PWR_PMU_IDLE_THRESHOLD_REG_OFFSET +
	       (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_THRESHOLD_REG_SIZE));

	// Setup counter for total cycles
	data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
		     (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));
	data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
	data |= PWR_PMU_IDLE_CTRL_VALUE_ALWAYS | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
	writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
		     (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));

	// Setup counter for busy cycles
	writel(PWR_PMU_IDLE_MASK_GR_ENABLED | PWR_PMU_IDLE_MASK_CE_2_ENABLED,
	       gdevfreq->regs + PWR_PMU_IDLE_MASK_REG_OFFSET +
	       (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_MASK_REG_SIZE));

	data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
		     (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
	data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
	data |= PWR_PMU_IDLE_CTRL_VALUE_BUSY | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
	writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
		     (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
}

static u32 gk20a_pmu_read_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
{
	u32 ret;

	ret = readl(gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
		    (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));

	return ret & PWR_PMU_IDLE_COUNT_MASK;
}

static void gk20a_pmu_reset_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
{
	writel(PWR_PMU_IDLE_COUNT_RESET_VALUE, gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
					       (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));
}

static u32 gk20a_pmu_read_idle_intr_status(struct gk20a_devfreq *gdevfreq)
{
	u32 ret;

	ret = readl(gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);

	return ret & PWR_PMU_IDLE_INTR_STATUS_MASK;
}

static void gk20a_pmu_clear_idle_intr_status(struct gk20a_devfreq *gdevfreq)
{
	writel(PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE,
	       gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);
}

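/*
 * Sample and reset the busy/total cycle counters, then convert the
 * busy ratio into the busy_time/total_time pair consumed by devfreq.
 * If the idle interrupt status bit is set, the TOTAL counter has
 * overflowed and the window is treated as fully busy rather than
 * trusting a wrapped ratio.
 */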
static void gk20a_devfreq_update_utilization(struct gk20a_devfreq *gdevfreq)
{
	ktime_t now, last;
	u64 busy_cycles, total_cycles;
	u32 norm, intr_status;

	now = ktime_get();
	last = gdevfreq->time_last_update;
	gdevfreq->total_time = ktime_us_delta(now, last);

	busy_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
	total_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
	intr_status = gk20a_pmu_read_idle_intr_status(gdevfreq);

	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);

	if (intr_status != 0UL) {
		norm = PMU_BUSY_CYCLES_NORM_MAX;
		gk20a_pmu_clear_idle_intr_status(gdevfreq);
	} else if (total_cycles == 0ULL || busy_cycles > total_cycles) {
		norm = PMU_BUSY_CYCLES_NORM_MAX;
	} else {
		norm = (u32)div64_u64(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX,
				      total_cycles);
	}

	gdevfreq->busy_time = div_u64(gdevfreq->total_time * norm, PMU_BUSY_CYCLES_NORM_MAX);
	gdevfreq->time_last_update = now;
}

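/*
 * devfreq ->target hook: walk the pstate table in order and select the
 * first entry fast enough for the requested frequency (or the last one
 * if none is), then request it via nvkm_clk_ustate() for both
 * power-source states (0 and 1).
 */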
static int gk20a_devfreq_target(struct device *dev, unsigned long *freq,
				u32 flags)
{
	struct nouveau_drm *drm = dev_get_drvdata(dev);
	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
	struct nvkm_clk *base = nvkm_clk(subdev);
	struct nvkm_pstate *pstates = base->func->pstates;
	int nr_pstates = base->func->nr_pstates;
	int i, ret;

	for (i = 0; i < nr_pstates - 1; i++)
		if (pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV >= *freq)
			break;

	ret = nvkm_clk_ustate(base, pstates[i].pstate, 0);
	ret |= nvkm_clk_ustate(base, pstates[i].pstate, 1);
	if (ret) {
		nvkm_error(subdev, "cannot update clock\n");
		return ret;
	}

	*freq = pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV;

	return 0;
}

static int gk20a_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct nouveau_drm *drm = dev_get_drvdata(dev);
	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
	struct nvkm_clk *base = nvkm_clk(subdev);

	*freq = nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;

	return 0;
}

static void gk20a_devfreq_reset(struct gk20a_devfreq *gdevfreq)
{
	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
	gk20a_pmu_clear_idle_intr_status(gdevfreq);

	gdevfreq->busy_time = 0;
	gdevfreq->total_time = 0;
	gdevfreq->time_last_update = ktime_get();
}

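/*
 * devfreq ->get_dev_status hook: report the current frequency together
 * with the busy/total times accumulated since the previous call, then
 * reset the counters so the next sampling window starts clean.
 */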
static int gk20a_devfreq_get_dev_status(struct device *dev,
					struct devfreq_dev_status *status)
{
	struct nouveau_drm *drm = dev_get_drvdata(dev);
	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);

	gk20a_devfreq_get_cur_freq(dev, &status->current_frequency);

	gk20a_devfreq_update_utilization(gdevfreq);

	status->busy_time = ktime_to_ns(gdevfreq->busy_time);
	status->total_time = ktime_to_ns(gdevfreq->total_time);

	gk20a_devfreq_reset(gdevfreq);

	NV_DEBUG(drm, "busy %lu total %lu %lu %% freq %lu MHz\n",
		 status->busy_time, status->total_time,
		 status->total_time ? status->busy_time * 100 / status->total_time : 0,
		 status->current_frequency / 1000 / 1000);

	return 0;
}

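/*
 * Poll utilization every 50ms. DEVFREQ_TIMER_DELAYED keeps sampling on
 * schedule even when the CPU is idle, unlike a deferrable timer.
 */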
static struct devfreq_dev_profile gk20a_devfreq_profile = {
	.timer = DEVFREQ_TIMER_DELAYED,
	.polling_ms = 50,
	.target = gk20a_devfreq_target,
	.get_cur_freq = gk20a_devfreq_get_cur_freq,
	.get_dev_status = gk20a_devfreq_get_dev_status,
};

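/*
 * Build the OPP table from the pstate list, prime the PMU perfmon
 * counters and register the GPU with devfreq under the simple_ondemand
 * governor. The upthreshold/downdifferential values bias the governor
 * towards scaling up once utilization crosses 45%.
 */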
int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **gdevfreq)
{
	struct nvkm_device *device = base->subdev.device;
	struct nouveau_drm *drm = dev_get_drvdata(device->dev);
	struct nvkm_device_tegra *tdev = device->func->tegra(device);
	struct nvkm_pstate *pstates = base->func->pstates;
	int nr_pstates = base->func->nr_pstates;
	struct gk20a_devfreq *new_gdevfreq;
	int i;

	new_gdevfreq = drmm_kzalloc(drm->dev, sizeof(struct gk20a_devfreq), GFP_KERNEL);
	if (!new_gdevfreq)
		return -ENOMEM;

	new_gdevfreq->regs = tdev->regs;

	for (i = 0; i < nr_pstates; i++)
		dev_pm_opp_add(base->subdev.device->dev,
			       pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV, 0);

	gk20a_pmu_init_perfmon_counter(new_gdevfreq);
	gk20a_devfreq_reset(new_gdevfreq);

	gk20a_devfreq_profile.initial_freq =
		nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;

	new_gdevfreq->gov_data.upthreshold = 45;
	new_gdevfreq->gov_data.downdifferential = 5;

	new_gdevfreq->devfreq = devm_devfreq_add_device(device->dev,
							&gk20a_devfreq_profile,
							DEVFREQ_GOV_SIMPLE_ONDEMAND,
							&new_gdevfreq->gov_data);
	if (IS_ERR(new_gdevfreq->devfreq))
		return PTR_ERR(new_gdevfreq->devfreq);

	*gdevfreq = new_gdevfreq;

	return 0;
}

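/*
 * Suspend/resume hooks: pause and restart devfreq polling around GPU
 * power transitions. Both tolerate a missing devfreq device so they
 * are safe to call before gk20a_devfreq_init() has completed.
 */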
int gk20a_devfreq_resume(struct device *dev)
{
	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);

	if (!gdevfreq || !gdevfreq->devfreq)
		return 0;

	return devfreq_resume_device(gdevfreq->devfreq);
}

int gk20a_devfreq_suspend(struct device *dev)
{
	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);

	if (!gdevfreq || !gdevfreq->devfreq)
		return 0;

	return devfreq_suspend_device(gdevfreq->devfreq);
}