// SPDX-License-Identifier: MIT
#include <linux/clk.h>
#include <linux/devfreq.h>
#include <linux/math64.h>
#include <linux/platform_device.h>
#include <linux/pm_opp.h>

#include <drm/drm_managed.h>

#include <subdev/clk.h>

#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "priv.h"
#include "gk20a_devfreq.h"
#include "gk20a.h"
#include "gp10b.h"

#define PMU_BUSY_CYCLES_NORM_MAX		1000U

#define PWR_PMU_IDLE_COUNTER_TOTAL		0U
#define PWR_PMU_IDLE_COUNTER_BUSY		4U

#define PWR_PMU_IDLE_COUNT_REG_OFFSET		0x0010A508U
#define PWR_PMU_IDLE_COUNT_REG_SIZE		16U
#define PWR_PMU_IDLE_COUNT_MASK			0x7FFFFFFFU
#define PWR_PMU_IDLE_COUNT_RESET_VALUE		(0x1U << 31U)

#define PWR_PMU_IDLE_INTR_REG_OFFSET		0x0010A9E8U
#define PWR_PMU_IDLE_INTR_ENABLE_VALUE		0U

#define PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET	0x0010A9ECU
#define PWR_PMU_IDLE_INTR_STATUS_MASK		0x00000001U
#define PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE	0x1U

#define PWR_PMU_IDLE_THRESHOLD_REG_OFFSET	0x0010A8A0U
#define PWR_PMU_IDLE_THRESHOLD_REG_SIZE		4U
#define PWR_PMU_IDLE_THRESHOLD_MAX_VALUE	0x7FFFFFFFU

#define PWR_PMU_IDLE_CTRL_REG_OFFSET		0x0010A50CU
#define PWR_PMU_IDLE_CTRL_REG_SIZE		16U
#define PWR_PMU_IDLE_CTRL_VALUE_MASK		0x3U
#define PWR_PMU_IDLE_CTRL_VALUE_BUSY		0x2U
#define PWR_PMU_IDLE_CTRL_VALUE_ALWAYS		0x3U
#define PWR_PMU_IDLE_CTRL_FILTER_MASK		(0x1U << 2)
#define PWR_PMU_IDLE_CTRL_FILTER_DISABLED	0x0U

#define PWR_PMU_IDLE_MASK_REG_OFFSET		0x0010A504U
#define PWR_PMU_IDLE_MASK_REG_SIZE		16U
#define PWR_PMU_IDLE_MASK_GR_ENABLED		0x1U
#define PWR_PMU_IDLE_MASK_CE_2_ENABLED		0x200000U

/**
 * struct gk20a_devfreq - Device frequency management
 */
struct gk20a_devfreq {
	/** @devfreq: devfreq device. */
	struct devfreq *devfreq;

	/** @regs: Device registers. */
	void __iomem *regs;

	/** @gov_data: Governor data. */
	struct devfreq_simple_ondemand_data gov_data;

	/** @busy_time: Busy time. */
	ktime_t busy_time;

	/** @total_time: Total time. */
	ktime_t total_time;

	/** @time_last_update: Last update time. */
	ktime_t time_last_update;
};

static struct gk20a_devfreq *dev_to_gk20a_devfreq(struct device *dev)
{
	struct nouveau_drm *drm = dev_get_drvdata(dev);
	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
	struct nvkm_clk *base = nvkm_clk(subdev);

	switch (drm->nvkm->chipset) {
	case 0x13b:
		return gp10b_clk(base)->devfreq;
	default:
		return gk20a_clk(base)->devfreq;
	}
}
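
/*
 * The PMU exposes a bank of idle counters; each counter has its own count,
 * control, mask and threshold register, which is why the accessors below
 * multiply the counter id by a *_REG_SIZE stride. Counter 0 (TOTAL) is
 * programmed to count every cycle (PWR_PMU_IDLE_CTRL_VALUE_ALWAYS), while
 * counter 4 (BUSY) only counts cycles where the units selected by its mask
 * (GR and CE2) are active (PWR_PMU_IDLE_CTRL_VALUE_BUSY); the ratio of the
 * two approximates GPU utilization.
 */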
static void gk20a_pmu_init_perfmon_counter(struct gk20a_devfreq *gdevfreq)
{
	u32 data;

	// Set pmu idle intr status bit on total counter overflow
	writel(PWR_PMU_IDLE_INTR_ENABLE_VALUE,
	       gdevfreq->regs + PWR_PMU_IDLE_INTR_REG_OFFSET);

	writel(PWR_PMU_IDLE_THRESHOLD_MAX_VALUE,
	       gdevfreq->regs + PWR_PMU_IDLE_THRESHOLD_REG_OFFSET +
	       (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_THRESHOLD_REG_SIZE));

	// Setup counter for total cycles
	data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
		     (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));
	data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
	data |= PWR_PMU_IDLE_CTRL_VALUE_ALWAYS | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
	writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
	       (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));

	// Setup counter for busy cycles
	writel(PWR_PMU_IDLE_MASK_GR_ENABLED | PWR_PMU_IDLE_MASK_CE_2_ENABLED,
	       gdevfreq->regs + PWR_PMU_IDLE_MASK_REG_OFFSET +
	       (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_MASK_REG_SIZE));

	data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
		     (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
	data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
	data |= PWR_PMU_IDLE_CTRL_VALUE_BUSY | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
	writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
	       (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
}

static u32 gk20a_pmu_read_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
{
	u32 ret;

	ret = readl(gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
		    (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));

	return ret & PWR_PMU_IDLE_COUNT_MASK;
}

static void gk20a_pmu_reset_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
{
	writel(PWR_PMU_IDLE_COUNT_RESET_VALUE,
	       gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
	       (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));
}

static u32 gk20a_pmu_read_idle_intr_status(struct gk20a_devfreq *gdevfreq)
{
	u32 ret;

	ret = readl(gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);

	return ret & PWR_PMU_IDLE_INTR_STATUS_MASK;
}

static void gk20a_pmu_clear_idle_intr_status(struct gk20a_devfreq *gdevfreq)
{
	writel(PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE,
	       gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);
}

static void gk20a_devfreq_update_utilization(struct gk20a_devfreq *gdevfreq)
{
	ktime_t now, last;
	u64 busy_cycles, total_cycles;
	u32 norm, intr_status;

	now = ktime_get();
	last = gdevfreq->time_last_update;
	gdevfreq->total_time = ktime_us_delta(now, last);

	busy_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
	total_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
	intr_status = gk20a_pmu_read_idle_intr_status(gdevfreq);

	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);

	// On total counter overflow, assume the GPU was fully busy
	if (intr_status != 0UL) {
		norm = PMU_BUSY_CYCLES_NORM_MAX;
		gk20a_pmu_clear_idle_intr_status(gdevfreq);
	} else if (total_cycles == 0ULL || busy_cycles > total_cycles) {
		norm = PMU_BUSY_CYCLES_NORM_MAX;
	} else {
		norm = (u32)div64_u64(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX,
				      total_cycles);
	}

	gdevfreq->busy_time = div_u64(gdevfreq->total_time * norm, PMU_BUSY_CYCLES_NORM_MAX);
	gdevfreq->time_last_update = now;
}
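
/*
 * Worked example of the normalization above (illustrative numbers): with
 * busy_cycles = 250000 and total_cycles = 1000000, norm becomes
 * 250000 * 1000 / 1000000 = 250, so busy_time is reported as 25% of
 * total_time. The governor only consumes the busy/total ratio, so the
 * absolute time units cancel out.
 */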
static int gk20a_devfreq_target(struct device *dev, unsigned long *freq,
				u32 flags)
{
	struct nouveau_drm *drm = dev_get_drvdata(dev);
	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
	struct nvkm_clk *base = nvkm_clk(subdev);
	struct nvkm_pstate *pstates = base->func->pstates;
	int nr_pstates = base->func->nr_pstates;
	int i, ret;

	// Pick the lowest pstate whose GPC clock satisfies the requested rate
	for (i = 0; i < nr_pstates - 1; i++)
		if (pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV >= *freq)
			break;

	ret = nvkm_clk_ustate(base, pstates[i].pstate, 0);
	ret |= nvkm_clk_ustate(base, pstates[i].pstate, 1);
	if (ret) {
		nvkm_error(subdev, "cannot update clock\n");
		return ret;
	}

	*freq = pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV;

	return 0;
}

static int gk20a_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct nouveau_drm *drm = dev_get_drvdata(dev);
	struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
	struct nvkm_clk *base = nvkm_clk(subdev);

	*freq = nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;

	return 0;
}

static void gk20a_devfreq_reset(struct gk20a_devfreq *gdevfreq)
{
	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
	gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
	gk20a_pmu_clear_idle_intr_status(gdevfreq);

	gdevfreq->busy_time = 0;
	gdevfreq->total_time = 0;
	gdevfreq->time_last_update = ktime_get();
}

static int gk20a_devfreq_get_dev_status(struct device *dev,
					struct devfreq_dev_status *status)
{
	struct nouveau_drm *drm = dev_get_drvdata(dev);
	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);

	gk20a_devfreq_get_cur_freq(dev, &status->current_frequency);

	gk20a_devfreq_update_utilization(gdevfreq);

	status->busy_time = ktime_to_ns(gdevfreq->busy_time);
	status->total_time = ktime_to_ns(gdevfreq->total_time);

	gk20a_devfreq_reset(gdevfreq);

	NV_DEBUG(drm, "busy %lu total %lu %lu %% freq %lu MHz\n",
		 status->busy_time, status->total_time,
		 status->total_time >= 100 ?
			status->busy_time / (status->total_time / 100) : 0UL,
		 status->current_frequency / 1000 / 1000);

	return 0;
}

static struct devfreq_dev_profile gk20a_devfreq_profile = {
	.timer = DEVFREQ_TIMER_DELAYED,
	.polling_ms = 50,
	.target = gk20a_devfreq_target,
	.get_cur_freq = gk20a_devfreq_get_cur_freq,
	.get_dev_status = gk20a_devfreq_get_dev_status,
};
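
/*
 * With the simple_ondemand parameters chosen below, the governor jumps to
 * the maximum frequency once the measured load exceeds upthreshold (45%),
 * holds the current frequency while the load stays above
 * upthreshold - downdifferential (40%), and otherwise scales the frequency
 * down proportionally to the load; the 50 ms delayed timer in the profile
 * above sets the sampling period.
 */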
int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **gdevfreq)
{
	struct nvkm_device *device = base->subdev.device;
	struct nouveau_drm *drm = dev_get_drvdata(device->dev);
	struct nvkm_device_tegra *tdev = device->func->tegra(device);
	struct nvkm_pstate *pstates = base->func->pstates;
	int nr_pstates = base->func->nr_pstates;
	struct gk20a_devfreq *new_gdevfreq;
	int i;

	new_gdevfreq = drmm_kzalloc(drm->dev, sizeof(struct gk20a_devfreq), GFP_KERNEL);
	if (!new_gdevfreq)
		return -ENOMEM;

	new_gdevfreq->regs = tdev->regs;

	for (i = 0; i < nr_pstates; i++)
		dev_pm_opp_add(base->subdev.device->dev,
			       pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV, 0);

	gk20a_pmu_init_perfmon_counter(new_gdevfreq);
	gk20a_devfreq_reset(new_gdevfreq);

	gk20a_devfreq_profile.initial_freq =
		nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;

	new_gdevfreq->gov_data.upthreshold = 45;
	new_gdevfreq->gov_data.downdifferential = 5;

	new_gdevfreq->devfreq = devm_devfreq_add_device(device->dev,
							&gk20a_devfreq_profile,
							DEVFREQ_GOV_SIMPLE_ONDEMAND,
							&new_gdevfreq->gov_data);
	if (IS_ERR(new_gdevfreq->devfreq))
		return PTR_ERR(new_gdevfreq->devfreq);

	*gdevfreq = new_gdevfreq;

	return 0;
}

int gk20a_devfreq_resume(struct device *dev)
{
	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);

	if (!gdevfreq || !gdevfreq->devfreq)
		return 0;

	return devfreq_resume_device(gdevfreq->devfreq);
}

int gk20a_devfreq_suspend(struct device *dev)
{
	struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);

	if (!gdevfreq || !gdevfreq->devfreq)
		return 0;

	return devfreq_suspend_device(gdevfreq->devfreq);
}
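
/*
 * Integration sketch (illustrative, not part of this file): the
 * chip-specific clk constructor is expected to call gk20a_devfreq_init()
 * and stash the result in the ->devfreq member that dev_to_gk20a_devfreq()
 * dereferences above. Assuming a gk20a_clk/gp10b_clk with such a member,
 * the hookup could look like:
 *
 *	ret = gk20a_devfreq_init(&clk->base, &clk->devfreq);
 *	if (ret)
 *		return ret;
 *
 * Runtime suspend/resume would then route through gk20a_devfreq_suspend()
 * and gk20a_devfreq_resume() with the DRM device as argument.
 */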