// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Collabora Ltd */

#include <drm/drm_file.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/panfrost_drm.h>
#include <linux/completion.h>
#include <linux/iopoll.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include "panfrost_device.h"
#include "panfrost_features.h"
#include "panfrost_gem.h"
#include "panfrost_issues.h"
#include "panfrost_job.h"
#include "panfrost_mmu.h"
#include "panfrost_perfcnt.h"
#include "panfrost_regs.h"

#define COUNTERS_PER_BLOCK		64
#define BYTES_PER_COUNTER		4
#define BLOCKS_PER_COREGROUP		8
#define V4_SHADERS_PER_COREGROUP	4

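/*
 * Driver-wide performance counter state.
 *
 * @bo:		GEM object the GPU dumps counter values into
 * @bosize:	size of the dump buffer, in bytes
 * @buf:	kernel vmap of @bo, used to copy samples to userspace
 * @user:	file that currently owns the counters (only one at a time)
 * @lock:	protects all fields of this structure
 * @dump_comp:	signalled once counter data has been flushed to memory
 */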
struct panfrost_perfcnt {
	struct panfrost_gem_object *bo;
	size_t bosize;
	void *buf;
	struct panfrost_file_priv *user;
	struct mutex lock;
	struct completion dump_comp;
};

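/*
 * Called once the GPU has finished cleaning its caches after a counter
 * sample: the dump data is now visible in memory, so wake up the waiter
 * in panfrost_perfcnt_dump_locked().
 */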
void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev)
{
	complete(&pfdev->perfcnt->dump_comp);
}

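/*
 * Called once a counter sample has been written out by the GPU. The dump is
 * only usable after the caches have been cleaned, so kick a cache clean here;
 * completion is signalled from panfrost_perfcnt_clean_cache_done().
 */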
void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
{
	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
}

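/*
 * Trigger a manual counter sample into the perfcnt BO and wait (up to one
 * second) for the sample and the follow-up cache clean to complete.
 * Must be called with perfcnt->lock held.
 */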
static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
{
	u64 gpuva;
	int ret;

	reinit_completion(&pfdev->perfcnt->dump_comp);
	gpuva = pfdev->perfcnt->bo->node.start << PAGE_SHIFT;
	gpu_write(pfdev, GPU_PERFCNT_BASE_LO, gpuva);
	gpu_write(pfdev, GPU_PERFCNT_BASE_HI, gpuva >> 32);
	gpu_write(pfdev, GPU_INT_CLEAR,
		  GPU_IRQ_CLEAN_CACHES_COMPLETED |
		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
	gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE);
	ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp,
							msecs_to_jiffies(1000));
	if (!ret)
		ret = -ETIMEDOUT;
	else if (ret > 0)
		ret = 0;

	return ret;
}

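/*
 * Allocate and map the dump buffer, reset the counters and enable counter
 * collection in manual mode on behalf of @user. Only one file may own the
 * counters at a time; a second user gets -EBUSY until the first one disables
 * them. Must be called with perfcnt->lock held.
 */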
static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
					  struct panfrost_file_priv *user,
					  unsigned int counterset)
{
	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
	struct drm_gem_shmem_object *bo;
	u32 cfg;
	int ret;

	if (user == perfcnt->user)
		return 0;
	else if (perfcnt->user)
		return -EBUSY;

	ret = pm_runtime_get_sync(pfdev->dev);
	if (ret < 0)
		goto err_put_pm;

	bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize);
	if (IS_ERR(bo)) {
		ret = PTR_ERR(bo);
		goto err_put_pm;
	}

	perfcnt->bo = to_panfrost_bo(&bo->base);

	/* Map the perfcnt buf in the address space attached to file_priv. */
	ret = panfrost_mmu_map(perfcnt->bo);
	if (ret)
		goto err_put_bo;

	perfcnt->buf = drm_gem_shmem_vmap(&bo->base);
	if (IS_ERR(perfcnt->buf)) {
		ret = PTR_ERR(perfcnt->buf);
		goto err_put_bo;
	}

	/*
	 * Invalidate the cache and clear the counters to start from a fresh
	 * state.
	 */
	reinit_completion(&pfdev->perfcnt->dump_comp);
	gpu_write(pfdev, GPU_INT_CLEAR,
		  GPU_IRQ_CLEAN_CACHES_COMPLETED |
		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
	gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR);
	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES);
	ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp,
					  msecs_to_jiffies(1000));
	if (!ret) {
		ret = -ETIMEDOUT;
		goto err_vunmap;
	}

	perfcnt->user = user;

	/*
	 * Always use address space 0 for now.
	 * FIXME: this needs to be updated when we start using different
	 * address spaces.
	 */
	cfg = GPU_PERFCNT_CFG_AS(0) |
	      GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);

	/*
	 * Bifrost GPUs have two sets of counters, but we're only interested
	 * in the first one for now.
	 */
	if (panfrost_model_is_bifrost(pfdev))
		cfg |= GPU_PERFCNT_CFG_SETSEL(counterset);

	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff);
	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff);
	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff);

	/*
	 * Due to PRLAM-8186 we need to disable the Tiler before we enable HW
	 * counters.
	 */
	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
	else
		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);

	gpu_write(pfdev, GPU_PERFCNT_CFG, cfg);

	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);

	return 0;

err_vunmap:
	drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf);
err_put_bo:
	drm_gem_object_put_unlocked(&bo->base);
err_put_pm:
	/* Balance the pm_runtime_get_sync() above on every error path. */
	pm_runtime_put(pfdev->dev);
	return ret;
}

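/*
 * Disable all counter blocks, release the dump buffer and drop the runtime
 * PM reference taken when the counters were enabled. Only the file that
 * enabled the counters may disable them. Must be called with perfcnt->lock
 * held.
 */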
static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
					   struct panfrost_file_priv *user)
{
	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;

	if (user != perfcnt->user)
		return -EINVAL;

	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0);
	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0);
	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0);
	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
	gpu_write(pfdev, GPU_PERFCNT_CFG,
		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));

	perfcnt->user = NULL;
	drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf);
	perfcnt->buf = NULL;
	drm_gem_object_put_unlocked(&perfcnt->bo->base.base);
	perfcnt->bo = NULL;
	pm_runtime_mark_last_busy(pfdev->dev);
	pm_runtime_put_autosuspend(pfdev->dev);

	return 0;
}

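/*
 * DRM_IOCTL_PANFROST_PERFCNT_ENABLE handler: enable or disable counter
 * collection for the calling file. req->counterset must be 0, except on
 * Bifrost where set 1 is also accepted.
 *
 * Rough userspace sketch (assuming the usual libdrm drmIoctl() wrapper and
 * the uapi structs from <drm/panfrost_drm.h>):
 *
 *	struct drm_panfrost_perfcnt_enable req = {
 *		.enable = 1,
 *		.counterset = 0,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE, &req);
 */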
int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
				  struct drm_file *file_priv)
{
	struct panfrost_file_priv *pfile = file_priv->driver_priv;
	struct panfrost_device *pfdev = dev->dev_private;
	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
	struct drm_panfrost_perfcnt_enable *req = data;
	int ret;

	ret = panfrost_unstable_ioctl_check();
	if (ret)
		return ret;

	/* Only Bifrost GPUs have two sets of counters. */
	if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0))
		return -EINVAL;

	mutex_lock(&perfcnt->lock);
	if (req->enable)
		ret = panfrost_perfcnt_enable_locked(pfdev, pfile,
						     req->counterset);
	else
		ret = panfrost_perfcnt_disable_locked(pfdev, pfile);
	mutex_unlock(&perfcnt->lock);

	return ret;
}

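/*
 * DRM_IOCTL_PANFROST_PERFCNT_DUMP handler: sample the counters and copy the
 * whole dump buffer to the userspace pointer in req->buf_ptr. Only the file
 * that enabled the counters may dump them, and the destination buffer must
 * be large enough to hold perfcnt->bosize bytes.
 */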
int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
				struct drm_file *file_priv)
{
	struct panfrost_device *pfdev = dev->dev_private;
	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
	struct drm_panfrost_perfcnt_dump *req = data;
	void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr;
	int ret;

	ret = panfrost_unstable_ioctl_check();
	if (ret)
		return ret;

	mutex_lock(&perfcnt->lock);
	if (perfcnt->user != file_priv->driver_priv) {
		ret = -EINVAL;
		goto out;
	}

	ret = panfrost_perfcnt_dump_locked(pfdev);
	if (ret)
		goto out;

	if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize))
		ret = -EFAULT;

out:
	mutex_unlock(&perfcnt->lock);

	return ret;
}

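/*
 * Called when a DRM file is closed: if that file still owns the counters,
 * disable them so the next user can take over. The runtime PM reference
 * keeps the GPU powered while the counter registers are touched.
 */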
void panfrost_perfcnt_close(struct panfrost_file_priv *pfile)
{
	struct panfrost_device *pfdev = pfile->pfdev;
	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;

	pm_runtime_get_sync(pfdev->dev);
	mutex_lock(&perfcnt->lock);
	if (perfcnt->user == pfile)
		panfrost_perfcnt_disable_locked(pfdev, pfile);
	mutex_unlock(&perfcnt->lock);
	pm_runtime_mark_last_busy(pfdev->dev);
	pm_runtime_put_autosuspend(pfdev->dev);
}

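/*
 * Compute the size of the counter dump buffer from the GPU topology (the
 * layout differs between v4 and newer GPUs), make sure every counter block
 * starts disabled, and initialize the perfcnt state.
 */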
int panfrost_perfcnt_init(struct panfrost_device *pfdev)
{
	struct panfrost_perfcnt *perfcnt;
	size_t size;

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) {
		unsigned int ncoregroups;

		ncoregroups = hweight64(pfdev->features.l2_present);
		size = ncoregroups * BLOCKS_PER_COREGROUP *
		       COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
	} else {
		unsigned int nl2c, ncores;

		/*
		 * TODO: define a macro to extract the number of l2 caches from
		 * mem_features.
		 */
		nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1;

		/*
		 * shader_present might be sparse, but the counter layout
		 * forces us to dump unused regions too, hence the fls64() call
		 * instead of hweight64().
		 */
		ncores = fls64(pfdev->features.shader_present);

		/*
		 * There's always one JM and one Tiler block, hence the '+ 2'
		 * here.
		 */
		size = (nl2c + ncores + 2) *
		       COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
	}

	perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL);
	if (!perfcnt)
		return -ENOMEM;

	perfcnt->bosize = size;

	/* Start with everything disabled. */
	gpu_write(pfdev, GPU_PERFCNT_CFG,
		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);

	init_completion(&perfcnt->dump_comp);
	mutex_init(&perfcnt->lock);
	pfdev->perfcnt = perfcnt;

	return 0;
}

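/*
 * Driver teardown: make sure all counter blocks are left disabled.
 */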
void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
{
	/* Disable everything before leaving. */
	gpu_write(pfdev, GPU_PERFCNT_CFG,
		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
}
331