xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c (revision 9fc3a18a942f74d245429211577a733930d365fa)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/kthread.h>
27 #include <linux/pci.h>
28 #include <linux/uaccess.h>
29 
30 #include <drm/drm_debugfs.h>
31 
32 #include "amdgpu.h"
33 
34 /**
35  * amdgpu_debugfs_add_files - Add simple debugfs entries
36  *
37  * @adev:  Device to attach debugfs entries to
38  * @files:  Array of function callbacks that respond to reads
39  * @nfiles: Number of callbacks to register
40  *
41  */
42 int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
43 			     const struct drm_info_list *files,
44 			     unsigned nfiles)
45 {
46 	unsigned i;
47 
48 	for (i = 0; i < adev->debugfs_count; i++) {
49 		if (adev->debugfs[i].files == files) {
50 			/* Already registered */
51 			return 0;
52 		}
53 	}
54 
55 	i = adev->debugfs_count + 1;
56 	if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) {
57 		DRM_ERROR("Reached maximum number of debugfs components.\n");
58 		DRM_ERROR("Report so we increase "
59 			  "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n");
60 		return -EINVAL;
61 	}
62 	adev->debugfs[adev->debugfs_count].files = files;
63 	adev->debugfs[adev->debugfs_count].num_files = nfiles;
64 	adev->debugfs_count = i;
65 #if defined(CONFIG_DEBUG_FS)
66 	drm_debugfs_create_files(files, nfiles,
67 				 adev->ddev->primary->debugfs_root,
68 				 adev->ddev->primary);
69 #endif
70 	return 0;
71 }
72 
73 #if defined(CONFIG_DEBUG_FS)
74 
75 /**
76  * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
77  *
78  * @read: True if reading
79  * @f: open file handle
80  * @buf: User buffer to write/read to
81  * @size: Number of bytes to write/read
82  * @pos:  Offset to seek to
83  *
84  * This debugfs entry has special meaning on the offset being sought.
85  * Various bits have different meanings:
86  *
87  * Bit 62:  Indicates a GRBM bank switch is needed
88  * Bit 61:  Indicates a SRBM bank switch is needed (implies bit 62 is
89  * 			zero)
90  * Bits 24..33: The SE or ME selector if needed
91  * Bits 34..43: The SH (or SA) or PIPE selector if needed
92  * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
93  *
94  * Bit 23:  Indicates that the PM power gating lock should be held
95  * 			This is necessary to read registers that might be
96  * 			unreliable during a power gating transistion.
97  *
98  * The lower bits are the BYTE offset of the register to read.  This
99  * allows reading multiple registers in a single call and having
100  * the returned size reflect that.
101  */
102 static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
103 		char __user *buf, size_t size, loff_t *pos)
104 {
105 	struct amdgpu_device *adev = file_inode(f)->i_private;
106 	ssize_t result = 0;
107 	int r;
108 	bool pm_pg_lock, use_bank, use_ring;
109 	unsigned instance_bank, sh_bank, se_bank, me, pipe, queue;
110 
111 	pm_pg_lock = use_bank = use_ring = false;
112 	instance_bank = sh_bank = se_bank = me = pipe = queue = 0;
113 
114 	if (size & 0x3 || *pos & 0x3 ||
115 			((*pos & (1ULL << 62)) && (*pos & (1ULL << 61))))
116 		return -EINVAL;
117 
118 	/* are we reading registers for which a PG lock is necessary? */
119 	pm_pg_lock = (*pos >> 23) & 1;
120 
121 	if (*pos & (1ULL << 62)) {
122 		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
123 		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
124 		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
125 
126 		if (se_bank == 0x3FF)
127 			se_bank = 0xFFFFFFFF;
128 		if (sh_bank == 0x3FF)
129 			sh_bank = 0xFFFFFFFF;
130 		if (instance_bank == 0x3FF)
131 			instance_bank = 0xFFFFFFFF;
132 		use_bank = 1;
133 	} else if (*pos & (1ULL << 61)) {
134 
135 		me = (*pos & GENMASK_ULL(33, 24)) >> 24;
136 		pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
137 		queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
138 
139 		use_ring = 1;
140 	} else {
141 		use_bank = use_ring = 0;
142 	}
143 
144 	*pos &= (1UL << 22) - 1;
145 
146 	if (use_bank) {
147 		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
148 		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
149 			return -EINVAL;
150 		mutex_lock(&adev->grbm_idx_mutex);
151 		amdgpu_gfx_select_se_sh(adev, se_bank,
152 					sh_bank, instance_bank);
153 	} else if (use_ring) {
154 		mutex_lock(&adev->srbm_mutex);
155 		amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue);
156 	}
157 
158 	if (pm_pg_lock)
159 		mutex_lock(&adev->pm.mutex);
160 
161 	while (size) {
162 		uint32_t value;
163 
164 		if (read) {
165 			value = RREG32(*pos >> 2);
166 			r = put_user(value, (uint32_t *)buf);
167 		} else {
168 			r = get_user(value, (uint32_t *)buf);
169 			if (!r)
170 				WREG32(*pos >> 2, value);
171 		}
172 		if (r) {
173 			result = r;
174 			goto end;
175 		}
176 
177 		result += 4;
178 		buf += 4;
179 		*pos += 4;
180 		size -= 4;
181 	}
182 
183 end:
184 	if (use_bank) {
185 		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
186 		mutex_unlock(&adev->grbm_idx_mutex);
187 	} else if (use_ring) {
188 		amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0);
189 		mutex_unlock(&adev->srbm_mutex);
190 	}
191 
192 	if (pm_pg_lock)
193 		mutex_unlock(&adev->pm.mutex);
194 
195 	return result;
196 }
197 
198 /**
199  * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
200  */
201 static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
202 					size_t size, loff_t *pos)
203 {
204 	return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
205 }
206 
207 /**
208  * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
209  */
210 static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
211 					 size_t size, loff_t *pos)
212 {
213 	return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
214 }
215 
216 
217 /**
218  * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
219  *
220  * @f: open file handle
221  * @buf: User buffer to store read data in
222  * @size: Number of bytes to read
223  * @pos:  Offset to seek to
224  *
225  * The lower bits are the BYTE offset of the register to read.  This
226  * allows reading multiple registers in a single call and having
227  * the returned size reflect that.
228  */
229 static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
230 					size_t size, loff_t *pos)
231 {
232 	struct amdgpu_device *adev = file_inode(f)->i_private;
233 	ssize_t result = 0;
234 	int r;
235 
236 	if (size & 0x3 || *pos & 0x3)
237 		return -EINVAL;
238 
239 	while (size) {
240 		uint32_t value;
241 
242 		value = RREG32_PCIE(*pos >> 2);
243 		r = put_user(value, (uint32_t *)buf);
244 		if (r)
245 			return r;
246 
247 		result += 4;
248 		buf += 4;
249 		*pos += 4;
250 		size -= 4;
251 	}
252 
253 	return result;
254 }
255 
256 /**
257  * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
258  *
259  * @f: open file handle
260  * @buf: User buffer to write data from
261  * @size: Number of bytes to write
262  * @pos:  Offset to seek to
263  *
264  * The lower bits are the BYTE offset of the register to write.  This
265  * allows writing multiple registers in a single call and having
266  * the returned size reflect that.
267  */
268 static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
269 					 size_t size, loff_t *pos)
270 {
271 	struct amdgpu_device *adev = file_inode(f)->i_private;
272 	ssize_t result = 0;
273 	int r;
274 
275 	if (size & 0x3 || *pos & 0x3)
276 		return -EINVAL;
277 
278 	while (size) {
279 		uint32_t value;
280 
281 		r = get_user(value, (uint32_t *)buf);
282 		if (r)
283 			return r;
284 
285 		WREG32_PCIE(*pos >> 2, value);
286 
287 		result += 4;
288 		buf += 4;
289 		*pos += 4;
290 		size -= 4;
291 	}
292 
293 	return result;
294 }
295 
296 /**
297  * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
298  *
299  * @f: open file handle
300  * @buf: User buffer to store read data in
301  * @size: Number of bytes to read
302  * @pos:  Offset to seek to
303  *
304  * The lower bits are the BYTE offset of the register to read.  This
305  * allows reading multiple registers in a single call and having
306  * the returned size reflect that.
307  */
308 static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
309 					size_t size, loff_t *pos)
310 {
311 	struct amdgpu_device *adev = file_inode(f)->i_private;
312 	ssize_t result = 0;
313 	int r;
314 
315 	if (size & 0x3 || *pos & 0x3)
316 		return -EINVAL;
317 
318 	while (size) {
319 		uint32_t value;
320 
321 		value = RREG32_DIDT(*pos >> 2);
322 		r = put_user(value, (uint32_t *)buf);
323 		if (r)
324 			return r;
325 
326 		result += 4;
327 		buf += 4;
328 		*pos += 4;
329 		size -= 4;
330 	}
331 
332 	return result;
333 }
334 
335 /**
336  * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
337  *
338  * @f: open file handle
339  * @buf: User buffer to write data from
340  * @size: Number of bytes to write
341  * @pos:  Offset to seek to
342  *
343  * The lower bits are the BYTE offset of the register to write.  This
344  * allows writing multiple registers in a single call and having
345  * the returned size reflect that.
346  */
347 static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
348 					 size_t size, loff_t *pos)
349 {
350 	struct amdgpu_device *adev = file_inode(f)->i_private;
351 	ssize_t result = 0;
352 	int r;
353 
354 	if (size & 0x3 || *pos & 0x3)
355 		return -EINVAL;
356 
357 	while (size) {
358 		uint32_t value;
359 
360 		r = get_user(value, (uint32_t *)buf);
361 		if (r)
362 			return r;
363 
364 		WREG32_DIDT(*pos >> 2, value);
365 
366 		result += 4;
367 		buf += 4;
368 		*pos += 4;
369 		size -= 4;
370 	}
371 
372 	return result;
373 }
374 
375 /**
376  * amdgpu_debugfs_regs_smc_read - Read from a SMC register
377  *
378  * @f: open file handle
379  * @buf: User buffer to store read data in
380  * @size: Number of bytes to read
381  * @pos:  Offset to seek to
382  *
383  * The lower bits are the BYTE offset of the register to read.  This
384  * allows reading multiple registers in a single call and having
385  * the returned size reflect that.
386  */
387 static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
388 					size_t size, loff_t *pos)
389 {
390 	struct amdgpu_device *adev = file_inode(f)->i_private;
391 	ssize_t result = 0;
392 	int r;
393 
394 	if (size & 0x3 || *pos & 0x3)
395 		return -EINVAL;
396 
397 	while (size) {
398 		uint32_t value;
399 
400 		value = RREG32_SMC(*pos);
401 		r = put_user(value, (uint32_t *)buf);
402 		if (r)
403 			return r;
404 
405 		result += 4;
406 		buf += 4;
407 		*pos += 4;
408 		size -= 4;
409 	}
410 
411 	return result;
412 }
413 
414 /**
415  * amdgpu_debugfs_regs_smc_write - Write to a SMC register
416  *
417  * @f: open file handle
418  * @buf: User buffer to write data from
419  * @size: Number of bytes to write
420  * @pos:  Offset to seek to
421  *
422  * The lower bits are the BYTE offset of the register to write.  This
423  * allows writing multiple registers in a single call and having
424  * the returned size reflect that.
425  */
426 static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
427 					 size_t size, loff_t *pos)
428 {
429 	struct amdgpu_device *adev = file_inode(f)->i_private;
430 	ssize_t result = 0;
431 	int r;
432 
433 	if (size & 0x3 || *pos & 0x3)
434 		return -EINVAL;
435 
436 	while (size) {
437 		uint32_t value;
438 
439 		r = get_user(value, (uint32_t *)buf);
440 		if (r)
441 			return r;
442 
443 		WREG32_SMC(*pos, value);
444 
445 		result += 4;
446 		buf += 4;
447 		*pos += 4;
448 		size -= 4;
449 	}
450 
451 	return result;
452 }
453 
454 /**
455  * amdgpu_debugfs_gca_config_read - Read from gfx config data
456  *
457  * @f: open file handle
458  * @buf: User buffer to store read data in
459  * @size: Number of bytes to read
460  * @pos:  Offset to seek to
461  *
462  * This file is used to access configuration data in a somewhat
463  * stable fashion.  The format is a series of DWORDs with the first
464  * indicating which revision it is.  New content is appended to the
465  * end so that older software can still read the data.
466  */
467 
468 static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
469 					size_t size, loff_t *pos)
470 {
471 	struct amdgpu_device *adev = file_inode(f)->i_private;
472 	ssize_t result = 0;
473 	int r;
474 	uint32_t *config, no_regs = 0;
475 
476 	if (size & 0x3 || *pos & 0x3)
477 		return -EINVAL;
478 
479 	config = kmalloc_array(256, sizeof(*config), GFP_KERNEL);
480 	if (!config)
481 		return -ENOMEM;
482 
483 	/* version, increment each time something is added */
484 	config[no_regs++] = 3;
485 	config[no_regs++] = adev->gfx.config.max_shader_engines;
486 	config[no_regs++] = adev->gfx.config.max_tile_pipes;
487 	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
488 	config[no_regs++] = adev->gfx.config.max_sh_per_se;
489 	config[no_regs++] = adev->gfx.config.max_backends_per_se;
490 	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
491 	config[no_regs++] = adev->gfx.config.max_gprs;
492 	config[no_regs++] = adev->gfx.config.max_gs_threads;
493 	config[no_regs++] = adev->gfx.config.max_hw_contexts;
494 	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
495 	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
496 	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
497 	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
498 	config[no_regs++] = adev->gfx.config.num_tile_pipes;
499 	config[no_regs++] = adev->gfx.config.backend_enable_mask;
500 	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
501 	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
502 	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
503 	config[no_regs++] = adev->gfx.config.num_gpus;
504 	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
505 	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
506 	config[no_regs++] = adev->gfx.config.gb_addr_config;
507 	config[no_regs++] = adev->gfx.config.num_rbs;
508 
509 	/* rev==1 */
510 	config[no_regs++] = adev->rev_id;
511 	config[no_regs++] = adev->pg_flags;
512 	config[no_regs++] = adev->cg_flags;
513 
514 	/* rev==2 */
515 	config[no_regs++] = adev->family;
516 	config[no_regs++] = adev->external_rev_id;
517 
518 	/* rev==3 */
519 	config[no_regs++] = adev->pdev->device;
520 	config[no_regs++] = adev->pdev->revision;
521 	config[no_regs++] = adev->pdev->subsystem_device;
522 	config[no_regs++] = adev->pdev->subsystem_vendor;
523 
524 	while (size && (*pos < no_regs * 4)) {
525 		uint32_t value;
526 
527 		value = config[*pos >> 2];
528 		r = put_user(value, (uint32_t *)buf);
529 		if (r) {
530 			kfree(config);
531 			return r;
532 		}
533 
534 		result += 4;
535 		buf += 4;
536 		*pos += 4;
537 		size -= 4;
538 	}
539 
540 	kfree(config);
541 	return result;
542 }
543 
544 /**
545  * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
546  *
547  * @f: open file handle
548  * @buf: User buffer to store read data in
549  * @size: Number of bytes to read
550  * @pos:  Offset to seek to
551  *
552  * The offset is treated as the BYTE address of one of the sensors
553  * enumerated in amd/include/kgd_pp_interface.h under the
554  * 'amd_pp_sensors' enumeration.  For instance to read the UVD VCLK
555  * you would use the offset 3 * 4 = 12.
556  */
557 static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
558 					size_t size, loff_t *pos)
559 {
560 	struct amdgpu_device *adev = file_inode(f)->i_private;
561 	int idx, x, outsize, r, valuesize;
562 	uint32_t values[16];
563 
564 	if (size & 3 || *pos & 0x3)
565 		return -EINVAL;
566 
567 	if (!adev->pm.dpm_enabled)
568 		return -EINVAL;
569 
570 	/* convert offset to sensor number */
571 	idx = *pos >> 2;
572 
573 	valuesize = sizeof(values);
574 	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
575 	if (r)
576 		return r;
577 
578 	if (size > valuesize)
579 		return -EINVAL;
580 
581 	outsize = 0;
582 	x = 0;
583 	if (!r) {
584 		while (size) {
585 			r = put_user(values[x++], (int32_t *)buf);
586 			buf += 4;
587 			size -= 4;
588 			outsize += 4;
589 		}
590 	}
591 
592 	return !r ? outsize : r;
593 }
594 
595 /** amdgpu_debugfs_wave_read - Read WAVE STATUS data
596  *
597  * @f: open file handle
598  * @buf: User buffer to store read data in
599  * @size: Number of bytes to read
600  * @pos:  Offset to seek to
601  *
602  * The offset being sought changes which wave that the status data
603  * will be returned for.  The bits are used as follows:
604  *
605  * Bits 0..6: 	Byte offset into data
606  * Bits 7..14:	SE selector
607  * Bits 15..22:	SH/SA selector
608  * Bits 23..30: CU/{WGP+SIMD} selector
609  * Bits 31..36: WAVE ID selector
610  * Bits 37..44: SIMD ID selector
611  *
612  * The returned data begins with one DWORD of version information
613  * Followed by WAVE STATUS registers relevant to the GFX IP version
614  * being used.  See gfx_v8_0_read_wave_data() for an example output.
615  */
616 static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
617 					size_t size, loff_t *pos)
618 {
619 	struct amdgpu_device *adev = f->f_inode->i_private;
620 	int r, x;
621 	ssize_t result=0;
622 	uint32_t offset, se, sh, cu, wave, simd, data[32];
623 
624 	if (size & 3 || *pos & 3)
625 		return -EINVAL;
626 
627 	/* decode offset */
628 	offset = (*pos & GENMASK_ULL(6, 0));
629 	se = (*pos & GENMASK_ULL(14, 7)) >> 7;
630 	sh = (*pos & GENMASK_ULL(22, 15)) >> 15;
631 	cu = (*pos & GENMASK_ULL(30, 23)) >> 23;
632 	wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
633 	simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
634 
635 	/* switch to the specific se/sh/cu */
636 	mutex_lock(&adev->grbm_idx_mutex);
637 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
638 
639 	x = 0;
640 	if (adev->gfx.funcs->read_wave_data)
641 		adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
642 
643 	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
644 	mutex_unlock(&adev->grbm_idx_mutex);
645 
646 	if (!x)
647 		return -EINVAL;
648 
649 	while (size && (offset < x * 4)) {
650 		uint32_t value;
651 
652 		value = data[offset >> 2];
653 		r = put_user(value, (uint32_t *)buf);
654 		if (r)
655 			return r;
656 
657 		result += 4;
658 		buf += 4;
659 		offset += 4;
660 		size -= 4;
661 	}
662 
663 	return result;
664 }
665 
666 /** amdgpu_debugfs_gpr_read - Read wave gprs
667  *
668  * @f: open file handle
669  * @buf: User buffer to store read data in
670  * @size: Number of bytes to read
671  * @pos:  Offset to seek to
672  *
673  * The offset being sought changes which wave that the status data
674  * will be returned for.  The bits are used as follows:
675  *
676  * Bits 0..11:	Byte offset into data
677  * Bits 12..19:	SE selector
678  * Bits 20..27:	SH/SA selector
679  * Bits 28..35: CU/{WGP+SIMD} selector
680  * Bits 36..43: WAVE ID selector
681  * Bits 37..44: SIMD ID selector
682  * Bits 52..59: Thread selector
683  * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
684  *
685  * The return data comes from the SGPR or VGPR register bank for
686  * the selected operational unit.
687  */
688 static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
689 					size_t size, loff_t *pos)
690 {
691 	struct amdgpu_device *adev = f->f_inode->i_private;
692 	int r;
693 	ssize_t result = 0;
694 	uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
695 
696 	if (size & 3 || *pos & 3)
697 		return -EINVAL;
698 
699 	/* decode offset */
700 	offset = *pos & GENMASK_ULL(11, 0);
701 	se = (*pos & GENMASK_ULL(19, 12)) >> 12;
702 	sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
703 	cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
704 	wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
705 	simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
706 	thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
707 	bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
708 
709 	data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL);
710 	if (!data)
711 		return -ENOMEM;
712 
713 	/* switch to the specific se/sh/cu */
714 	mutex_lock(&adev->grbm_idx_mutex);
715 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
716 
717 	if (bank == 0) {
718 		if (adev->gfx.funcs->read_wave_vgprs)
719 			adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
720 	} else {
721 		if (adev->gfx.funcs->read_wave_sgprs)
722 			adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
723 	}
724 
725 	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
726 	mutex_unlock(&adev->grbm_idx_mutex);
727 
728 	while (size) {
729 		uint32_t value;
730 
731 		value = data[offset++];
732 		r = put_user(value, (uint32_t *)buf);
733 		if (r) {
734 			result = r;
735 			goto err;
736 		}
737 
738 		result += 4;
739 		buf += 4;
740 		size -= 4;
741 	}
742 
743 err:
744 	kfree(data);
745 	return result;
746 }
747 
748 static const struct file_operations amdgpu_debugfs_regs_fops = {
749 	.owner = THIS_MODULE,
750 	.read = amdgpu_debugfs_regs_read,
751 	.write = amdgpu_debugfs_regs_write,
752 	.llseek = default_llseek
753 };
754 static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
755 	.owner = THIS_MODULE,
756 	.read = amdgpu_debugfs_regs_didt_read,
757 	.write = amdgpu_debugfs_regs_didt_write,
758 	.llseek = default_llseek
759 };
760 static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
761 	.owner = THIS_MODULE,
762 	.read = amdgpu_debugfs_regs_pcie_read,
763 	.write = amdgpu_debugfs_regs_pcie_write,
764 	.llseek = default_llseek
765 };
766 static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
767 	.owner = THIS_MODULE,
768 	.read = amdgpu_debugfs_regs_smc_read,
769 	.write = amdgpu_debugfs_regs_smc_write,
770 	.llseek = default_llseek
771 };
772 
773 static const struct file_operations amdgpu_debugfs_gca_config_fops = {
774 	.owner = THIS_MODULE,
775 	.read = amdgpu_debugfs_gca_config_read,
776 	.llseek = default_llseek
777 };
778 
779 static const struct file_operations amdgpu_debugfs_sensors_fops = {
780 	.owner = THIS_MODULE,
781 	.read = amdgpu_debugfs_sensor_read,
782 	.llseek = default_llseek
783 };
784 
785 static const struct file_operations amdgpu_debugfs_wave_fops = {
786 	.owner = THIS_MODULE,
787 	.read = amdgpu_debugfs_wave_read,
788 	.llseek = default_llseek
789 };
790 static const struct file_operations amdgpu_debugfs_gpr_fops = {
791 	.owner = THIS_MODULE,
792 	.read = amdgpu_debugfs_gpr_read,
793 	.llseek = default_llseek
794 };
795 
796 static const struct file_operations *debugfs_regs[] = {
797 	&amdgpu_debugfs_regs_fops,
798 	&amdgpu_debugfs_regs_didt_fops,
799 	&amdgpu_debugfs_regs_pcie_fops,
800 	&amdgpu_debugfs_regs_smc_fops,
801 	&amdgpu_debugfs_gca_config_fops,
802 	&amdgpu_debugfs_sensors_fops,
803 	&amdgpu_debugfs_wave_fops,
804 	&amdgpu_debugfs_gpr_fops,
805 };
806 
807 static const char *debugfs_regs_names[] = {
808 	"amdgpu_regs",
809 	"amdgpu_regs_didt",
810 	"amdgpu_regs_pcie",
811 	"amdgpu_regs_smc",
812 	"amdgpu_gca_config",
813 	"amdgpu_sensors",
814 	"amdgpu_wave",
815 	"amdgpu_gpr",
816 };
817 
818 /**
819  * amdgpu_debugfs_regs_init -	Initialize debugfs entries that provide
820  * 								register access.
821  *
822  * @adev: The device to attach the debugfs entries to
823  */
824 int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
825 {
826 	struct drm_minor *minor = adev->ddev->primary;
827 	struct dentry *ent, *root = minor->debugfs_root;
828 	unsigned int i;
829 
830 	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
831 		ent = debugfs_create_file(debugfs_regs_names[i],
832 					  S_IFREG | S_IRUGO, root,
833 					  adev, debugfs_regs[i]);
834 		if (!i && !IS_ERR_OR_NULL(ent))
835 			i_size_write(ent->d_inode, adev->rmmio_size);
836 		adev->debugfs_regs[i] = ent;
837 	}
838 
839 	return 0;
840 }
841 
842 void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
843 {
844 	unsigned i;
845 
846 	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
847 		if (adev->debugfs_regs[i]) {
848 			debugfs_remove(adev->debugfs_regs[i]);
849 			adev->debugfs_regs[i] = NULL;
850 		}
851 	}
852 }
853 
854 static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
855 {
856 	struct drm_info_node *node = (struct drm_info_node *) m->private;
857 	struct drm_device *dev = node->minor->dev;
858 	struct amdgpu_device *adev = dev->dev_private;
859 	int r = 0, i;
860 
861 	/* hold on the scheduler */
862 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
863 		struct amdgpu_ring *ring = adev->rings[i];
864 
865 		if (!ring || !ring->sched.thread)
866 			continue;
867 		kthread_park(ring->sched.thread);
868 	}
869 
870 	seq_printf(m, "run ib test:\n");
871 	r = amdgpu_ib_ring_tests(adev);
872 	if (r)
873 		seq_printf(m, "ib ring tests failed (%d).\n", r);
874 	else
875 		seq_printf(m, "ib ring tests passed.\n");
876 
877 	/* go on the scheduler */
878 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
879 		struct amdgpu_ring *ring = adev->rings[i];
880 
881 		if (!ring || !ring->sched.thread)
882 			continue;
883 		kthread_unpark(ring->sched.thread);
884 	}
885 
886 	return 0;
887 }
888 
889 static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data)
890 {
891 	struct drm_info_node *node = (struct drm_info_node *) m->private;
892 	struct drm_device *dev = node->minor->dev;
893 	struct amdgpu_device *adev = dev->dev_private;
894 
895 	seq_write(m, adev->bios, adev->bios_size);
896 	return 0;
897 }
898 
899 static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
900 {
901 	struct drm_info_node *node = (struct drm_info_node *)m->private;
902 	struct drm_device *dev = node->minor->dev;
903 	struct amdgpu_device *adev = dev->dev_private;
904 
905 	seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
906 	return 0;
907 }
908 
909 static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
910 {
911 	struct drm_info_node *node = (struct drm_info_node *)m->private;
912 	struct drm_device *dev = node->minor->dev;
913 	struct amdgpu_device *adev = dev->dev_private;
914 
915 	seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
916 	return 0;
917 }
918 
919 static const struct drm_info_list amdgpu_debugfs_list[] = {
920 	{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
921 	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
922 	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
923 	{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
924 };
925 
926 static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
927 					  struct dma_fence **fences)
928 {
929 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
930 	uint32_t sync_seq, last_seq;
931 
932 	last_seq = atomic_read(&ring->fence_drv.last_seq);
933 	sync_seq = ring->fence_drv.sync_seq;
934 
935 	last_seq &= drv->num_fences_mask;
936 	sync_seq &= drv->num_fences_mask;
937 
938 	do {
939 		struct dma_fence *fence, **ptr;
940 
941 		++last_seq;
942 		last_seq &= drv->num_fences_mask;
943 		ptr = &drv->fences[last_seq];
944 
945 		fence = rcu_dereference_protected(*ptr, 1);
946 		RCU_INIT_POINTER(*ptr, NULL);
947 
948 		if (!fence)
949 			continue;
950 
951 		fences[last_seq] = fence;
952 
953 	} while (last_seq != sync_seq);
954 }
955 
/**
 * amdgpu_ib_preempt_signal_fences - Signal and release saved fences
 *
 * @fences: array of fence pointers, entries may be NULL
 * @length: number of entries in @fences
 *
 * Every non-NULL entry is signalled and then has one reference dropped.
 */
static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
					    int length)
{
	int idx;

	for (idx = 0; idx < length; idx++) {
		struct dma_fence *cur = fences[idx];

		if (cur) {
			dma_fence_signal(cur);
			dma_fence_put(cur);
		}
	}
}
970 
971 static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
972 {
973 	struct drm_sched_job *s_job;
974 	struct dma_fence *fence;
975 
976 	spin_lock(&sched->job_list_lock);
977 	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
978 		fence = sched->ops->run_job(s_job);
979 		dma_fence_put(fence);
980 	}
981 	spin_unlock(&sched->job_list_lock);
982 }
983 
984 static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
985 {
986 	struct amdgpu_job *job;
987 	struct drm_sched_job *s_job;
988 	uint32_t preempt_seq;
989 	struct dma_fence *fence, **ptr;
990 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
991 	struct drm_gpu_scheduler *sched = &ring->sched;
992 
993 	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
994 		return;
995 
996 	preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
997 	if (preempt_seq <= atomic_read(&drv->last_seq))
998 		return;
999 
1000 	preempt_seq &= drv->num_fences_mask;
1001 	ptr = &drv->fences[preempt_seq];
1002 	fence = rcu_dereference_protected(*ptr, 1);
1003 
1004 	spin_lock(&sched->job_list_lock);
1005 	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
1006 		job = to_amdgpu_job(s_job);
1007 		if (job->fence == fence)
1008 			/* mark the job as preempted */
1009 			job->preemption_status |= AMDGPU_IB_PREEMPTED;
1010 	}
1011 	spin_unlock(&sched->job_list_lock);
1012 }
1013 
1014 static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
1015 {
1016 	int r, resched, length;
1017 	struct amdgpu_ring *ring;
1018 	struct dma_fence **fences = NULL;
1019 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
1020 
1021 	if (val >= AMDGPU_MAX_RINGS)
1022 		return -EINVAL;
1023 
1024 	ring = adev->rings[val];
1025 
1026 	if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
1027 		return -EINVAL;
1028 
1029 	/* the last preemption failed */
1030 	if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr))
1031 		return -EBUSY;
1032 
1033 	length = ring->fence_drv.num_fences_mask + 1;
1034 	fences = kcalloc(length, sizeof(void *), GFP_KERNEL);
1035 	if (!fences)
1036 		return -ENOMEM;
1037 
1038 	/* stop the scheduler */
1039 	kthread_park(ring->sched.thread);
1040 
1041 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
1042 
1043 	/* preempt the IB */
1044 	r = amdgpu_ring_preempt_ib(ring);
1045 	if (r) {
1046 		DRM_WARN("failed to preempt ring %d\n", ring->idx);
1047 		goto failure;
1048 	}
1049 
1050 	amdgpu_fence_process(ring);
1051 
1052 	if (atomic_read(&ring->fence_drv.last_seq) !=
1053 	    ring->fence_drv.sync_seq) {
1054 		DRM_INFO("ring %d was preempted\n", ring->idx);
1055 
1056 		amdgpu_ib_preempt_mark_partial_job(ring);
1057 
1058 		/* swap out the old fences */
1059 		amdgpu_ib_preempt_fences_swap(ring, fences);
1060 
1061 		amdgpu_fence_driver_force_completion(ring);
1062 
1063 		/* resubmit unfinished jobs */
1064 		amdgpu_ib_preempt_job_recovery(&ring->sched);
1065 
1066 		/* wait for jobs finished */
1067 		amdgpu_fence_wait_empty(ring);
1068 
1069 		/* signal the old fences */
1070 		amdgpu_ib_preempt_signal_fences(fences, length);
1071 	}
1072 
1073 failure:
1074 	/* restart the scheduler */
1075 	kthread_unpark(ring->sched.thread);
1076 
1077 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
1078 
1079 	if (fences)
1080 		kfree(fences);
1081 
1082 	return 0;
1083 }
1084 
1085 DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
1086 			amdgpu_debugfs_ib_preempt, "%llu\n");
1087 
1088 int amdgpu_debugfs_init(struct amdgpu_device *adev)
1089 {
1090 	adev->debugfs_preempt =
1091 		debugfs_create_file("amdgpu_preempt_ib", 0600,
1092 				    adev->ddev->primary->debugfs_root,
1093 				    (void *)adev, &fops_ib_preempt);
1094 	if (!(adev->debugfs_preempt)) {
1095 		DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
1096 		return -EIO;
1097 	}
1098 
1099 	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
1100 					ARRAY_SIZE(amdgpu_debugfs_list));
1101 }
1102 
1103 void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
1104 {
1105 	if (adev->debugfs_preempt)
1106 		debugfs_remove(adev->debugfs_preempt);
1107 }
1108 
1109 #else
/*
 * Stubs used when CONFIG_DEBUG_FS is not set: the init functions report
 * success and the cleanup functions do nothing.
 */
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
	return 0;
}

void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
{
}

int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
	return 0;
}

void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
{
}
1120 #endif
1121