xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c (revision bddb55ccbdc20dd7fd526c8dfb13f695637bf7b1)
1  /*
2   * Copyright 2014 Advanced Micro Devices, Inc.
3   * Copyright 2008 Red Hat Inc.
4   * Copyright 2009 Jerome Glisse.
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a
7   * copy of this software and associated documentation files (the "Software"),
8   * to deal in the Software without restriction, including without limitation
9   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10   * and/or sell copies of the Software, and to permit persons to whom the
11   * Software is furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19   * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22   * OTHER DEALINGS IN THE SOFTWARE.
23   *
24   */
25  
26  #include <linux/firmware.h>
27  #include "amdgpu.h"
28  #include "amdgpu_gfx.h"
29  #include "amdgpu_rlc.h"
30  #include "amdgpu_ras.h"
31  
32  /* delay 0.1 second to enable gfx off feature */
33  #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
34  
35  #define GFX_OFF_NO_DELAY 0
36  
37  /*
38   * GPU GFX IP block helper functions.
39   */
40  
41  int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
42  				int pipe, int queue)
43  {
44  	int bit = 0;
45  
46  	bit += mec * adev->gfx.mec.num_pipe_per_mec
47  		* adev->gfx.mec.num_queue_per_pipe;
48  	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
49  	bit += queue;
50  
51  	return bit;
52  }
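
/*
 * Illustrative example, assuming a hypothetical configuration of
 * num_pipe_per_mec = 4 and num_queue_per_pipe = 8: the flat index above is
 * bit = mec * 32 + pipe * 8 + queue, so (mec 1, pipe 2, queue 3) maps to
 * bit 51 of adev->gfx.mec.queue_bitmap.
 */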
53  
54  void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
55  				 int *mec, int *pipe, int *queue)
56  {
57  	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
58  	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
59  		% adev->gfx.mec.num_pipe_per_mec;
60  	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
61  	       / adev->gfx.mec.num_pipe_per_mec;
62  
63  }
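
/*
 * This is the inverse of amdgpu_gfx_mec_queue_to_bit(): with the same
 * hypothetical 4 x 8 configuration, bit 51 decodes back to queue = 51 % 8 = 3,
 * pipe = (51 / 8) % 4 = 2 and mec = (51 / 8) / 4 = 1.
 */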
64  
65  bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
66  				     int mec, int pipe, int queue)
67  {
68  	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
69  			adev->gfx.mec.queue_bitmap);
70  }
71  
72  int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
73  			       int me, int pipe, int queue)
74  {
75  	int bit = 0;
76  
77  	bit += me * adev->gfx.me.num_pipe_per_me
78  		* adev->gfx.me.num_queue_per_pipe;
79  	bit += pipe * adev->gfx.me.num_queue_per_pipe;
80  	bit += queue;
81  
82  	return bit;
83  }
84  
85  void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
86  				int *me, int *pipe, int *queue)
87  {
88  	*queue = bit % adev->gfx.me.num_queue_per_pipe;
89  	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
90  		% adev->gfx.me.num_pipe_per_me;
91  	*me = (bit / adev->gfx.me.num_queue_per_pipe)
92  		/ adev->gfx.me.num_pipe_per_me;
93  }
94  
95  bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
96  				    int me, int pipe, int queue)
97  {
98  	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
99  			adev->gfx.me.queue_bitmap);
100  }
101  
102  /**
103   * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
104   *
105   * @mask: array in which the per-shader array disable masks will be stored
106   * @max_se: number of SEs
107   * @max_sh: number of SHs
108   *
109   * The bitmask of CUs to be disabled in the shader array determined by se and
110   * sh is stored in mask[se * max_sh + sh].
111   */
112  void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
113  {
114  	unsigned se, sh, cu;
115  	const char *p;
116  
117  	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
118  
119  	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
120  		return;
121  
122  	p = amdgpu_disable_cu;
123  	for (;;) {
124  		char *next;
125  		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
126  		if (ret < 3) {
127  			DRM_ERROR("amdgpu: could not parse disable_cu\n");
128  			return;
129  		}
130  
131  		if (se < max_se && sh < max_sh && cu < 16) {
132  			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
133  			mask[se * max_sh + sh] |= 1u << cu;
134  		} else {
135  			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
136  				  se, sh, cu);
137  		}
138  
139  		next = strchr(p, ',');
140  		if (!next)
141  			break;
142  		p = next + 1;
143  	}
144  }
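
/*
 * Illustrative use of the string parsed above (the amdgpu.disable_cu module
 * parameter): it is a comma-separated list of "se.sh.cu" triplets, e.g.
 * amdgpu.disable_cu=0.0.3,1.0.7 sets bit 3 in mask[0 * max_sh + 0] and
 * bit 7 in mask[1 * max_sh + 0].
 */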
145  
146  static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
147  {
148  	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
149  }
150  
151  static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
152  {
153  	if (amdgpu_compute_multipipe != -1) {
154  		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
155  			 amdgpu_compute_multipipe);
156  		return amdgpu_compute_multipipe == 1;
157  	}
158  
159  	if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
160  		return true;
161  
162  	/* FIXME: spreading the queues across pipes causes perf regressions
163  	 * on POLARIS11 compute workloads */
164  	if (adev->asic_type == CHIP_POLARIS11)
165  		return false;
166  
167  	return adev->gfx.mec.num_mec > 1;
168  }
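
/*
 * Note: amdgpu_compute_multipipe is the compute_multipipe module parameter;
 * leaving it at -1 selects the per-ASIC policy above, while 0 or 1 forces the
 * single-pipe or multi-pipe compute queue policy respectively.
 */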
169  
170  bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
171  						struct amdgpu_ring *ring)
172  {
173  	int queue = ring->queue;
174  	int pipe = ring->pipe;
175  
176  	/* Policy: use pipe1 queue0 as high priority graphics queue if we
177  	 * have more than one gfx pipe.
178  	 */
179  	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
180  	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
181  		int me = ring->me;
182  		int bit;
183  
184  		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
185  		if (ring == &adev->gfx.gfx_ring[bit])
186  			return true;
187  	}
188  
189  	return false;
190  }
191  
192  bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
193  					       struct amdgpu_ring *ring)
194  {
195  	/* Policy: use 1st queue as high priority compute queue if we
196  	 * have more than one compute queue.
197  	 */
198  	if (adev->gfx.num_compute_rings > 1 &&
199  	    ring == &adev->gfx.compute_ring[0])
200  		return true;
201  
202  	return false;
203  }
204  
205  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
206  {
207  	int i, queue, pipe;
208  	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
209  	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
210  				     adev->gfx.mec.num_queue_per_pipe,
211  				     adev->gfx.num_compute_rings);
212  
213  	if (multipipe_policy) {
214  		/* policy: spread the queues evenly across all pipes on MEC1 only */
215  		for (i = 0; i < max_queues_per_mec; i++) {
216  			pipe = i % adev->gfx.mec.num_pipe_per_mec;
217  			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
218  				adev->gfx.mec.num_queue_per_pipe;
219  
220  			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
221  					adev->gfx.mec.queue_bitmap);
222  		}
223  	} else {
224  		/* policy: amdgpu owns all queues in the given pipe */
225  		for (i = 0; i < max_queues_per_mec; ++i)
226  			set_bit(i, adev->gfx.mec.queue_bitmap);
227  	}
228  
229  	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
230  }
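
/*
 * Illustrative result, assuming a hypothetical MEC with 4 pipes, 8 queues per
 * pipe and num_compute_rings = 8: the multipipe policy above sets bits
 * {0, 8, 16, 24, 1, 9, 17, 25}, i.e. two queues on each pipe of MEC1, whereas
 * the fallback policy would set bits 0-7, i.e. all eight queues of pipe 0.
 */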
231  
232  void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
233  {
234  	int i, queue, pipe;
235  	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
236  	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
237  					adev->gfx.me.num_queue_per_pipe;
238  
239  	if (multipipe_policy) {
240  		/* policy: amdgpu owns the first queue per pipe at this stage;
241  		 * this will be extended to multiple queues per pipe later */
242  		for (i = 0; i < max_queues_per_me; i++) {
243  			pipe = i % adev->gfx.me.num_pipe_per_me;
244  			queue = (i / adev->gfx.me.num_pipe_per_me) %
245  				adev->gfx.me.num_queue_per_pipe;
246  
247  			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
248  				adev->gfx.me.queue_bitmap);
249  		}
250  	} else {
251  		for (i = 0; i < max_queues_per_me; ++i)
252  			set_bit(i, adev->gfx.me.queue_bitmap);
253  	}
254  
255  	/* update the number of active graphics rings */
256  	adev->gfx.num_gfx_rings =
257  		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
258  }
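
/*
 * Illustrative result, assuming a hypothetical ME with 2 pipes and 1 queue per
 * pipe: the multipipe policy above sets bits 0 and 1 (queue 0 of each pipe),
 * and num_gfx_rings is then updated to the bitmap weight, i.e. 2.
 */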
259  
260  static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
261  				  struct amdgpu_ring *ring)
262  {
263  	int queue_bit;
264  	int mec, pipe, queue;
265  
266  	queue_bit = adev->gfx.mec.num_mec
267  		    * adev->gfx.mec.num_pipe_per_mec
268  		    * adev->gfx.mec.num_queue_per_pipe;
269  
270  	while (--queue_bit >= 0) {
271  		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
272  			continue;
273  
274  		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
275  
276  		/*
277  		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
278  		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
279  		 * can only be issued on queue 0.
280  		 */
281  		if ((mec == 1 && pipe > 1) || queue != 0)
282  			continue;
283  
284  		ring->me = mec + 1;
285  		ring->pipe = pipe;
286  		ring->queue = queue;
287  
288  		return 0;
289  	}
290  
291  	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
292  	return -EINVAL;
293  }
294  
295  int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
296  			     struct amdgpu_ring *ring,
297  			     struct amdgpu_irq_src *irq)
298  {
299  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
300  	int r = 0;
301  
302  	spin_lock_init(&kiq->ring_lock);
303  
304  	ring->adev = NULL;
305  	ring->ring_obj = NULL;
306  	ring->use_doorbell = true;
307  	ring->doorbell_index = adev->doorbell_index.kiq;
308  	ring->vm_hub = AMDGPU_GFXHUB_0;
309  
310  	r = amdgpu_gfx_kiq_acquire(adev, ring);
311  	if (r)
312  		return r;
313  
314  	ring->eop_gpu_addr = kiq->eop_gpu_addr;
315  	ring->no_scheduler = true;
316  	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
317  	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
318  			     AMDGPU_RING_PRIO_DEFAULT, NULL);
319  	if (r)
320  		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
321  
322  	return r;
323  }
324  
325  void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
326  {
327  	amdgpu_ring_fini(ring);
328  }
329  
330  void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
331  {
332  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
333  
334  	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
335  }
336  
337  int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
338  			unsigned hpd_size)
339  {
340  	int r;
341  	u32 *hpd;
342  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
343  
344  	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
345  				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
346  				    &kiq->eop_gpu_addr, (void **)&hpd);
347  	if (r) {
348  		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
349  		return r;
350  	}
351  
352  	memset(hpd, 0, hpd_size);
353  
354  	r = amdgpu_bo_reserve(kiq->eop_obj, true);
355  	if (unlikely(r != 0))
356  		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
357  	amdgpu_bo_kunmap(kiq->eop_obj);
358  	amdgpu_bo_unreserve(kiq->eop_obj);
359  
360  	return 0;
361  }
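
/*
 * Note on the flow above: the KIQ EOP buffer is created pinned in GTT,
 * CPU-mapped just long enough to be zeroed, then kunmapped; only the GPU
 * address in kiq->eop_gpu_addr is kept and later handed to the KIQ ring in
 * amdgpu_gfx_kiq_init_ring().
 */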
362  
363  /* create MQD for each compute/gfx queue */
364  int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
365  			   unsigned mqd_size)
366  {
367  	struct amdgpu_ring *ring = NULL;
368  	int r, i;
369  
370  	/* create MQD for KIQ */
371  	ring = &adev->gfx.kiq.ring;
372  	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
373  		/* Originally the KIQ MQD was placed in the GTT domain, but for SRIOV the VRAM domain
374  		 * is a must; otherwise the hypervisor triggers a SAVE_VF failure after the driver is
375  		 * unloaded, since the MQD has been deallocated and GART-unbound. To avoid that
376  		 * divergence, use the VRAM domain for the KIQ MQD on both SRIOV and bare-metal.
377  		 */
378  		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
379  					    AMDGPU_GEM_DOMAIN_VRAM |
380  					    AMDGPU_GEM_DOMAIN_GTT,
381  					    &ring->mqd_obj,
382  					    &ring->mqd_gpu_addr,
383  					    &ring->mqd_ptr);
384  		if (r) {
385  			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
386  			return r;
387  		}
388  
389  		/* prepare MQD backup */
390  		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
391  		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
392  			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
393  	}
394  
395  	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
396  		/* create MQD for each KGQ */
397  		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
398  			ring = &adev->gfx.gfx_ring[i];
399  			if (!ring->mqd_obj) {
400  				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
401  							    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
402  							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
403  				if (r) {
404  					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
405  					return r;
406  				}
407  
408  				/* prepare MQD backup */
409  				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
410  				if (!adev->gfx.me.mqd_backup[i])
411  					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
412  			}
413  		}
414  	}
415  
416  	/* create MQD for each KCQ */
417  	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
418  		ring = &adev->gfx.compute_ring[i];
419  		if (!ring->mqd_obj) {
420  			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
421  						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
422  						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
423  			if (r) {
424  				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
425  				return r;
426  			}
427  
428  			/* prepare MQD backup */
429  			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
430  			if (!adev->gfx.mec.mqd_backup[i])
431  				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
432  		}
433  	}
434  
435  	return 0;
436  }
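
/*
 * Backup slot convention used above (and mirrored in amdgpu_gfx_mqd_sw_fini()):
 * gfx ring i backs its MQD up into adev->gfx.me.mqd_backup[i], compute ring i
 * into adev->gfx.mec.mqd_backup[i], and the KIQ MQD goes into the extra
 * adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] slot.
 */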
437  
438  void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
439  {
440  	struct amdgpu_ring *ring = NULL;
441  	int i;
442  
443  	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
444  		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
445  			ring = &adev->gfx.gfx_ring[i];
446  			kfree(adev->gfx.me.mqd_backup[i]);
447  			amdgpu_bo_free_kernel(&ring->mqd_obj,
448  					      &ring->mqd_gpu_addr,
449  					      &ring->mqd_ptr);
450  		}
451  	}
452  
453  	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
454  		ring = &adev->gfx.compute_ring[i];
455  		kfree(adev->gfx.mec.mqd_backup[i]);
456  		amdgpu_bo_free_kernel(&ring->mqd_obj,
457  				      &ring->mqd_gpu_addr,
458  				      &ring->mqd_ptr);
459  	}
460  
461  	ring = &adev->gfx.kiq.ring;
462  	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
463  	amdgpu_bo_free_kernel(&ring->mqd_obj,
464  			      &ring->mqd_gpu_addr,
465  			      &ring->mqd_ptr);
466  }
467  
468  int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
469  {
470  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
471  	struct amdgpu_ring *kiq_ring = &kiq->ring;
472  	int i, r = 0;
473  
474  	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
475  		return -EINVAL;
476  
477  	spin_lock(&adev->gfx.kiq.ring_lock);
478  	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
479  					adev->gfx.num_compute_rings)) {
480  		spin_unlock(&adev->gfx.kiq.ring_lock);
481  		return -ENOMEM;
482  	}
483  
484  	for (i = 0; i < adev->gfx.num_compute_rings; i++)
485  		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
486  					   RESET_QUEUES, 0, 0);
487  
488  	if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
489  		r = amdgpu_ring_test_helper(kiq_ring);
490  	spin_unlock(&adev->gfx.kiq.ring_lock);
491  
492  	return r;
493  }
494  
495  int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
496  					int queue_bit)
497  {
498  	int mec, pipe, queue;
499  	int set_resource_bit = 0;
500  
501  	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
502  
503  	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
504  
505  	return set_resource_bit;
506  }
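
/*
 * Note: the SET_RESOURCES bit layout computed above is hard-coded to
 * 4 pipes x 8 queues per MEC, independent of the topology reported in
 * adev->gfx.mec, so a driver-side queue bit must be remapped before being
 * placed into the queue_mask handed to the KIQ.
 */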
507  
508  int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
509  {
510  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
511  	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
512  	uint64_t queue_mask = 0;
513  	int r, i;
514  
515  	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
516  		return -EINVAL;
517  
518  	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
519  		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
520  			continue;
521  
522  		/* This situation may be hit in the future if a new HW
523  		 * generation exposes more than 64 queues. If so, the
524  		 * definition of queue_mask needs updating */
525  		if (WARN_ON(i >= (sizeof(queue_mask) * 8))) {
526  			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
527  			break;
528  		}
529  
530  		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
531  	}
532  
533  	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
534  							kiq_ring->queue);
535  	spin_lock(&adev->gfx.kiq.ring_lock);
536  	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
537  					adev->gfx.num_compute_rings +
538  					kiq->pmf->set_resources_size);
539  	if (r) {
540  		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
541  		spin_unlock(&adev->gfx.kiq.ring_lock);
542  		return r;
543  	}
544  
545  	if (adev->enable_mes)
546  		queue_mask = ~0ULL;
547  
548  	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
549  	for (i = 0; i < adev->gfx.num_compute_rings; i++)
550  		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
551  
552  	r = amdgpu_ring_test_helper(kiq_ring);
553  	spin_unlock(&adev->gfx.kiq.ring_lock);
554  	if (r)
555  		DRM_ERROR("KCQ enable failed\n");
556  
557  	return r;
558  }
559  
560  /* amdgpu_gfx_off_ctrl - Handle the gfx off feature enable/disable
561   *
562   * @adev: amdgpu_device pointer
563   * @enable: true to enable the gfx off feature, false to disable it
564   *
565   * 1. The gfx off feature will be enabled by the gfx IP after gfx CG/PG is enabled.
566   * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
567   * 3. Other clients can cancel their request to disable the gfx off feature.
568   * 4. Other clients should not send a request to enable the gfx off feature before disabling it.
569   */
570  
571  void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
572  {
573  	unsigned long delay = GFX_OFF_DELAY_ENABLE;
574  
575  	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
576  		return;
577  
578  	mutex_lock(&adev->gfx.gfx_off_mutex);
579  
580  	if (enable) {
581  		/* If the count is already 0, it means there's an imbalance bug somewhere.
582  		 * Note that the bug may be in a different caller than the one which triggers the
583  		 * WARN_ON_ONCE.
584  		 */
585  		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
586  			goto unlock;
587  
588  		adev->gfx.gfx_off_req_count--;
589  
590  		if (adev->gfx.gfx_off_req_count == 0 &&
591  		    !adev->gfx.gfx_off_state) {
592  			/* If going to s2idle, no need to wait */
593  			if (adev->in_s0ix) {
594  				if (!amdgpu_dpm_set_powergating_by_smu(adev,
595  						AMD_IP_BLOCK_TYPE_GFX, true))
596  					adev->gfx.gfx_off_state = true;
597  			} else {
598  				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
599  					      delay);
600  			}
601  		}
602  	} else {
603  		if (adev->gfx.gfx_off_req_count == 0) {
604  			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
605  
606  			if (adev->gfx.gfx_off_state &&
607  			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
608  				adev->gfx.gfx_off_state = false;
609  
610  				if (adev->gfx.funcs->init_spm_golden) {
611  					dev_dbg(adev->dev,
612  						"GFXOFF is disabled, re-init SPM golden settings\n");
613  					amdgpu_gfx_init_spm_golden(adev);
614  				}
615  			}
616  		}
617  
618  		adev->gfx.gfx_off_req_count++;
619  	}
620  
621  unlock:
622  	mutex_unlock(&adev->gfx.gfx_off_mutex);
623  }
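
/*
 * Minimal usage sketch for the function above (illustrative only, not part of
 * this file): a caller that must touch GFX registers directly brackets the
 * access with a disable/enable pair so gfx_off_req_count stays balanced.
 */
static u32 amdgpu_gfx_off_ctrl_usage_sketch(struct amdgpu_device *adev, u32 reg)
{
	u32 val;

	/* Request that GFXOFF stay disabled while we access the register. */
	amdgpu_gfx_off_ctrl(adev, false);
	val = RREG32(reg);
	/* Drop the request; GFXOFF may re-arm once the delayed work runs. */
	amdgpu_gfx_off_ctrl(adev, true);

	return val;
}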
624  
625  int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
626  {
627  	int r = 0;
628  
629  	mutex_lock(&adev->gfx.gfx_off_mutex);
630  
631  	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
632  
633  	mutex_unlock(&adev->gfx.gfx_off_mutex);
634  
635  	return r;
636  }
637  
638  int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
639  {
640  	int r = 0;
641  
642  	mutex_lock(&adev->gfx.gfx_off_mutex);
643  
644  	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
645  
646  	mutex_unlock(&adev->gfx.gfx_off_mutex);
647  
648  	return r;
649  }
650  
651  int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
652  {
653  	int r = 0;
654  
655  	mutex_lock(&adev->gfx.gfx_off_mutex);
656  
657  	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
658  
659  	mutex_unlock(&adev->gfx.gfx_off_mutex);
660  
661  	return r;
662  }
663  
664  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
665  {
666  
667  	int r = 0;
668  
669  	mutex_lock(&adev->gfx.gfx_off_mutex);
670  
671  	r = amdgpu_dpm_get_status_gfxoff(adev, value);
672  
673  	mutex_unlock(&adev->gfx.gfx_off_mutex);
674  
675  	return r;
676  }
677  
678  int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
679  {
680  	int r;
681  
682  	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
683  		if (!amdgpu_persistent_edc_harvesting_supported(adev))
684  			amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
685  
686  		r = amdgpu_ras_block_late_init(adev, ras_block);
687  		if (r)
688  			return r;
689  
690  		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
691  		if (r)
692  			goto late_fini;
693  	} else {
694  		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
695  	}
696  
697  	return 0;
698  late_fini:
699  	amdgpu_ras_block_late_fini(adev, ras_block);
700  	return r;
701  }
702  
703  int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
704  {
705  	int err = 0;
706  	struct amdgpu_gfx_ras *ras = NULL;
707  
708  	/* If adev->gfx.ras is NULL, gfx does not support
709  	 * the RAS function, so do nothing here.
710  	 */
711  	if (!adev->gfx.ras)
712  		return 0;
713  
714  	ras = adev->gfx.ras;
715  
716  	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
717  	if (err) {
718  		dev_err(adev->dev, "Failed to register gfx ras block!\n");
719  		return err;
720  	}
721  
722  	strcpy(ras->ras_block.ras_comm.name, "gfx");
723  	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
724  	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
725  	adev->gfx.ras_if = &ras->ras_block.ras_comm;
726  
727  	/* If no special ras_late_init function is defined, use the gfx default ras_late_init */
728  	if (!ras->ras_block.ras_late_init)
729  		ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
730  
731  	/* If no special ras_cb function is defined, use the default ras_cb */
732  	if (!ras->ras_block.ras_cb)
733  		ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
734  
735  	return 0;
736  }
737  
738  int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
739  						struct amdgpu_iv_entry *entry)
740  {
741  	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
742  		return adev->gfx.ras->poison_consumption_handler(adev, entry);
743  
744  	return 0;
745  }
746  
747  int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
748  		void *err_data,
749  		struct amdgpu_iv_entry *entry)
750  {
751  	/* TODO: a UE (uncorrectable error) will trigger an interrupt.
752  	 *
753  	 * When "Full RAS" is enabled, the per-IP interrupt sources should
754  	 * be disabled and the driver should only look for the aggregated
755  	 * interrupt via sync flood.
756  	 */
757  	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
758  		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
759  		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
760  		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
761  			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
762  		amdgpu_ras_reset_gpu(adev);
763  	}
764  	return AMDGPU_RAS_SUCCESS;
765  }
766  
767  int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
768  				  struct amdgpu_irq_src *source,
769  				  struct amdgpu_iv_entry *entry)
770  {
771  	struct ras_common_if *ras_if = adev->gfx.ras_if;
772  	struct ras_dispatch_if ih_data = {
773  		.entry = entry,
774  	};
775  
776  	if (!ras_if)
777  		return 0;
778  
779  	ih_data.head = *ras_if;
780  
781  	DRM_ERROR("CP ECC ERROR IRQ\n");
782  	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
783  	return 0;
784  }
785  
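/*
 * Read a register on behalf of the CPU through the KIQ: when the MES ring is
 * ready the read is routed through amdgpu_mes_rreg(); otherwise a read packet
 * is emitted on the KIQ ring, the result lands in a writeback slot
 * (adev->wb.wb[reg_val_offs]) and the caller polls the fence, with bounded
 * retries, before returning the value (or ~0 on failure).
 */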
786  uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
787  {
788  	signed long r, cnt = 0;
789  	unsigned long flags;
790  	uint32_t seq, reg_val_offs = 0, value = 0;
791  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
792  	struct amdgpu_ring *ring = &kiq->ring;
793  
794  	if (amdgpu_device_skip_hw_access(adev))
795  		return 0;
796  
797  	if (adev->mes.ring.sched.ready)
798  		return amdgpu_mes_rreg(adev, reg);
799  
800  	BUG_ON(!ring->funcs->emit_rreg);
801  
802  	spin_lock_irqsave(&kiq->ring_lock, flags);
803  	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
804  		pr_err("critical bug! too many kiq readers\n");
805  		goto failed_unlock;
806  	}
807  	amdgpu_ring_alloc(ring, 32);
808  	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
809  	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
810  	if (r)
811  		goto failed_undo;
812  
813  	amdgpu_ring_commit(ring);
814  	spin_unlock_irqrestore(&kiq->ring_lock, flags);
815  
816  	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
817  
818  	/* Don't wait any longer in the GPU reset case, because doing so may
819  	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
820  	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
821  	 * never return if we keep waiting in virt_kiq_rreg, which causes
822  	 * gpu_recover() to hang there.
823  	 *
824  	 * Also don't wait any longer in IRQ context.
825  	 */
826  	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
827  		goto failed_kiq_read;
828  
829  	might_sleep();
830  	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
831  		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
832  		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
833  	}
834  
835  	if (cnt > MAX_KIQ_REG_TRY)
836  		goto failed_kiq_read;
837  
838  	mb();
839  	value = adev->wb.wb[reg_val_offs];
840  	amdgpu_device_wb_free(adev, reg_val_offs);
841  	return value;
842  
843  failed_undo:
844  	amdgpu_ring_undo(ring);
845  failed_unlock:
846  	spin_unlock_irqrestore(&kiq->ring_lock, flags);
847  failed_kiq_read:
848  	if (reg_val_offs)
849  		amdgpu_device_wb_free(adev, reg_val_offs);
850  	dev_err(adev->dev, "failed to read reg:%x\n", reg);
851  	return ~0;
852  }
853  
854  void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
855  {
856  	signed long r, cnt = 0;
857  	unsigned long flags;
858  	uint32_t seq;
859  	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
860  	struct amdgpu_ring *ring = &kiq->ring;
861  
862  	BUG_ON(!ring->funcs->emit_wreg);
863  
864  	if (amdgpu_device_skip_hw_access(adev))
865  		return;
866  
867  	if (adev->mes.ring.sched.ready) {
868  		amdgpu_mes_wreg(adev, reg, v);
869  		return;
870  	}
871  
872  	spin_lock_irqsave(&kiq->ring_lock, flags);
873  	amdgpu_ring_alloc(ring, 32);
874  	amdgpu_ring_emit_wreg(ring, reg, v);
875  	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
876  	if (r)
877  		goto failed_undo;
878  
879  	amdgpu_ring_commit(ring);
880  	spin_unlock_irqrestore(&kiq->ring_lock, flags);
881  
882  	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
883  
884  	/* Don't wait any longer in the GPU reset case, because doing so may
885  	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
886  	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
887  	 * never return if we keep waiting in virt_kiq_rreg, which causes
888  	 * gpu_recover() to hang there.
889  	 *
890  	 * Also don't wait any longer in IRQ context.
891  	 */
892  	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
893  		goto failed_kiq_write;
894  
895  	might_sleep();
896  	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
897  
898  		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
899  		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
900  	}
901  
902  	if (cnt > MAX_KIQ_REG_TRY)
903  		goto failed_kiq_write;
904  
905  	return;
906  
907  failed_undo:
908  	amdgpu_ring_undo(ring);
909  	spin_unlock_irqrestore(&kiq->ring_lock, flags);
910  failed_kiq_write:
911  	dev_err(adev->dev, "failed to write reg:%x\n", reg);
912  }
913  
914  int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
915  {
916  	if (amdgpu_num_kcq == -1) {
917  		return 8;
918  	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
919  		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
920  		return 8;
921  	}
922  	return amdgpu_num_kcq;
923  }
924  
925  void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
926  				  uint32_t ucode_id)
927  {
928  	const struct gfx_firmware_header_v1_0 *cp_hdr;
929  	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
930  	struct amdgpu_firmware_info *info = NULL;
931  	const struct firmware *ucode_fw;
932  	unsigned int fw_size;
933  
934  	switch (ucode_id) {
935  	case AMDGPU_UCODE_ID_CP_PFP:
936  		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
937  			adev->gfx.pfp_fw->data;
938  		adev->gfx.pfp_fw_version =
939  			le32_to_cpu(cp_hdr->header.ucode_version);
940  		adev->gfx.pfp_feature_version =
941  			le32_to_cpu(cp_hdr->ucode_feature_version);
942  		ucode_fw = adev->gfx.pfp_fw;
943  		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
944  		break;
945  	case AMDGPU_UCODE_ID_CP_RS64_PFP:
946  		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
947  			adev->gfx.pfp_fw->data;
948  		adev->gfx.pfp_fw_version =
949  			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
950  		adev->gfx.pfp_feature_version =
951  			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
952  		ucode_fw = adev->gfx.pfp_fw;
953  		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
954  		break;
955  	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
956  	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
957  		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
958  			adev->gfx.pfp_fw->data;
959  		ucode_fw = adev->gfx.pfp_fw;
960  		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
961  		break;
962  	case AMDGPU_UCODE_ID_CP_ME:
963  		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
964  			adev->gfx.me_fw->data;
965  		adev->gfx.me_fw_version =
966  			le32_to_cpu(cp_hdr->header.ucode_version);
967  		adev->gfx.me_feature_version =
968  			le32_to_cpu(cp_hdr->ucode_feature_version);
969  		ucode_fw = adev->gfx.me_fw;
970  		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
971  		break;
972  	case AMDGPU_UCODE_ID_CP_RS64_ME:
973  		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
974  			adev->gfx.me_fw->data;
975  		adev->gfx.me_fw_version =
976  			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
977  		adev->gfx.me_feature_version =
978  			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
979  		ucode_fw = adev->gfx.me_fw;
980  		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
981  		break;
982  	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
983  	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
984  		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
985  			adev->gfx.me_fw->data;
986  		ucode_fw = adev->gfx.me_fw;
987  		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
988  		break;
989  	case AMDGPU_UCODE_ID_CP_CE:
990  		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
991  			adev->gfx.ce_fw->data;
992  		adev->gfx.ce_fw_version =
993  			le32_to_cpu(cp_hdr->header.ucode_version);
994  		adev->gfx.ce_feature_version =
995  			le32_to_cpu(cp_hdr->ucode_feature_version);
996  		ucode_fw = adev->gfx.ce_fw;
997  		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
998  		break;
999  	case AMDGPU_UCODE_ID_CP_MEC1:
1000  		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1001  			adev->gfx.mec_fw->data;
1002  		adev->gfx.mec_fw_version =
1003  			le32_to_cpu(cp_hdr->header.ucode_version);
1004  		adev->gfx.mec_feature_version =
1005  			le32_to_cpu(cp_hdr->ucode_feature_version);
1006  		ucode_fw = adev->gfx.mec_fw;
1007  		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1008  			  le32_to_cpu(cp_hdr->jt_size) * 4;
1009  		break;
1010  	case AMDGPU_UCODE_ID_CP_MEC1_JT:
1011  		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1012  			adev->gfx.mec_fw->data;
1013  		ucode_fw = adev->gfx.mec_fw;
1014  		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1015  		break;
1016  	case AMDGPU_UCODE_ID_CP_MEC2:
1017  		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1018  			adev->gfx.mec2_fw->data;
1019  		adev->gfx.mec2_fw_version =
1020  			le32_to_cpu(cp_hdr->header.ucode_version);
1021  		adev->gfx.mec2_feature_version =
1022  			le32_to_cpu(cp_hdr->ucode_feature_version);
1023  		ucode_fw = adev->gfx.mec2_fw;
1024  		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1025  			  le32_to_cpu(cp_hdr->jt_size) * 4;
1026  		break;
1027  	case AMDGPU_UCODE_ID_CP_MEC2_JT:
1028  		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1029  			adev->gfx.mec2_fw->data;
1030  		ucode_fw = adev->gfx.mec2_fw;
1031  		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1032  		break;
1033  	case AMDGPU_UCODE_ID_CP_RS64_MEC:
1034  		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1035  			adev->gfx.mec_fw->data;
1036  		adev->gfx.mec_fw_version =
1037  			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1038  		adev->gfx.mec_feature_version =
1039  			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1040  		ucode_fw = adev->gfx.mec_fw;
1041  		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1042  		break;
1043  	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
1044  	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
1045  	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
1046  	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
1047  		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1048  			adev->gfx.mec_fw->data;
1049  		ucode_fw = adev->gfx.mec_fw;
1050  		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1051  		break;
1052  	default:
1053  		return; /* unknown ucode id, nothing to register */
1054  	}
1055  
1056  	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1057  		info = &adev->firmware.ucode[ucode_id];
1058  		info->ucode_id = ucode_id;
1059  		info->fw = ucode_fw;
1060  		adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
1061  	}
1062  }
1063