xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision dd08ebf6c3525a7ea2186e636df064ea47281987)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
37 
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 
78 #include <linux/suspend.h>
79 #include <drm/task_barrier.h>
80 #include <linux/pm_runtime.h>
81 
82 #include <drm/drm_drv.h>
83 
84 #if IS_ENABLED(CONFIG_X86)
85 #include <asm/intel-family.h>
86 #endif
87 
88 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
89 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
95 
96 #define AMDGPU_RESUME_MS		2000
97 #define AMDGPU_MAX_RETRY_LIMIT		2
98 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
99 
100 static const struct drm_driver amdgpu_kms_driver;
101 
102 const char *amdgpu_asic_name[] = {
103 	"TAHITI",
104 	"PITCAIRN",
105 	"VERDE",
106 	"OLAND",
107 	"HAINAN",
108 	"BONAIRE",
109 	"KAVERI",
110 	"KABINI",
111 	"HAWAII",
112 	"MULLINS",
113 	"TOPAZ",
114 	"TONGA",
115 	"FIJI",
116 	"CARRIZO",
117 	"STONEY",
118 	"POLARIS10",
119 	"POLARIS11",
120 	"POLARIS12",
121 	"VEGAM",
122 	"VEGA10",
123 	"VEGA12",
124 	"VEGA20",
125 	"RAVEN",
126 	"ARCTURUS",
127 	"RENOIR",
128 	"ALDEBARAN",
129 	"NAVI10",
130 	"CYAN_SKILLFISH",
131 	"NAVI14",
132 	"NAVI12",
133 	"SIENNA_CICHLID",
134 	"NAVY_FLOUNDER",
135 	"VANGOGH",
136 	"DIMGREY_CAVEFISH",
137 	"BEIGE_GOBY",
138 	"YELLOW_CARP",
139 	"IP DISCOVERY",
140 	"LAST",
141 };
142 
143 /**
144  * DOC: pcie_replay_count
145  *
146  * The amdgpu driver provides a sysfs API for reporting the total number
147  * of PCIe replays (NAKs).
148  * The file pcie_replay_count is used for this and returns the total
149  * number of replays as a sum of the NAKs generated and NAKs received.
150  */
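
/*
 * Example (editorial sketch, not part of the driver): userspace reads this
 * attribute straight from sysfs.  Assuming the GPU is card0, something like
 *
 *	char buf[32] = {0};
 *	int fd = open("/sys/class/drm/card0/device/pcie_replay_count", O_RDONLY);
 *
 *	if (fd >= 0) {
 *		read(fd, buf, sizeof(buf) - 1);
 *		close(fd);
 *	}
 *
 * leaves the decimal replay count (newline terminated) in buf.
 */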
151 
152 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 		struct device_attribute *attr, char *buf)
154 {
155 	struct drm_device *ddev = dev_get_drvdata(dev);
156 	struct amdgpu_device *adev = drm_to_adev(ddev);
157 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158 
159 	return sysfs_emit(buf, "%llu\n", cnt);
160 }
161 
162 static DEVICE_ATTR(pcie_replay_count, 0444,
163 		amdgpu_device_get_pcie_replay_count, NULL);
164 
165 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
166 					  struct bin_attribute *attr, char *buf,
167 					  loff_t ppos, size_t count)
168 {
169 	struct device *dev = kobj_to_dev(kobj);
170 	struct drm_device *ddev = dev_get_drvdata(dev);
171 	struct amdgpu_device *adev = drm_to_adev(ddev);
172 	ssize_t bytes_read;
173 
174 	switch (ppos) {
175 	case AMDGPU_SYS_REG_STATE_XGMI:
176 		bytes_read = amdgpu_asic_get_reg_state(
177 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
178 		break;
179 	case AMDGPU_SYS_REG_STATE_WAFL:
180 		bytes_read = amdgpu_asic_get_reg_state(
181 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
182 		break;
183 	case AMDGPU_SYS_REG_STATE_PCIE:
184 		bytes_read = amdgpu_asic_get_reg_state(
185 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
186 		break;
187 	case AMDGPU_SYS_REG_STATE_USR:
188 		bytes_read = amdgpu_asic_get_reg_state(
189 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
190 		break;
191 	case AMDGPU_SYS_REG_STATE_USR_1:
192 		bytes_read = amdgpu_asic_get_reg_state(
193 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
194 		break;
195 	default:
196 		return -EINVAL;
197 	}
198 
199 	return bytes_read;
200 }
201 
202 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
203 	 AMDGPU_SYS_REG_STATE_END);
204 
205 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
206 {
207 	int ret;
208 
209 	if (!amdgpu_asic_get_reg_state_supported(adev))
210 		return 0;
211 
212 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
213 
214 	return ret;
215 }
216 
217 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
218 {
219 	if (!amdgpu_asic_get_reg_state_supported(adev))
220 		return;
221 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
222 }
223 
224 /**
225  * DOC: board_info
226  *
227  * The amdgpu driver provides a sysfs API for reporting board-related information.
228  * It provides the form factor information in the format
229  *
230  *   type : form factor
231  *
232  * Possible form factor values
233  *
234  * - "cem"		- PCIE CEM card
235  * - "oam"		- Open Compute Accelerator Module
236  * - "unknown"	- Not known
237  *
238  */
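
/*
 * Example (editorial note): on an OAM board the attribute reads back as
 *
 *	type : oam
 *
 * so userspace can split the line on " : " to recover the form factor.
 */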
239 
240 static ssize_t amdgpu_device_get_board_info(struct device *dev,
241 					    struct device_attribute *attr,
242 					    char *buf)
243 {
244 	struct drm_device *ddev = dev_get_drvdata(dev);
245 	struct amdgpu_device *adev = drm_to_adev(ddev);
246 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
247 	const char *pkg;
248 
249 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
250 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
251 
252 	switch (pkg_type) {
253 	case AMDGPU_PKG_TYPE_CEM:
254 		pkg = "cem";
255 		break;
256 	case AMDGPU_PKG_TYPE_OAM:
257 		pkg = "oam";
258 		break;
259 	default:
260 		pkg = "unknown";
261 		break;
262 	}
263 
264 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
265 }
266 
267 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
268 
269 static struct attribute *amdgpu_board_attrs[] = {
270 	&dev_attr_board_info.attr,
271 	NULL,
272 };
273 
274 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
275 					     struct attribute *attr, int n)
276 {
277 	struct device *dev = kobj_to_dev(kobj);
278 	struct drm_device *ddev = dev_get_drvdata(dev);
279 	struct amdgpu_device *adev = drm_to_adev(ddev);
280 
281 	if (adev->flags & AMD_IS_APU)
282 		return 0;
283 
284 	return attr->mode;
285 }
286 
287 static const struct attribute_group amdgpu_board_attrs_group = {
288 	.attrs = amdgpu_board_attrs,
289 	.is_visible = amdgpu_board_attrs_is_visible
290 };
291 
292 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
293 
294 
295 /**
296  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
297  *
298  * @dev: drm_device pointer
299  *
300  * Returns true if the device is a dGPU with ATPX power control,
301  * otherwise return false.
302  */
303 bool amdgpu_device_supports_px(struct drm_device *dev)
304 {
305 	struct amdgpu_device *adev = drm_to_adev(dev);
306 
307 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
308 		return true;
309 	return false;
310 }
311 
312 /**
313  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
314  *
315  * @dev: drm_device pointer
316  *
317  * Returns true if the device is a dGPU with ACPI power control,
318  * otherwise return false.
319  */
320 bool amdgpu_device_supports_boco(struct drm_device *dev)
321 {
322 	struct amdgpu_device *adev = drm_to_adev(dev);
323 
324 	if (adev->has_pr3 ||
325 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
326 		return true;
327 	return false;
328 }
329 
330 /**
331  * amdgpu_device_supports_baco - Does the device support BACO
332  *
333  * @dev: drm_device pointer
334  *
335  * Returns true if the device supports BACO,
336  * otherwise return false.
337  */
338 bool amdgpu_device_supports_baco(struct drm_device *dev)
339 {
340 	struct amdgpu_device *adev = drm_to_adev(dev);
341 
342 	return amdgpu_asic_supports_baco(adev);
343 }
344 
345 /**
346  * amdgpu_device_supports_smart_shift - Is the device a dGPU with
347  * Smart Shift support
348  *
349  * @dev: drm_device pointer
350  *
351  * Returns true if the device is a dGPU with Smart Shift support,
352  * otherwise returns false.
353  */
354 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
355 {
356 	return (amdgpu_device_supports_boco(dev) &&
357 		amdgpu_acpi_is_power_shift_control_supported());
358 }
359 
360 /*
361  * VRAM access helper functions
362  */
363 
364 /**
365  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
366  *
367  * @adev: amdgpu_device pointer
368  * @pos: offset of the buffer in vram
369  * @buf: virtual address of the buffer in system memory
370  * @size: read/write size; the buffer at @buf must be at least @size bytes
371  * @write: true - write to vram, otherwise - read from vram
372  */
373 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
374 			     void *buf, size_t size, bool write)
375 {
376 	unsigned long flags;
377 	uint32_t hi = ~0, tmp = 0;
378 	uint32_t *data = buf;
379 	uint64_t last;
380 	int idx;
381 
382 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
383 		return;
384 
385 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
386 
387 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
388 	for (last = pos + size; pos < last; pos += 4) {
389 		tmp = pos >> 31;
390 
391 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
392 		if (tmp != hi) {
393 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
394 			hi = tmp;
395 		}
396 		if (write)
397 			WREG32_NO_KIQ(mmMM_DATA, *data++);
398 		else
399 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
400 	}
401 
402 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
403 	drm_dev_exit(idx);
404 }
405 
406 /**
407  * amdgpu_device_aper_access - access vram by vram aperture
408  *
409  * @adev: amdgpu_device pointer
410  * @pos: offset of the buffer in vram
411  * @buf: virtual address of the buffer in system memory
412  * @size: read/write size; the buffer at @buf must be at least @size bytes
413  * @write: true - write to vram, otherwise - read from vram
414  *
415  * Returns the number of bytes transferred.
416  */
417 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
418 				 void *buf, size_t size, bool write)
419 {
420 #ifdef CONFIG_64BIT
421 	void __iomem *addr;
422 	size_t count = 0;
423 	uint64_t last;
424 
425 	if (!adev->mman.aper_base_kaddr)
426 		return 0;
427 
428 	last = min(pos + size, adev->gmc.visible_vram_size);
429 	if (last > pos) {
430 		addr = adev->mman.aper_base_kaddr + pos;
431 		count = last - pos;
432 
433 		if (write) {
434 			memcpy_toio(addr, buf, count);
435 			/* Make sure HDP write cache flush happens without any reordering
436 			 * after the system memory contents are sent over PCIe device
437 			 */
438 			mb();
439 			amdgpu_device_flush_hdp(adev, NULL);
440 		} else {
441 			amdgpu_device_invalidate_hdp(adev, NULL);
442 			/* Make sure HDP read cache is invalidated before issuing a read
443 			 * to the PCIe device
444 			 */
445 			mb();
446 			memcpy_fromio(buf, addr, count);
447 		}
448 
449 	}
450 
451 	return count;
452 #else
453 	return 0;
454 #endif
455 }
456 
457 /**
458  * amdgpu_device_vram_access - read/write a buffer in vram
459  *
460  * @adev: amdgpu_device pointer
461  * @pos: offset of the buffer in vram
462  * @buf: virtual address of the buffer in system memory
463  * @size: read/write size; the buffer at @buf must be at least @size bytes
464  * @write: true - write to vram, otherwise - read from vram
465  */
466 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
467 			       void *buf, size_t size, bool write)
468 {
469 	size_t count;
470 
471 	/* try using the vram aperture to access vram first */
472 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
473 	size -= count;
474 	if (size) {
475 		/* use MM to access the rest of vram */
476 		pos += count;
477 		buf += count;
478 		amdgpu_device_mm_access(adev, pos, buf, size, write);
479 	}
480 }
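
/*
 * Illustrative sketch (editorial addition, not part of the driver): how a
 * caller might use amdgpu_device_vram_access() to peek a single dword in
 * VRAM.  @pos is an arbitrary, dword-aligned placeholder offset.
 */
static u32 __maybe_unused amdgpu_example_peek_vram_dword(struct amdgpu_device *adev,
							 loff_t pos)
{
	u32 value = 0;

	/* read 4 bytes from VRAM offset @pos into @value */
	amdgpu_device_vram_access(adev, pos, &value, sizeof(value), false);

	return value;
}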
481 
482 /*
483  * register access helper functions.
484  */
485 
486 /* Check if hw access should be skipped because of hotplug or device error */
487 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
488 {
489 	if (adev->no_hw_access)
490 		return true;
491 
492 #ifdef CONFIG_LOCKDEP
493 	/*
494 	 * This is a bit complicated to understand, so worth a comment. What we assert
495 	 * here is that the GPU reset is not running on another thread in parallel.
496 	 *
497 	 * For this we trylock the read side of the reset semaphore, if that succeeds
498  * we know that the reset is not running in parallel.
499 	 *
500 	 * If the trylock fails we assert that we are either already holding the read
501 	 * side of the lock or are the reset thread itself and hold the write side of
502 	 * the lock.
503 	 */
504 	if (in_task()) {
505 		if (down_read_trylock(&adev->reset_domain->sem))
506 			up_read(&adev->reset_domain->sem);
507 		else
508 			lockdep_assert_held(&adev->reset_domain->sem);
509 	}
510 #endif
511 	return false;
512 }
513 
514 /**
515  * amdgpu_device_rreg - read a memory mapped IO or indirect register
516  *
517  * @adev: amdgpu_device pointer
518  * @reg: dword aligned register offset
519  * @acc_flags: access flags which require special behavior
520  *
521  * Returns the 32 bit value from the offset specified.
522  */
523 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
524 			    uint32_t reg, uint32_t acc_flags)
525 {
526 	uint32_t ret;
527 
528 	if (amdgpu_device_skip_hw_access(adev))
529 		return 0;
530 
531 	if ((reg * 4) < adev->rmmio_size) {
532 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
533 		    amdgpu_sriov_runtime(adev) &&
534 		    down_read_trylock(&adev->reset_domain->sem)) {
535 			ret = amdgpu_kiq_rreg(adev, reg, 0);
536 			up_read(&adev->reset_domain->sem);
537 		} else {
538 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
539 		}
540 	} else {
541 		ret = adev->pcie_rreg(adev, reg * 4);
542 	}
543 
544 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
545 
546 	return ret;
547 }
548 
549 /*
550  * MMIO register read helper functions with byte granularity
551  * @offset: byte offset from MMIO start
552  */
553 
554 /**
555  * amdgpu_mm_rreg8 - read a memory mapped IO register
556  *
557  * @adev: amdgpu_device pointer
558  * @offset: byte aligned register offset
559  *
560  * Returns the 8 bit value from the offset specified.
561  */
562 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
563 {
564 	if (amdgpu_device_skip_hw_access(adev))
565 		return 0;
566 
567 	if (offset < adev->rmmio_size)
568 		return (readb(adev->rmmio + offset));
569 	BUG();
570 }
571 
572 
573 /**
574  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
575  *
576  * @adev: amdgpu_device pointer
577  * @reg: dword aligned register offset
578  * @acc_flags: access flags which require special behavior
579  * @xcc_id: xcc accelerated compute core id
580  *
581  * Returns the 32 bit value from the offset specified.
582  */
583 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
584 				uint32_t reg, uint32_t acc_flags,
585 				uint32_t xcc_id)
586 {
587 	uint32_t ret, rlcg_flag;
588 
589 	if (amdgpu_device_skip_hw_access(adev))
590 		return 0;
591 
592 	if ((reg * 4) < adev->rmmio_size) {
593 		if (amdgpu_sriov_vf(adev) &&
594 		    !amdgpu_sriov_runtime(adev) &&
595 		    adev->gfx.rlc.rlcg_reg_access_supported &&
596 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
597 							 GC_HWIP, false,
598 							 &rlcg_flag)) {
599 			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
600 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
601 		    amdgpu_sriov_runtime(adev) &&
602 		    down_read_trylock(&adev->reset_domain->sem)) {
603 			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
604 			up_read(&adev->reset_domain->sem);
605 		} else {
606 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
607 		}
608 	} else {
609 		ret = adev->pcie_rreg(adev, reg * 4);
610 	}
611 
612 	return ret;
613 }
614 
615 /*
616  * MMIO register write helper functions with byte granularity
617  * @offset: byte offset from MMIO start
618  * @value: the value to be written to the register
619  */
620 
621 /**
622  * amdgpu_mm_wreg8 - write a memory mapped IO register
623  *
624  * @adev: amdgpu_device pointer
625  * @offset: byte aligned register offset
626  * @value: 8 bit value to write
627  *
628  * Writes the value specified to the offset specified.
629  */
630 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
631 {
632 	if (amdgpu_device_skip_hw_access(adev))
633 		return;
634 
635 	if (offset < adev->rmmio_size)
636 		writeb(value, adev->rmmio + offset);
637 	else
638 		BUG();
639 }
640 
641 /**
642  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
643  *
644  * @adev: amdgpu_device pointer
645  * @reg: dword aligned register offset
646  * @v: 32 bit value to write to the register
647  * @acc_flags: access flags which require special behavior
648  *
649  * Writes the value specified to the offset specified.
650  */
651 void amdgpu_device_wreg(struct amdgpu_device *adev,
652 			uint32_t reg, uint32_t v,
653 			uint32_t acc_flags)
654 {
655 	if (amdgpu_device_skip_hw_access(adev))
656 		return;
657 
658 	if ((reg * 4) < adev->rmmio_size) {
659 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
660 		    amdgpu_sriov_runtime(adev) &&
661 		    down_read_trylock(&adev->reset_domain->sem)) {
662 			amdgpu_kiq_wreg(adev, reg, v, 0);
663 			up_read(&adev->reset_domain->sem);
664 		} else {
665 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
666 		}
667 	} else {
668 		adev->pcie_wreg(adev, reg * 4, v);
669 	}
670 
671 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
672 }
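
/*
 * Illustrative sketch (editorial addition, not part of the driver): a typical
 * read-modify-write of a register through the helpers above.  The register
 * offset, mask and bits are caller-supplied placeholders.
 */
static void __maybe_unused amdgpu_example_reg_rmw(struct amdgpu_device *adev,
						  uint32_t reg, uint32_t mask,
						  uint32_t bits)
{
	uint32_t tmp;

	tmp = amdgpu_device_rreg(adev, reg, 0);	/* no special access flags */
	tmp &= ~mask;
	tmp |= (bits & mask);
	amdgpu_device_wreg(adev, reg, tmp, 0);
}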
673 
674 /**
675  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
676  *
677  * @adev: amdgpu_device pointer
678  * @reg: mmio/rlc register
679  * @v: value to write
680  * @xcc_id: xcc accelerated compute core id
681  *
682  * This function is invoked only for debugfs register access.
683  */
684 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
685 			     uint32_t reg, uint32_t v,
686 			     uint32_t xcc_id)
687 {
688 	if (amdgpu_device_skip_hw_access(adev))
689 		return;
690 
691 	if (amdgpu_sriov_fullaccess(adev) &&
692 	    adev->gfx.rlc.funcs &&
693 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
694 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
695 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
696 	} else if ((reg * 4) >= adev->rmmio_size) {
697 		adev->pcie_wreg(adev, reg * 4, v);
698 	} else {
699 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
700 	}
701 }
702 
703 /**
704  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
705  *
706  * @adev: amdgpu_device pointer
707  * @reg: dword aligned register offset
708  * @v: 32 bit value to write to the register
709  * @acc_flags: access flags which require special behavior
710  * @xcc_id: xcc accelerated compute core id
711  *
712  * Writes the value specified to the offset specified.
713  */
714 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
715 			uint32_t reg, uint32_t v,
716 			uint32_t acc_flags, uint32_t xcc_id)
717 {
718 	uint32_t rlcg_flag;
719 
720 	if (amdgpu_device_skip_hw_access(adev))
721 		return;
722 
723 	if ((reg * 4) < adev->rmmio_size) {
724 		if (amdgpu_sriov_vf(adev) &&
725 		    !amdgpu_sriov_runtime(adev) &&
726 		    adev->gfx.rlc.rlcg_reg_access_supported &&
727 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
728 							 GC_HWIP, true,
729 							 &rlcg_flag)) {
730 			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
731 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
732 		    amdgpu_sriov_runtime(adev) &&
733 		    down_read_trylock(&adev->reset_domain->sem)) {
734 			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
735 			up_read(&adev->reset_domain->sem);
736 		} else {
737 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
738 		}
739 	} else {
740 		adev->pcie_wreg(adev, reg * 4, v);
741 	}
742 }
743 
744 /**
745  * amdgpu_device_indirect_rreg - read an indirect register
746  *
747  * @adev: amdgpu_device pointer
748  * @reg_addr: indirect register address to read from
749  *
750  * Returns the value of indirect register @reg_addr
751  */
752 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
753 				u32 reg_addr)
754 {
755 	unsigned long flags, pcie_index, pcie_data;
756 	void __iomem *pcie_index_offset;
757 	void __iomem *pcie_data_offset;
758 	u32 r;
759 
760 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
761 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
762 
763 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
764 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
765 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
766 
767 	writel(reg_addr, pcie_index_offset);
768 	readl(pcie_index_offset);
769 	r = readl(pcie_data_offset);
770 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
771 
772 	return r;
773 }
774 
775 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
776 				    u64 reg_addr)
777 {
778 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
779 	u32 r;
780 	void __iomem *pcie_index_offset;
781 	void __iomem *pcie_index_hi_offset;
782 	void __iomem *pcie_data_offset;
783 
784 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
785 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
786 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
787 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
788 	else
789 		pcie_index_hi = 0;
790 
791 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
792 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
793 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
794 	if (pcie_index_hi != 0)
795 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
796 				pcie_index_hi * 4;
797 
798 	writel(reg_addr, pcie_index_offset);
799 	readl(pcie_index_offset);
800 	if (pcie_index_hi != 0) {
801 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
802 		readl(pcie_index_hi_offset);
803 	}
804 	r = readl(pcie_data_offset);
805 
806 	/* clear the high bits */
807 	if (pcie_index_hi != 0) {
808 		writel(0, pcie_index_hi_offset);
809 		readl(pcie_index_hi_offset);
810 	}
811 
812 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
813 
814 	return r;
815 }
816 
817 /**
818  * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
819  *
820  * @adev: amdgpu_device pointer
821  * @reg_addr: indirect register address to read from
822  *
823  * Returns the value of indirect register @reg_addr
824  */
825 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
826 				  u32 reg_addr)
827 {
828 	unsigned long flags, pcie_index, pcie_data;
829 	void __iomem *pcie_index_offset;
830 	void __iomem *pcie_data_offset;
831 	u64 r;
832 
833 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
834 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
835 
836 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
837 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
838 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
839 
840 	/* read low 32 bits */
841 	writel(reg_addr, pcie_index_offset);
842 	readl(pcie_index_offset);
843 	r = readl(pcie_data_offset);
844 	/* read high 32 bits */
845 	writel(reg_addr + 4, pcie_index_offset);
846 	readl(pcie_index_offset);
847 	r |= ((u64)readl(pcie_data_offset) << 32);
848 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
849 
850 	return r;
851 }
852 
853 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
854 				  u64 reg_addr)
855 {
856 	unsigned long flags, pcie_index, pcie_data;
857 	unsigned long pcie_index_hi = 0;
858 	void __iomem *pcie_index_offset;
859 	void __iomem *pcie_index_hi_offset;
860 	void __iomem *pcie_data_offset;
861 	u64 r;
862 
863 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
864 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
865 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
866 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
867 
868 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
869 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
870 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
871 	if (pcie_index_hi != 0)
872 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
873 			pcie_index_hi * 4;
874 
875 	/* read low 32 bits */
876 	writel(reg_addr, pcie_index_offset);
877 	readl(pcie_index_offset);
878 	if (pcie_index_hi != 0) {
879 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
880 		readl(pcie_index_hi_offset);
881 	}
882 	r = readl(pcie_data_offset);
883 	/* read high 32 bits */
884 	writel(reg_addr + 4, pcie_index_offset);
885 	readl(pcie_index_offset);
886 	if (pcie_index_hi != 0) {
887 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
888 		readl(pcie_index_hi_offset);
889 	}
890 	r |= ((u64)readl(pcie_data_offset) << 32);
891 
892 	/* clear the high bits */
893 	if (pcie_index_hi != 0) {
894 		writel(0, pcie_index_hi_offset);
895 		readl(pcie_index_hi_offset);
896 	}
897 
898 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
899 
900 	return r;
901 }
902 
903 /**
904  * amdgpu_device_indirect_wreg - write an indirect register
905  *
906  * @adev: amdgpu_device pointer
907  * @reg_addr: indirect register offset
908  * @reg_data: indirect register data
909  *
910  */
911 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
912 				 u32 reg_addr, u32 reg_data)
913 {
914 	unsigned long flags, pcie_index, pcie_data;
915 	void __iomem *pcie_index_offset;
916 	void __iomem *pcie_data_offset;
917 
918 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
919 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
920 
921 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
922 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
923 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
924 
925 	writel(reg_addr, pcie_index_offset);
926 	readl(pcie_index_offset);
927 	writel(reg_data, pcie_data_offset);
928 	readl(pcie_data_offset);
929 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
930 }
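
/*
 * Illustrative sketch (editorial addition, not part of the driver): ASIC
 * specific code typically points the PCIe index/data callbacks at the
 * helpers above from its early init hook, roughly like this.
 */
static void __maybe_unused amdgpu_example_wire_indirect_access(struct amdgpu_device *adev)
{
	/* route out-of-range MMIO accesses through the PCIe index/data pair */
	adev->pcie_rreg = amdgpu_device_indirect_rreg;
	adev->pcie_wreg = amdgpu_device_indirect_wreg;
}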
931 
932 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
933 				     u64 reg_addr, u32 reg_data)
934 {
935 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
936 	void __iomem *pcie_index_offset;
937 	void __iomem *pcie_index_hi_offset;
938 	void __iomem *pcie_data_offset;
939 
940 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
941 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
942 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
943 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
944 	else
945 		pcie_index_hi = 0;
946 
947 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
948 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
949 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
950 	if (pcie_index_hi != 0)
951 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
952 				pcie_index_hi * 4;
953 
954 	writel(reg_addr, pcie_index_offset);
955 	readl(pcie_index_offset);
956 	if (pcie_index_hi != 0) {
957 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
958 		readl(pcie_index_hi_offset);
959 	}
960 	writel(reg_data, pcie_data_offset);
961 	readl(pcie_data_offset);
962 
963 	/* clear the high bits */
964 	if (pcie_index_hi != 0) {
965 		writel(0, pcie_index_hi_offset);
966 		readl(pcie_index_hi_offset);
967 	}
968 
969 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
970 }
971 
972 /**
973  * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register
974  *
975  * @adev: amdgpu_device pointer
976  * @reg_addr: indirect register offset
977  * @reg_data: indirect register data
978  *
979  */
980 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
981 				   u32 reg_addr, u64 reg_data)
982 {
983 	unsigned long flags, pcie_index, pcie_data;
984 	void __iomem *pcie_index_offset;
985 	void __iomem *pcie_data_offset;
986 
987 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
988 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
989 
990 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
991 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
992 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
993 
994 	/* write low 32 bits */
995 	writel(reg_addr, pcie_index_offset);
996 	readl(pcie_index_offset);
997 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
998 	readl(pcie_data_offset);
999 	/* write high 32 bits */
1000 	writel(reg_addr + 4, pcie_index_offset);
1001 	readl(pcie_index_offset);
1002 	writel((u32)(reg_data >> 32), pcie_data_offset);
1003 	readl(pcie_data_offset);
1004 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1005 }
1006 
1007 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1008 				   u64 reg_addr, u64 reg_data)
1009 {
1010 	unsigned long flags, pcie_index, pcie_data;
1011 	unsigned long pcie_index_hi = 0;
1012 	void __iomem *pcie_index_offset;
1013 	void __iomem *pcie_index_hi_offset;
1014 	void __iomem *pcie_data_offset;
1015 
1016 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1017 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1018 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1019 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1020 
1021 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1022 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1023 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1024 	if (pcie_index_hi != 0)
1025 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1026 				pcie_index_hi * 4;
1027 
1028 	/* write low 32 bits */
1029 	writel(reg_addr, pcie_index_offset);
1030 	readl(pcie_index_offset);
1031 	if (pcie_index_hi != 0) {
1032 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1033 		readl(pcie_index_hi_offset);
1034 	}
1035 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1036 	readl(pcie_data_offset);
1037 	/* write high 32 bits */
1038 	writel(reg_addr + 4, pcie_index_offset);
1039 	readl(pcie_index_offset);
1040 	if (pcie_index_hi != 0) {
1041 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1042 		readl(pcie_index_hi_offset);
1043 	}
1044 	writel((u32)(reg_data >> 32), pcie_data_offset);
1045 	readl(pcie_data_offset);
1046 
1047 	/* clear the high bits */
1048 	if (pcie_index_hi != 0) {
1049 		writel(0, pcie_index_hi_offset);
1050 		readl(pcie_index_hi_offset);
1051 	}
1052 
1053 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1054 }
1055 
1056 /**
1057  * amdgpu_device_get_rev_id - query device rev_id
1058  *
1059  * @adev: amdgpu_device pointer
1060  *
1061  * Return device rev_id
1062  */
1063 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1064 {
1065 	return adev->nbio.funcs->get_rev_id(adev);
1066 }
1067 
1068 /**
1069  * amdgpu_invalid_rreg - dummy reg read function
1070  *
1071  * @adev: amdgpu_device pointer
1072  * @reg: offset of register
1073  *
1074  * Dummy register read function.  Used for register blocks
1075  * that certain asics don't have (all asics).
1076  * Returns the value in the register.
1077  */
1078 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1079 {
1080 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1081 	BUG();
1082 	return 0;
1083 }
1084 
1085 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1086 {
1087 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1088 	BUG();
1089 	return 0;
1090 }
1091 
1092 /**
1093  * amdgpu_invalid_wreg - dummy reg write function
1094  *
1095  * @adev: amdgpu_device pointer
1096  * @reg: offset of register
1097  * @v: value to write to the register
1098  *
1099  * Dummy register write function.  Used for register blocks
1100  * that certain asics don't have (all asics).
1101  */
1102 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1103 {
1104 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1105 		  reg, v);
1106 	BUG();
1107 }
1108 
1109 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1110 {
1111 	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1112 		  reg, v);
1113 	BUG();
1114 }
1115 
1116 /**
1117  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1118  *
1119  * @adev: amdgpu_device pointer
1120  * @reg: offset of register
1121  *
1122  * Dummy register read function.  Used for register blocks
1123  * that certain asics don't have (all asics).
1124  * Returns the value in the register.
1125  */
1126 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1127 {
1128 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1129 	BUG();
1130 	return 0;
1131 }
1132 
1133 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1134 {
1135 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1136 	BUG();
1137 	return 0;
1138 }
1139 
1140 /**
1141  * amdgpu_invalid_wreg64 - dummy reg write function
1142  *
1143  * @adev: amdgpu_device pointer
1144  * @reg: offset of register
1145  * @v: value to write to the register
1146  *
1147  * Dummy register write function.  Used for register blocks
1148  * that certain asics don't have (all asics).
1149  */
1150 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1151 {
1152 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1153 		  reg, v);
1154 	BUG();
1155 }
1156 
1157 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1158 {
1159 	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1160 		  reg, v);
1161 	BUG();
1162 }
1163 
1164 /**
1165  * amdgpu_block_invalid_rreg - dummy reg read function
1166  *
1167  * @adev: amdgpu_device pointer
1168  * @block: offset of instance
1169  * @reg: offset of register
1170  *
1171  * Dummy register read function.  Used for register blocks
1172  * that certain asics don't have (all asics).
1173  * Returns the value in the register.
1174  */
1175 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1176 					  uint32_t block, uint32_t reg)
1177 {
1178 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1179 		  reg, block);
1180 	BUG();
1181 	return 0;
1182 }
1183 
1184 /**
1185  * amdgpu_block_invalid_wreg - dummy reg write function
1186  *
1187  * @adev: amdgpu_device pointer
1188  * @block: offset of instance
1189  * @reg: offset of register
1190  * @v: value to write to the register
1191  *
1192  * Dummy register write function.  Used for register blocks
1193  * that certain asics don't have (all asics).
1194  */
1195 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1196 				      uint32_t block,
1197 				      uint32_t reg, uint32_t v)
1198 {
1199 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1200 		  reg, block, v);
1201 	BUG();
1202 }
1203 
1204 /**
1205  * amdgpu_device_asic_init - Wrapper for atom asic_init
1206  *
1207  * @adev: amdgpu_device pointer
1208  *
1209  * Does any asic specific work and then calls atom asic init.
1210  */
1211 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1212 {
1213 	int ret;
1214 
1215 	amdgpu_asic_pre_asic_init(adev);
1216 
1217 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1218 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1219 		amdgpu_psp_wait_for_bootloader(adev);
1220 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1221 		/* TODO: check the return val and stop device initialization if boot fails */
1222 		amdgpu_psp_query_boot_status(adev);
1223 		return ret;
1224 	} else {
1225 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 /**
1232  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1233  *
1234  * @adev: amdgpu_device pointer
1235  *
1236  * Allocates a scratch page of VRAM for use by various things in the
1237  * driver.
1238  */
1239 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1240 {
1241 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1242 				       AMDGPU_GEM_DOMAIN_VRAM |
1243 				       AMDGPU_GEM_DOMAIN_GTT,
1244 				       &adev->mem_scratch.robj,
1245 				       &adev->mem_scratch.gpu_addr,
1246 				       (void **)&adev->mem_scratch.ptr);
1247 }
1248 
1249 /**
1250  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1251  *
1252  * @adev: amdgpu_device pointer
1253  *
1254  * Frees the VRAM scratch page.
1255  */
1256 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1257 {
1258 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1259 }
1260 
1261 /**
1262  * amdgpu_device_program_register_sequence - program an array of registers.
1263  *
1264  * @adev: amdgpu_device pointer
1265  * @registers: pointer to the register array
1266  * @array_size: size of the register array
1267  *
1268  * Programs an array of registers with and/or masks.
1269  * This is a helper for setting golden registers.
1270  */
1271 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1272 					     const u32 *registers,
1273 					     const u32 array_size)
1274 {
1275 	u32 tmp, reg, and_mask, or_mask;
1276 	int i;
1277 
1278 	if (array_size % 3)
1279 		return;
1280 
1281 	for (i = 0; i < array_size; i += 3) {
1282 		reg = registers[i + 0];
1283 		and_mask = registers[i + 1];
1284 		or_mask = registers[i + 2];
1285 
1286 		if (and_mask == 0xffffffff) {
1287 			tmp = or_mask;
1288 		} else {
1289 			tmp = RREG32(reg);
1290 			tmp &= ~and_mask;
1291 			if (adev->family >= AMDGPU_FAMILY_AI)
1292 				tmp |= (or_mask & and_mask);
1293 			else
1294 				tmp |= or_mask;
1295 		}
1296 		WREG32(reg, tmp);
1297 	}
1298 }
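
/*
 * Illustrative sketch (editorial addition, not part of the driver): golden
 * register tables are flat arrays of {offset, and_mask, or_mask} triplets.
 * The offsets and masks below are placeholders, not real golden settings.
 */
static const u32 amdgpu_example_golden_regs[] = {
	/* offset      and_mask    or_mask */
	0x0000260c, 0xffffffff, 0x00000000,	/* and_mask of ~0: or_mask is written as-is */
	0x000098f8, 0x0000000f, 0x00000002,	/* otherwise: read, clear masked bits, OR   */
};

static void __maybe_unused amdgpu_example_apply_golden(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						amdgpu_example_golden_regs,
						ARRAY_SIZE(amdgpu_example_golden_regs));
}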
1299 
1300 /**
1301  * amdgpu_device_pci_config_reset - reset the GPU
1302  *
1303  * @adev: amdgpu_device pointer
1304  *
1305  * Resets the GPU using the pci config reset sequence.
1306  * Only applicable to asics prior to vega10.
1307  */
1308 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1309 {
1310 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1311 }
1312 
1313 /**
1314  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1315  *
1316  * @adev: amdgpu_device pointer
1317  *
1318  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1319  */
1320 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1321 {
1322 	return pci_reset_function(adev->pdev);
1323 }
1324 
1325 /*
1326  * amdgpu_device_wb_*()
1327  * Writeback is the method by which the GPU updates special pages in memory
1328  * with the status of certain GPU events (fences, ring pointers, etc.).
1329  */
1330 
1331 /**
1332  * amdgpu_device_wb_fini - Disable Writeback and free memory
1333  *
1334  * @adev: amdgpu_device pointer
1335  *
1336  * Disables Writeback and frees the Writeback memory (all asics).
1337  * Used at driver shutdown.
1338  */
1339 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1340 {
1341 	if (adev->wb.wb_obj) {
1342 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1343 				      &adev->wb.gpu_addr,
1344 				      (void **)&adev->wb.wb);
1345 		adev->wb.wb_obj = NULL;
1346 	}
1347 }
1348 
1349 /**
1350  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1351  *
1352  * @adev: amdgpu_device pointer
1353  *
1354  * Initializes writeback and allocates writeback memory (all asics).
1355  * Used at driver startup.
1356  * Returns 0 on success or a negative error code on failure.
1357  */
1358 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1359 {
1360 	int r;
1361 
1362 	if (adev->wb.wb_obj == NULL) {
1363 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1364 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1365 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1366 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1367 					    (void **)&adev->wb.wb);
1368 		if (r) {
1369 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1370 			return r;
1371 		}
1372 
1373 		adev->wb.num_wb = AMDGPU_MAX_WB;
1374 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1375 
1376 		/* clear wb memory */
1377 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1378 	}
1379 
1380 	return 0;
1381 }
1382 
1383 /**
1384  * amdgpu_device_wb_get - Allocate a wb entry
1385  *
1386  * @adev: amdgpu_device pointer
1387  * @wb: wb index
1388  *
1389  * Allocate a wb slot for use by the driver (all asics).
1390  * Returns 0 on success or -EINVAL on failure.
1391  */
1392 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1393 {
1394 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1395 
1396 	if (offset < adev->wb.num_wb) {
1397 		__set_bit(offset, adev->wb.used);
1398 		*wb = offset << 3; /* convert to dw offset */
1399 		return 0;
1400 	} else {
1401 		return -EINVAL;
1402 	}
1403 }
1404 
1405 /**
1406  * amdgpu_device_wb_free - Free a wb entry
1407  *
1408  * @adev: amdgpu_device pointer
1409  * @wb: wb index
1410  *
1411  * Free a wb slot allocated for use by the driver (all asics)
1412  */
1413 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1414 {
1415 	wb >>= 3;
1416 	if (wb < adev->wb.num_wb)
1417 		__clear_bit(wb, adev->wb.used);
1418 }
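
/*
 * Illustrative sketch (editorial addition, not part of the driver): a typical
 * writeback slot life cycle.  The GPU address would normally be handed to a
 * ring or IP block; here it is only computed and discarded.
 */
static int __maybe_unused amdgpu_example_use_wb_slot(struct amdgpu_device *adev)
{
	u32 wb;			/* dword offset into the writeback buffer */
	u64 wb_gpu_addr;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	adev->wb.wb[wb] = 0;				/* CPU-side init of the slot */
	wb_gpu_addr = adev->wb.gpu_addr + (wb * 4);	/* address the GPU writes to */
	(void)wb_gpu_addr;

	amdgpu_device_wb_free(adev, wb);
	return 0;
}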
1419 
1420 /**
1421  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1422  *
1423  * @adev: amdgpu_device pointer
1424  *
1425  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1426  * to fail, but if any of the BARs is not accessible after the resize we abort
1427  * driver loading by returning -ENODEV.
1428  */
1429 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1430 {
1431 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1432 	struct pci_bus *root;
1433 	struct resource *res;
1434 	unsigned int i;
1435 	u16 cmd;
1436 	int r;
1437 
1438 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1439 		return 0;
1440 
1441 	/* Bypass for VF */
1442 	if (amdgpu_sriov_vf(adev))
1443 		return 0;
1444 
1445 	/* skip if the bios has already enabled large BAR */
1446 	if (adev->gmc.real_vram_size &&
1447 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1448 		return 0;
1449 
1450 	/* Check if the root BUS has 64bit memory resources */
1451 	root = adev->pdev->bus;
1452 	while (root->parent)
1453 		root = root->parent;
1454 
1455 	pci_bus_for_each_resource(root, res, i) {
1456 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1457 		    res->start > 0x100000000ull)
1458 			break;
1459 	}
1460 
1461 	/* Trying to resize is pointless without a root hub window above 4GB */
1462 	if (!res)
1463 		return 0;
1464 
1465 	/* Limit the BAR size to what is available */
1466 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1467 			rbar_size);
1468 
1469 	/* Disable memory decoding while we change the BAR addresses and size */
1470 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1471 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1472 			      cmd & ~PCI_COMMAND_MEMORY);
1473 
1474 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1475 	amdgpu_doorbell_fini(adev);
1476 	if (adev->asic_type >= CHIP_BONAIRE)
1477 		pci_release_resource(adev->pdev, 2);
1478 
1479 	pci_release_resource(adev->pdev, 0);
1480 
1481 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1482 	if (r == -ENOSPC)
1483 		DRM_INFO("Not enough PCI address space for a large BAR.");
1484 	else if (r && r != -ENOTSUPP)
1485 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1486 
1487 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1488 
1489 	/* When the doorbell or fb BAR isn't available we have no chance of
1490 	 * using the device.
1491 	 */
1492 	r = amdgpu_doorbell_init(adev);
1493 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1494 		return -ENODEV;
1495 
1496 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1497 
1498 	return 0;
1499 }
1500 
1501 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1502 {
1503 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1504 		return false;
1505 
1506 	return true;
1507 }
1508 
1509 /*
1510  * GPU helpers function.
1511  */
1512 /**
1513  * amdgpu_device_need_post - check if the hw need post or not
1514  *
1515  * @adev: amdgpu_device pointer
1516  *
1517  * Check if the asic has been initialized (all asics) at driver startup,
1518  * or if post is needed because a hw reset was performed.
1519  * Returns true if post is needed, false if not.
1520  */
1521 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1522 {
1523 	uint32_t reg;
1524 
1525 	if (amdgpu_sriov_vf(adev))
1526 		return false;
1527 
1528 	if (!amdgpu_device_read_bios(adev))
1529 		return false;
1530 
1531 	if (amdgpu_passthrough(adev)) {
1532 		/* For FIJI: in the whole-GPU pass-through virtualization case, some old
1533 		 * SMC firmware still needs the driver to do vPost after a VM reboot or the
1534 		 * GPU hangs.  SMC firmware versions above 22.15 don't have this flaw, so
1535 		 * force vPost for SMC versions below 22.15.
1536 		 */
1537 		if (adev->asic_type == CHIP_FIJI) {
1538 			int err;
1539 			uint32_t fw_ver;
1540 
1541 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1542 			/* force vPost if error occurred */
1543 			if (err)
1544 				return true;
1545 
1546 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1547 			if (fw_ver < 0x00160e00)
1548 				return true;
1549 		}
1550 	}
1551 
1552 	/* Don't post if we need to reset whole hive on init */
1553 	if (adev->gmc.xgmi.pending_reset)
1554 		return false;
1555 
1556 	if (adev->has_hw_reset) {
1557 		adev->has_hw_reset = false;
1558 		return true;
1559 	}
1560 
1561 	/* bios scratch used on CIK+ */
1562 	if (adev->asic_type >= CHIP_BONAIRE)
1563 		return amdgpu_atombios_scratch_need_asic_init(adev);
1564 
1565 	/* check MEM_SIZE for older asics */
1566 	reg = amdgpu_asic_get_config_memsize(adev);
1567 
1568 	if ((reg != 0) && (reg != 0xffffffff))
1569 		return false;
1570 
1571 	return true;
1572 }
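
/*
 * Illustrative sketch (editorial addition, not part of the driver): during
 * early init the result of amdgpu_device_need_post() gates the atom asic_init
 * path, roughly like this.
 */
static int __maybe_unused amdgpu_example_maybe_post(struct amdgpu_device *adev)
{
	if (!amdgpu_device_need_post(adev))
		return 0;

	/* (re)post the asic via atombios/atomfirmware */
	return amdgpu_device_asic_init(adev);
}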
1573 
1574 /*
1575  * Check whether seamless boot is supported.
1576  *
1577  * So far we only support seamless boot on DCE 3.0 or later.
1578  * If users report that it works on older ASICS as well, we may
1579  * loosen this.
1580  */
1581 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1582 {
1583 	switch (amdgpu_seamless) {
1584 	case -1:
1585 		break;
1586 	case 1:
1587 		return true;
1588 	case 0:
1589 		return false;
1590 	default:
1591 		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1592 			  amdgpu_seamless);
1593 		return false;
1594 	}
1595 
1596 	if (!(adev->flags & AMD_IS_APU))
1597 		return false;
1598 
1599 	if (adev->mman.keep_stolen_vga_memory)
1600 		return false;
1601 
1602 	return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
1603 }
1604 
1605 /*
1606  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1607  * don't support dynamic speed switching. Until we have confirmation from Intel
1608  * that a specific host supports it, it's safer that we keep it disabled for all.
1609  *
1610  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1611  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1612  */
1613 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1614 {
1615 #if IS_ENABLED(CONFIG_X86)
1616 	struct cpuinfo_x86 *c = &cpu_data(0);
1617 
1618 	/* eGPUs change speeds based on USB4 fabric conditions */
1619 	if (dev_is_removable(adev->dev))
1620 		return true;
1621 
1622 	if (c->x86_vendor == X86_VENDOR_INTEL)
1623 		return false;
1624 #endif
1625 	return true;
1626 }
1627 
1628 /**
1629  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1630  *
1631  * @adev: amdgpu_device pointer
1632  *
1633  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1634  * be set for this device.
1635  *
1636  * Returns true if it should be used or false if not.
1637  */
1638 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1639 {
1640 	switch (amdgpu_aspm) {
1641 	case -1:
1642 		break;
1643 	case 0:
1644 		return false;
1645 	case 1:
1646 		return true;
1647 	default:
1648 		return false;
1649 	}
1650 	if (adev->flags & AMD_IS_APU)
1651 		return false;
1652 	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1653 		return false;
1654 	return pcie_aspm_enabled(adev->pdev);
1655 }
1656 
1657 /* if we get transitioned to only one device, take VGA back */
1658 /**
1659  * amdgpu_device_vga_set_decode - enable/disable vga decode
1660  *
1661  * @pdev: PCI device pointer
1662  * @state: enable/disable vga decode
1663  *
1664  * Enable/disable vga decode (all asics).
1665  * Returns VGA resource flags.
1666  */
1667 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1668 		bool state)
1669 {
1670 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1671 
1672 	amdgpu_asic_set_vga_state(adev, state);
1673 	if (state)
1674 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1675 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1676 	else
1677 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1678 }
1679 
1680 /**
1681  * amdgpu_device_check_block_size - validate the vm block size
1682  *
1683  * @adev: amdgpu_device pointer
1684  *
1685  * Validates the vm block size specified via module parameter.
1686  * The vm block size defines the number of bits in the page table versus the
1687  * page directory; a page is 4KB, so we have a 12-bit offset, a minimum of 9
1688  * bits in the page table, and the remaining bits in the page directory.
1689  */
1690 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1691 {
1692 	/* defines number of bits in page table versus page directory,
1693 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1694 	 * page table and the remaining bits are in the page directory
1695 	 */
1696 	if (amdgpu_vm_block_size == -1)
1697 		return;
1698 
1699 	if (amdgpu_vm_block_size < 9) {
1700 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1701 			 amdgpu_vm_block_size);
1702 		amdgpu_vm_block_size = -1;
1703 	}
1704 }
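
/*
 * Worked example (editorial note): a page is 4KB, i.e. a 12 bit in-page
 * offset.  With amdgpu_vm_block_size = 9, one page table therefore covers
 * 2^(12 + 9) = 2MB of address space, which is why values below 9 are
 * rejected above.
 */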
1705 
1706 /**
1707  * amdgpu_device_check_vm_size - validate the vm size
1708  *
1709  * @adev: amdgpu_device pointer
1710  *
1711  * Validates the vm size in GB specified via module parameter.
1712  * The VM size is the size of the GPU virtual memory space in GB.
1713  */
1714 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1715 {
1716 	/* no need to check the default value */
1717 	if (amdgpu_vm_size == -1)
1718 		return;
1719 
1720 	if (amdgpu_vm_size < 1) {
1721 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1722 			 amdgpu_vm_size);
1723 		amdgpu_vm_size = -1;
1724 	}
1725 }
1726 
1727 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1728 {
1729 	struct sysinfo si;
1730 	bool is_os_64 = (sizeof(void *) == 8);
1731 	uint64_t total_memory;
1732 	uint64_t dram_size_seven_GB = 0x1B8000000;
1733 	uint64_t dram_size_three_GB = 0xB8000000;
1734 
1735 	if (amdgpu_smu_memory_pool_size == 0)
1736 		return;
1737 
1738 	if (!is_os_64) {
1739 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1740 		goto def_value;
1741 	}
1742 	si_meminfo(&si);
1743 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1744 
1745 	if ((amdgpu_smu_memory_pool_size == 1) ||
1746 		(amdgpu_smu_memory_pool_size == 2)) {
1747 		if (total_memory < dram_size_three_GB)
1748 			goto def_value1;
1749 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1750 		(amdgpu_smu_memory_pool_size == 8)) {
1751 		if (total_memory < dram_size_seven_GB)
1752 			goto def_value1;
1753 	} else {
1754 		DRM_WARN("Smu memory pool size not supported\n");
1755 		goto def_value;
1756 	}
1757 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1758 
1759 	return;
1760 
1761 def_value1:
1762 	DRM_WARN("No enough system memory\n");
1763 def_value:
1764 	adev->pm.smu_prv_buffer_size = 0;
1765 }
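
/*
 * Worked example (editorial note): the pool size module parameter is given in
 * 256MB units, so the shift by 28 above converts it to bytes; e.g.
 * amdgpu_smu_memory_pool_size = 2 yields 2 << 28 = 512MB.
 */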
1766 
1767 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1768 {
1769 	if (!(adev->flags & AMD_IS_APU) ||
1770 	    adev->asic_type < CHIP_RAVEN)
1771 		return 0;
1772 
1773 	switch (adev->asic_type) {
1774 	case CHIP_RAVEN:
1775 		if (adev->pdev->device == 0x15dd)
1776 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1777 		if (adev->pdev->device == 0x15d8)
1778 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1779 		break;
1780 	case CHIP_RENOIR:
1781 		if ((adev->pdev->device == 0x1636) ||
1782 		    (adev->pdev->device == 0x164c))
1783 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1784 		else
1785 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1786 		break;
1787 	case CHIP_VANGOGH:
1788 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1789 		break;
1790 	case CHIP_YELLOW_CARP:
1791 		break;
1792 	case CHIP_CYAN_SKILLFISH:
1793 		if ((adev->pdev->device == 0x13FE) ||
1794 		    (adev->pdev->device == 0x143F))
1795 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1796 		break;
1797 	default:
1798 		break;
1799 	}
1800 
1801 	return 0;
1802 }
1803 
1804 /**
1805  * amdgpu_device_check_arguments - validate module params
1806  *
1807  * @adev: amdgpu_device pointer
1808  *
1809  * Validates certain module parameters and updates
1810  * the associated values used by the driver (all asics).
1811  */
1812 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1813 {
1814 	if (amdgpu_sched_jobs < 4) {
1815 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1816 			 amdgpu_sched_jobs);
1817 		amdgpu_sched_jobs = 4;
1818 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1819 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1820 			 amdgpu_sched_jobs);
1821 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1822 	}
1823 
1824 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1825 		/* gart size must be greater or equal to 32M */
1826 		dev_warn(adev->dev, "gart size (%d) too small\n",
1827 			 amdgpu_gart_size);
1828 		amdgpu_gart_size = -1;
1829 	}
1830 
1831 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1832 		/* gtt size must be greater or equal to 32M */
1833 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1834 				 amdgpu_gtt_size);
1835 		amdgpu_gtt_size = -1;
1836 	}
1837 
1838 	/* valid range is between 4 and 9 inclusive */
1839 	if (amdgpu_vm_fragment_size != -1 &&
1840 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1841 		dev_warn(adev->dev, "valid range of vm fragment size is between 4 and 9\n");
1842 		amdgpu_vm_fragment_size = -1;
1843 	}
1844 
1845 	if (amdgpu_sched_hw_submission < 2) {
1846 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1847 			 amdgpu_sched_hw_submission);
1848 		amdgpu_sched_hw_submission = 2;
1849 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1850 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1851 			 amdgpu_sched_hw_submission);
1852 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1853 	}
1854 
1855 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1856 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1857 		amdgpu_reset_method = -1;
1858 	}
1859 
1860 	amdgpu_device_check_smu_prv_buffer_size(adev);
1861 
1862 	amdgpu_device_check_vm_size(adev);
1863 
1864 	amdgpu_device_check_block_size(adev);
1865 
1866 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1867 
1868 	return 0;
1869 }
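
/*
 * Illustrative sketch (not part of the driver): out-of-range scheduler
 * parameters are clamped or rounded above rather than rejected, e.g.
 *
 *	amdgpu_sched_jobs = 3;		// below the minimum, forced to 4
 *	amdgpu_sched_jobs = 24;		// not a power of 2, rounded up to 32
 *	amdgpu_sched_hw_submission = 3;	// not a power of 2, rounded up to 4
 */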
1870 
1871 /**
1872  * amdgpu_switcheroo_set_state - set switcheroo state
1873  *
1874  * @pdev: pci dev pointer
1875  * @state: vga_switcheroo state
1876  *
1877  * Callback for the switcheroo driver.  Suspends or resumes the asic
1878  * before it is powered down or after it is powered up using ACPI methods.
1879  */
1880 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1881 					enum vga_switcheroo_state state)
1882 {
1883 	struct drm_device *dev = pci_get_drvdata(pdev);
1884 	int r;
1885 
1886 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1887 		return;
1888 
1889 	if (state == VGA_SWITCHEROO_ON) {
1890 		pr_info("switched on\n");
1891 		/* don't suspend or resume card normally */
1892 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1893 
1894 		pci_set_power_state(pdev, PCI_D0);
1895 		amdgpu_device_load_pci_state(pdev);
1896 		r = pci_enable_device(pdev);
1897 		if (r)
1898 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1899 		amdgpu_device_resume(dev, true);
1900 
1901 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1902 	} else {
1903 		pr_info("switched off\n");
1904 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1905 		amdgpu_device_prepare(dev);
1906 		amdgpu_device_suspend(dev, true);
1907 		amdgpu_device_cache_pci_state(pdev);
1908 		/* Shut down the device */
1909 		pci_disable_device(pdev);
1910 		pci_set_power_state(pdev, PCI_D3cold);
1911 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1912 	}
1913 }
1914 
1915 /**
1916  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1917  *
1918  * @pdev: pci dev pointer
1919  *
1920  * Callback for the switcheroo driver.  Checks whether the switcheroo
1921  * state can be changed.
1922  * Returns true if the state can be changed, false if not.
1923  */
1924 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1925 {
1926 	struct drm_device *dev = pci_get_drvdata(pdev);
1927 
1928 	/*
1929 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1930 	* locking inversion with the driver load path. And the access here is
1931 	* completely racy anyway. So don't bother with locking for now.
1932 	*/
1933 	return atomic_read(&dev->open_count) == 0;
1934 }
1935 
1936 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1937 	.set_gpu_state = amdgpu_switcheroo_set_state,
1938 	.reprobe = NULL,
1939 	.can_switch = amdgpu_switcheroo_can_switch,
1940 };
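
/*
 * Illustrative sketch: this ops table is handed to the vga_switcheroo core
 * elsewhere in the driver during device init, roughly along the lines of
 * (exact call site and arguments omitted here):
 *
 *	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px);
 *
 * where "px" indicates whether the platform supports ATPX dynamic power
 * control of the dGPU.
 */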
1941 
1942 /**
1943  * amdgpu_device_ip_set_clockgating_state - set the CG state
1944  *
1945  * @dev: amdgpu_device pointer
1946  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1947  * @state: clockgating state (gate or ungate)
1948  *
1949  * Sets the requested clockgating state for all instances of
1950  * the hardware IP specified.
1951  * Returns the error code from the last instance.
1952  */
1953 int amdgpu_device_ip_set_clockgating_state(void *dev,
1954 					   enum amd_ip_block_type block_type,
1955 					   enum amd_clockgating_state state)
1956 {
1957 	struct amdgpu_device *adev = dev;
1958 	int i, r = 0;
1959 
1960 	for (i = 0; i < adev->num_ip_blocks; i++) {
1961 		if (!adev->ip_blocks[i].status.valid)
1962 			continue;
1963 		if (adev->ip_blocks[i].version->type != block_type)
1964 			continue;
1965 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1966 			continue;
1967 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1968 			(void *)adev, state);
1969 		if (r)
1970 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1971 				  adev->ip_blocks[i].version->funcs->name, r);
1972 	}
1973 	return r;
1974 }
1975 
1976 /**
1977  * amdgpu_device_ip_set_powergating_state - set the PG state
1978  *
1979  * @dev: amdgpu_device pointer
1980  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1981  * @state: powergating state (gate or ungate)
1982  *
1983  * Sets the requested powergating state for all instances of
1984  * the hardware IP specified.
1985  * Returns the error code from the last instance.
1986  */
1987 int amdgpu_device_ip_set_powergating_state(void *dev,
1988 					   enum amd_ip_block_type block_type,
1989 					   enum amd_powergating_state state)
1990 {
1991 	struct amdgpu_device *adev = dev;
1992 	int i, r = 0;
1993 
1994 	for (i = 0; i < adev->num_ip_blocks; i++) {
1995 		if (!adev->ip_blocks[i].status.valid)
1996 			continue;
1997 		if (adev->ip_blocks[i].version->type != block_type)
1998 			continue;
1999 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2000 			continue;
2001 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2002 			(void *)adev, state);
2003 		if (r)
2004 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2005 				  adev->ip_blocks[i].version->funcs->name, r);
2006 	}
2007 	return r;
2008 }
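
/*
 * Illustrative sketch (not from the driver): IP code elsewhere uses these two
 * helpers to gate a whole block type in one call, e.g. to put VCE into its
 * lowest power state:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
 *					       AMD_CG_STATE_GATE);
 *	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
 *					       AMD_PG_STATE_GATE);
 */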
2009 
2010 /**
2011  * amdgpu_device_ip_get_clockgating_state - get the CG state
2012  *
2013  * @adev: amdgpu_device pointer
2014  * @flags: clockgating feature flags
2015  *
2016  * Walks the list of IPs on the device and queries the clockgating state
2017  * of each one.
2018  * Updates @flags with the feature flags for each hardware IP where
2019  * clockgating is enabled.
2020  */
2021 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2022 					    u64 *flags)
2023 {
2024 	int i;
2025 
2026 	for (i = 0; i < adev->num_ip_blocks; i++) {
2027 		if (!adev->ip_blocks[i].status.valid)
2028 			continue;
2029 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2030 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2031 	}
2032 }
2033 
2034 /**
2035  * amdgpu_device_ip_wait_for_idle - wait for idle
2036  *
2037  * @adev: amdgpu_device pointer
2038  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2039  *
2040  * Waits for the requested hardware IP to be idle.
2041  * Returns 0 for success or a negative error code on failure.
2042  */
2043 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2044 				   enum amd_ip_block_type block_type)
2045 {
2046 	int i, r;
2047 
2048 	for (i = 0; i < adev->num_ip_blocks; i++) {
2049 		if (!adev->ip_blocks[i].status.valid)
2050 			continue;
2051 		if (adev->ip_blocks[i].version->type == block_type) {
2052 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2053 			if (r)
2054 				return r;
2055 			break;
2056 		}
2057 	}
2058 	return 0;
2059 
2060 }
2061 
2062 /**
2063  * amdgpu_device_ip_is_idle - is the hardware IP idle
2064  *
2065  * @adev: amdgpu_device pointer
2066  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2067  *
2068  * Checks if the hardware IP is idle.
2069  * Returns true if the IP is idle, false if not.
2070  */
2071 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2072 			      enum amd_ip_block_type block_type)
2073 {
2074 	int i;
2075 
2076 	for (i = 0; i < adev->num_ip_blocks; i++) {
2077 		if (!adev->ip_blocks[i].status.valid)
2078 			continue;
2079 		if (adev->ip_blocks[i].version->type == block_type)
2080 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2081 	}
2082 	return true;
2083 
2084 }
2085 
2086 /**
2087  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2088  *
2089  * @adev: amdgpu_device pointer
2090  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2091  *
2092  * Returns a pointer to the hardware IP block structure
2093  * if it exists for the asic, otherwise NULL.
2094  */
2095 struct amdgpu_ip_block *
2096 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2097 			      enum amd_ip_block_type type)
2098 {
2099 	int i;
2100 
2101 	for (i = 0; i < adev->num_ip_blocks; i++)
2102 		if (adev->ip_blocks[i].version->type == type)
2103 			return &adev->ip_blocks[i];
2104 
2105 	return NULL;
2106 }
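
/*
 * Illustrative sketch (not from the driver): a caller can look up a block and
 * inspect its version information, e.g.
 *
 *	struct amdgpu_ip_block *gfx_block =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *
 *	if (gfx_block)
 *		DRM_INFO("GFX IP v%d.%d\n", gfx_block->version->major,
 *			 gfx_block->version->minor);
 */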
2107 
2108 /**
2109  * amdgpu_device_ip_block_version_cmp
2110  *
2111  * @adev: amdgpu_device pointer
2112  * @type: enum amd_ip_block_type
2113  * @major: major version
2114  * @minor: minor version
2115  *
2116  * Returns 0 if the IP block's version is equal to or greater than the one
2117  * specified, 1 if it is smaller or the ip_block doesn't exist
2118  */
2119 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2120 				       enum amd_ip_block_type type,
2121 				       u32 major, u32 minor)
2122 {
2123 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2124 
2125 	if (ip_block && ((ip_block->version->major > major) ||
2126 			((ip_block->version->major == major) &&
2127 			(ip_block->version->minor >= minor))))
2128 		return 0;
2129 
2130 	return 1;
2131 }
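
/*
 * Illustrative sketch (not from the driver): the comparison helper is handy
 * for "at least version X.Y" checks, e.g.
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
 *					       9, 0) == 0) {
 *		// GMC 9.0 or newer is present on this asic
 *	}
 */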
2132 
2133 /**
2134  * amdgpu_device_ip_block_add
2135  *
2136  * @adev: amdgpu_device pointer
2137  * @ip_block_version: pointer to the IP to add
2138  *
2139  * Adds the IP block driver information to the collection of IPs
2140  * on the asic.
2141  */
2142 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2143 			       const struct amdgpu_ip_block_version *ip_block_version)
2144 {
2145 	if (!ip_block_version)
2146 		return -EINVAL;
2147 
2148 	switch (ip_block_version->type) {
2149 	case AMD_IP_BLOCK_TYPE_VCN:
2150 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2151 			return 0;
2152 		break;
2153 	case AMD_IP_BLOCK_TYPE_JPEG:
2154 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2155 			return 0;
2156 		break;
2157 	default:
2158 		break;
2159 	}
2160 
2161 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2162 		  ip_block_version->funcs->name);
2163 
2164 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2165 
2166 	return 0;
2167 }
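
/*
 * Illustrative sketch: the per-ASIC setup code (e.g. vi_set_ip_blocks() or the
 * IP-discovery path) populates the list by repeatedly calling this helper with
 * the version structures exported by each IP, roughly (names illustrative):
 *
 *	amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
 */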
2168 
2169 /**
2170  * amdgpu_device_enable_virtual_display - enable virtual display feature
2171  *
2172  * @adev: amdgpu_device pointer
2173  *
2174  * Enables the virtual display feature if the user has enabled it via
2175  * the module parameter virtual_display.  This feature provides virtual
2176  * display hardware on headless boards or in virtualized environments.
2177  * This function parses and validates the configuration string specified by
2178  * the user and applies the virtual display configuration (number of
2179  * virtual connectors, crtcs, etc.) specified.
2180  */
2181 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2182 {
2183 	adev->enable_virtual_display = false;
2184 
2185 	if (amdgpu_virtual_display) {
2186 		const char *pci_address_name = pci_name(adev->pdev);
2187 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2188 
2189 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2190 		pciaddstr_tmp = pciaddstr;
2191 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2192 			pciaddname = strsep(&pciaddname_tmp, ",");
2193 			if (!strcmp("all", pciaddname)
2194 			    || !strcmp(pci_address_name, pciaddname)) {
2195 				long num_crtc;
2196 				int res = -1;
2197 
2198 				adev->enable_virtual_display = true;
2199 
2200 				if (pciaddname_tmp)
2201 					res = kstrtol(pciaddname_tmp, 10,
2202 						      &num_crtc);
2203 
2204 				if (!res) {
2205 					if (num_crtc < 1)
2206 						num_crtc = 1;
2207 					if (num_crtc > 6)
2208 						num_crtc = 6;
2209 					adev->mode_info.num_crtc = num_crtc;
2210 				} else {
2211 					adev->mode_info.num_crtc = 1;
2212 				}
2213 				break;
2214 			}
2215 		}
2216 
2217 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2218 			 amdgpu_virtual_display, pci_address_name,
2219 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2220 
2221 		kfree(pciaddstr);
2222 	}
2223 }
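
/*
 * Illustrative sketch (not from the driver): the virtual_display module
 * parameter parsed above is a semicolon-separated list of PCI addresses, each
 * optionally followed by a crtc count, e.g.
 *
 *	modprobe amdgpu virtual_display="0000:04:00.0,2;0000:05:00.0"
 *
 * enables two virtual crtcs on 04:00.0 and one (the default) on 05:00.0;
 * the keyword "all" matches every device.
 */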
2224 
2225 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2226 {
2227 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2228 		adev->mode_info.num_crtc = 1;
2229 		adev->enable_virtual_display = true;
2230 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2231 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2232 	}
2233 }
2234 
2235 /**
2236  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2237  *
2238  * @adev: amdgpu_device pointer
2239  *
2240  * Parses the asic configuration parameters specified in the gpu info
2241  * firmware and makes them available to the driver for use in configuring
2242  * the asic.
2243  * Returns 0 on success, -EINVAL on failure.
2244  */
2245 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2246 {
2247 	const char *chip_name;
2248 	char fw_name[40];
2249 	int err;
2250 	const struct gpu_info_firmware_header_v1_0 *hdr;
2251 
2252 	adev->firmware.gpu_info_fw = NULL;
2253 
2254 	if (adev->mman.discovery_bin) {
2255 		/*
2256 		 * FIXME: The bounding box is still needed by Navi12, so
2257 		 * temporarily read it from gpu_info firmware. Should be dropped
2258 		 * when DAL no longer needs it.
2259 		 */
2260 		if (adev->asic_type != CHIP_NAVI12)
2261 			return 0;
2262 	}
2263 
2264 	switch (adev->asic_type) {
2265 	default:
2266 		return 0;
2267 	case CHIP_VEGA10:
2268 		chip_name = "vega10";
2269 		break;
2270 	case CHIP_VEGA12:
2271 		chip_name = "vega12";
2272 		break;
2273 	case CHIP_RAVEN:
2274 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2275 			chip_name = "raven2";
2276 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2277 			chip_name = "picasso";
2278 		else
2279 			chip_name = "raven";
2280 		break;
2281 	case CHIP_ARCTURUS:
2282 		chip_name = "arcturus";
2283 		break;
2284 	case CHIP_NAVI12:
2285 		chip_name = "navi12";
2286 		break;
2287 	}
2288 
2289 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2290 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2291 	if (err) {
2292 		dev_err(adev->dev,
2293 			"Failed to get gpu_info firmware \"%s\"\n",
2294 			fw_name);
2295 		goto out;
2296 	}
2297 
2298 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2299 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2300 
2301 	switch (hdr->version_major) {
2302 	case 1:
2303 	{
2304 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2305 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2306 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2307 
2308 		/*
2309 		 * Should be dropped when DAL no longer needs it.
2310 		 */
2311 		if (adev->asic_type == CHIP_NAVI12)
2312 			goto parse_soc_bounding_box;
2313 
2314 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2315 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2316 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2317 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2318 		adev->gfx.config.max_texture_channel_caches =
2319 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2320 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2321 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2322 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2323 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2324 		adev->gfx.config.double_offchip_lds_buf =
2325 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2326 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2327 		adev->gfx.cu_info.max_waves_per_simd =
2328 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2329 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2330 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2331 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2332 		if (hdr->version_minor >= 1) {
2333 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2334 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2335 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2336 			adev->gfx.config.num_sc_per_sh =
2337 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2338 			adev->gfx.config.num_packer_per_sc =
2339 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2340 		}
2341 
2342 parse_soc_bounding_box:
2343 		/*
2344 		 * soc bounding box info is not integrated into the discovery table,
2345 		 * so it still needs to be parsed from the gpu info firmware when required.
2346 		 */
2347 		if (hdr->version_minor == 2) {
2348 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2349 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2350 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2351 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2352 		}
2353 		break;
2354 	}
2355 	default:
2356 		dev_err(adev->dev,
2357 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2358 		err = -EINVAL;
2359 		goto out;
2360 	}
2361 out:
2362 	return err;
2363 }
2364 
2365 /**
2366  * amdgpu_device_ip_early_init - run early init for hardware IPs
2367  *
2368  * @adev: amdgpu_device pointer
2369  *
2370  * Early initialization pass for hardware IPs.  The hardware IPs that make
2371  * up each asic are discovered and each IP's early_init callback is run.  This
2372  * is the first stage in initializing the asic.
2373  * Returns 0 on success, negative error code on failure.
2374  */
2375 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2376 {
2377 	struct pci_dev *parent;
2378 	int i, r;
2379 	bool total;
2380 
2381 	amdgpu_device_enable_virtual_display(adev);
2382 
2383 	if (amdgpu_sriov_vf(adev)) {
2384 		r = amdgpu_virt_request_full_gpu(adev, true);
2385 		if (r)
2386 			return r;
2387 	}
2388 
2389 	switch (adev->asic_type) {
2390 #ifdef CONFIG_DRM_AMDGPU_SI
2391 	case CHIP_VERDE:
2392 	case CHIP_TAHITI:
2393 	case CHIP_PITCAIRN:
2394 	case CHIP_OLAND:
2395 	case CHIP_HAINAN:
2396 		adev->family = AMDGPU_FAMILY_SI;
2397 		r = si_set_ip_blocks(adev);
2398 		if (r)
2399 			return r;
2400 		break;
2401 #endif
2402 #ifdef CONFIG_DRM_AMDGPU_CIK
2403 	case CHIP_BONAIRE:
2404 	case CHIP_HAWAII:
2405 	case CHIP_KAVERI:
2406 	case CHIP_KABINI:
2407 	case CHIP_MULLINS:
2408 		if (adev->flags & AMD_IS_APU)
2409 			adev->family = AMDGPU_FAMILY_KV;
2410 		else
2411 			adev->family = AMDGPU_FAMILY_CI;
2412 
2413 		r = cik_set_ip_blocks(adev);
2414 		if (r)
2415 			return r;
2416 		break;
2417 #endif
2418 	case CHIP_TOPAZ:
2419 	case CHIP_TONGA:
2420 	case CHIP_FIJI:
2421 	case CHIP_POLARIS10:
2422 	case CHIP_POLARIS11:
2423 	case CHIP_POLARIS12:
2424 	case CHIP_VEGAM:
2425 	case CHIP_CARRIZO:
2426 	case CHIP_STONEY:
2427 		if (adev->flags & AMD_IS_APU)
2428 			adev->family = AMDGPU_FAMILY_CZ;
2429 		else
2430 			adev->family = AMDGPU_FAMILY_VI;
2431 
2432 		r = vi_set_ip_blocks(adev);
2433 		if (r)
2434 			return r;
2435 		break;
2436 	default:
2437 		r = amdgpu_discovery_set_ip_blocks(adev);
2438 		if (r)
2439 			return r;
2440 		break;
2441 	}
2442 
2443 	if (amdgpu_has_atpx() &&
2444 	    (amdgpu_is_atpx_hybrid() ||
2445 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2446 	    ((adev->flags & AMD_IS_APU) == 0) &&
2447 	    !dev_is_removable(&adev->pdev->dev))
2448 		adev->flags |= AMD_IS_PX;
2449 
2450 	if (!(adev->flags & AMD_IS_APU)) {
2451 		parent = pcie_find_root_port(adev->pdev);
2452 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2453 	}
2454 
2455 
2456 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2457 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2458 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2459 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2460 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2461 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2462 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2463 
2464 	total = true;
2465 	for (i = 0; i < adev->num_ip_blocks; i++) {
2466 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2467 			DRM_WARN("disabled ip block: %d <%s>\n",
2468 				  i, adev->ip_blocks[i].version->funcs->name);
2469 			adev->ip_blocks[i].status.valid = false;
2470 		} else {
2471 			if (adev->ip_blocks[i].version->funcs->early_init) {
2472 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2473 				if (r == -ENOENT) {
2474 					adev->ip_blocks[i].status.valid = false;
2475 				} else if (r) {
2476 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2477 						  adev->ip_blocks[i].version->funcs->name, r);
2478 					total = false;
2479 				} else {
2480 					adev->ip_blocks[i].status.valid = true;
2481 				}
2482 			} else {
2483 				adev->ip_blocks[i].status.valid = true;
2484 			}
2485 		}
2486 		/* get the vbios after the asic_funcs are set up */
2487 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2488 			r = amdgpu_device_parse_gpu_info_fw(adev);
2489 			if (r)
2490 				return r;
2491 
2492 			/* Read BIOS */
2493 			if (amdgpu_device_read_bios(adev)) {
2494 				if (!amdgpu_get_bios(adev))
2495 					return -EINVAL;
2496 
2497 				r = amdgpu_atombios_init(adev);
2498 				if (r) {
2499 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2500 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2501 					return r;
2502 				}
2503 			}
2504 
2505 			/* get pf2vf msg info at its earliest time */
2506 			if (amdgpu_sriov_vf(adev))
2507 				amdgpu_virt_init_data_exchange(adev);
2508 
2509 		}
2510 	}
2511 	if (!total)
2512 		return -ENODEV;
2513 
2514 	amdgpu_amdkfd_device_probe(adev);
2515 	adev->cg_flags &= amdgpu_cg_mask;
2516 	adev->pg_flags &= amdgpu_pg_mask;
2517 
2518 	return 0;
2519 }
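
/*
 * Illustrative sketch (not from the driver): bit i of the ip_block_mask module
 * parameter checked above corresponds to adev->ip_blocks[i], so individual
 * blocks can be disabled for debugging, e.g.
 *
 *	modprobe amdgpu ip_block_mask=0xfffffffd
 *
 * clears bit 1 and therefore marks the second IP block in the list as invalid.
 */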
2520 
2521 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2522 {
2523 	int i, r;
2524 
2525 	for (i = 0; i < adev->num_ip_blocks; i++) {
2526 		if (!adev->ip_blocks[i].status.sw)
2527 			continue;
2528 		if (adev->ip_blocks[i].status.hw)
2529 			continue;
2530 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2531 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2532 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2533 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2534 			if (r) {
2535 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2536 					  adev->ip_blocks[i].version->funcs->name, r);
2537 				return r;
2538 			}
2539 			adev->ip_blocks[i].status.hw = true;
2540 		}
2541 	}
2542 
2543 	return 0;
2544 }
2545 
2546 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2547 {
2548 	int i, r;
2549 
2550 	for (i = 0; i < adev->num_ip_blocks; i++) {
2551 		if (!adev->ip_blocks[i].status.sw)
2552 			continue;
2553 		if (adev->ip_blocks[i].status.hw)
2554 			continue;
2555 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2556 		if (r) {
2557 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2558 				  adev->ip_blocks[i].version->funcs->name, r);
2559 			return r;
2560 		}
2561 		adev->ip_blocks[i].status.hw = true;
2562 	}
2563 
2564 	return 0;
2565 }
2566 
2567 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2568 {
2569 	int r = 0;
2570 	int i;
2571 	uint32_t smu_version;
2572 
2573 	if (adev->asic_type >= CHIP_VEGA10) {
2574 		for (i = 0; i < adev->num_ip_blocks; i++) {
2575 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2576 				continue;
2577 
2578 			if (!adev->ip_blocks[i].status.sw)
2579 				continue;
2580 
2581 			/* no need to do the fw loading again if already done */
2582 			if (adev->ip_blocks[i].status.hw)
2583 				break;
2584 
2585 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2586 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2587 				if (r) {
2588 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2589 							  adev->ip_blocks[i].version->funcs->name, r);
2590 					return r;
2591 				}
2592 			} else {
2593 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2594 				if (r) {
2595 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2596 							  adev->ip_blocks[i].version->funcs->name, r);
2597 					return r;
2598 				}
2599 			}
2600 
2601 			adev->ip_blocks[i].status.hw = true;
2602 			break;
2603 		}
2604 	}
2605 
2606 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2607 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2608 
2609 	return r;
2610 }
2611 
2612 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2613 {
2614 	long timeout;
2615 	int r, i;
2616 
2617 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2618 		struct amdgpu_ring *ring = adev->rings[i];
2619 
2620 		/* No need to set up the GPU scheduler for rings that don't need it */
2621 		if (!ring || ring->no_scheduler)
2622 			continue;
2623 
2624 		switch (ring->funcs->type) {
2625 		case AMDGPU_RING_TYPE_GFX:
2626 			timeout = adev->gfx_timeout;
2627 			break;
2628 		case AMDGPU_RING_TYPE_COMPUTE:
2629 			timeout = adev->compute_timeout;
2630 			break;
2631 		case AMDGPU_RING_TYPE_SDMA:
2632 			timeout = adev->sdma_timeout;
2633 			break;
2634 		default:
2635 			timeout = adev->video_timeout;
2636 			break;
2637 		}
2638 
2639 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2640 				   DRM_SCHED_PRIORITY_COUNT,
2641 				   ring->num_hw_submission, 0,
2642 				   timeout, adev->reset_domain->wq,
2643 				   ring->sched_score, ring->name,
2644 				   adev->dev);
2645 		if (r) {
2646 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2647 				  ring->name);
2648 			return r;
2649 		}
2650 		r = amdgpu_uvd_entity_init(adev, ring);
2651 		if (r) {
2652 			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2653 				  ring->name);
2654 			return r;
2655 		}
2656 		r = amdgpu_vce_entity_init(adev, ring);
2657 		if (r) {
2658 			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2659 				  ring->name);
2660 			return r;
2661 		}
2662 	}
2663 
2664 	amdgpu_xcp_update_partition_sched_list(adev);
2665 
2666 	return 0;
2667 }
2668 
2669 
2670 /**
2671  * amdgpu_device_ip_init - run init for hardware IPs
2672  *
2673  * @adev: amdgpu_device pointer
2674  *
2675  * Main initialization pass for hardware IPs.  The list of all the hardware
2676  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2677  * are run.  sw_init initializes the software state associated with each IP
2678  * and hw_init initializes the hardware associated with each IP.
2679  * Returns 0 on success, negative error code on failure.
2680  */
2681 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2682 {
2683 	int i, r;
2684 
2685 	r = amdgpu_ras_init(adev);
2686 	if (r)
2687 		return r;
2688 
2689 	for (i = 0; i < adev->num_ip_blocks; i++) {
2690 		if (!adev->ip_blocks[i].status.valid)
2691 			continue;
2692 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2693 		if (r) {
2694 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2695 				  adev->ip_blocks[i].version->funcs->name, r);
2696 			goto init_failed;
2697 		}
2698 		adev->ip_blocks[i].status.sw = true;
2699 
2700 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2701 			/* need to do common hw init early so everything is set up for gmc */
2702 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2703 			if (r) {
2704 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2705 				goto init_failed;
2706 			}
2707 			adev->ip_blocks[i].status.hw = true;
2708 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2709 			/* need to do gmc hw init early so we can allocate gpu mem */
2710 			/* Try to reserve bad pages early */
2711 			if (amdgpu_sriov_vf(adev))
2712 				amdgpu_virt_exchange_data(adev);
2713 
2714 			r = amdgpu_device_mem_scratch_init(adev);
2715 			if (r) {
2716 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2717 				goto init_failed;
2718 			}
2719 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2720 			if (r) {
2721 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2722 				goto init_failed;
2723 			}
2724 			r = amdgpu_device_wb_init(adev);
2725 			if (r) {
2726 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2727 				goto init_failed;
2728 			}
2729 			adev->ip_blocks[i].status.hw = true;
2730 
2731 			/* right after GMC hw init, we create CSA */
2732 			if (adev->gfx.mcbp) {
2733 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2734 							       AMDGPU_GEM_DOMAIN_VRAM |
2735 							       AMDGPU_GEM_DOMAIN_GTT,
2736 							       AMDGPU_CSA_SIZE);
2737 				if (r) {
2738 					DRM_ERROR("allocate CSA failed %d\n", r);
2739 					goto init_failed;
2740 				}
2741 			}
2742 
2743 			r = amdgpu_seq64_init(adev);
2744 			if (r) {
2745 				DRM_ERROR("allocate seq64 failed %d\n", r);
2746 				goto init_failed;
2747 			}
2748 		}
2749 	}
2750 
2751 	if (amdgpu_sriov_vf(adev))
2752 		amdgpu_virt_init_data_exchange(adev);
2753 
2754 	r = amdgpu_ib_pool_init(adev);
2755 	if (r) {
2756 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2757 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2758 		goto init_failed;
2759 	}
2760 
2761 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
2762 	if (r)
2763 		goto init_failed;
2764 
2765 	r = amdgpu_device_ip_hw_init_phase1(adev);
2766 	if (r)
2767 		goto init_failed;
2768 
2769 	r = amdgpu_device_fw_loading(adev);
2770 	if (r)
2771 		goto init_failed;
2772 
2773 	r = amdgpu_device_ip_hw_init_phase2(adev);
2774 	if (r)
2775 		goto init_failed;
2776 
2777 	/*
2778 	 * Retired pages will be loaded from eeprom and reserved here.
2779 	 * This should be called after amdgpu_device_ip_hw_init_phase2 since
2780 	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2781 	 * functional for I2C communication, which is only true at this point.
2782 	 *
2783 	 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2784 	 * about failures caused by a bad gpu situation and stop the amdgpu
2785 	 * init process accordingly. For other failure cases it still releases
2786 	 * all the resources and prints an error message rather than returning
2787 	 * a negative value to the upper level.
2788 	 *
2789 	 * Note: theoretically, this should be called before all vram allocations
2790 	 * to protect retired pages from being reused.
2791 	 */
2792 	r = amdgpu_ras_recovery_init(adev);
2793 	if (r)
2794 		goto init_failed;
2795 
2796 	/*
2797 	 * In the case of XGMI, grab an extra reference on the reset domain for this device
2798 	 */
2799 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2800 		if (amdgpu_xgmi_add_device(adev) == 0) {
2801 			if (!amdgpu_sriov_vf(adev)) {
2802 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2803 
2804 				if (WARN_ON(!hive)) {
2805 					r = -ENOENT;
2806 					goto init_failed;
2807 				}
2808 
2809 				if (!hive->reset_domain ||
2810 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2811 					r = -ENOENT;
2812 					amdgpu_put_xgmi_hive(hive);
2813 					goto init_failed;
2814 				}
2815 
2816 				/* Drop the early temporary reset domain we created for device */
2817 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2818 				adev->reset_domain = hive->reset_domain;
2819 				amdgpu_put_xgmi_hive(hive);
2820 			}
2821 		}
2822 	}
2823 
2824 	r = amdgpu_device_init_schedulers(adev);
2825 	if (r)
2826 		goto init_failed;
2827 
2828 	if (adev->mman.buffer_funcs_ring->sched.ready)
2829 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2830 
2831 	/* Don't init kfd if the whole hive needs to be reset during init */
2832 	if (!adev->gmc.xgmi.pending_reset) {
2833 		kgd2kfd_init_zone_device(adev);
2834 		amdgpu_amdkfd_device_init(adev);
2835 	}
2836 
2837 	amdgpu_fru_get_product_info(adev);
2838 
2839 init_failed:
2840 
2841 	return r;
2842 }
2843 
2844 /**
2845  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2846  *
2847  * @adev: amdgpu_device pointer
2848  *
2849  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2850  * this function before a GPU reset.  If the value is retained after a
2851  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2852  */
2853 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2854 {
2855 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2856 }
2857 
2858 /**
2859  * amdgpu_device_check_vram_lost - check if vram is valid
2860  *
2861  * @adev: amdgpu_device pointer
2862  *
2863  * Checks the reset magic value written to the gart pointer in VRAM.
2864  * The driver calls this after a GPU reset to see if the contents of
2865  * VRAM have been lost or not.
2866  * Returns true if vram is lost, false if not.
2867  */
2868 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2869 {
2870 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2871 			AMDGPU_RESET_MAGIC_NUM))
2872 		return true;
2873 
2874 	if (!amdgpu_in_reset(adev))
2875 		return false;
2876 
2877 	/*
2878 	 * For all ASICs with baco/mode1 reset, the VRAM is
2879 	 * always assumed to be lost.
2880 	 */
2881 	switch (amdgpu_asic_reset_method(adev)) {
2882 	case AMD_RESET_METHOD_BACO:
2883 	case AMD_RESET_METHOD_MODE1:
2884 		return true;
2885 	default:
2886 		return false;
2887 	}
2888 }
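
/*
 * Illustrative sketch (not from the driver): the two helpers above are used as
 * a pair around a reset, conceptually:
 *
 *	amdgpu_device_fill_reset_magic(adev);	// before the reset
 *	... perform the ASIC reset ...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		// reload VRAM contents (gart table, ucode, etc.)
 */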
2889 
2890 /**
2891  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2892  *
2893  * @adev: amdgpu_device pointer
2894  * @state: clockgating state (gate or ungate)
2895  *
2896  * The list of all the hardware IPs that make up the asic is walked and the
2897  * set_clockgating_state callbacks are run.
2898  * During late init this pass enables clockgating for the hardware IPs;
2899  * during fini or suspend it disables clockgating.
2900  * Returns 0 on success, negative error code on failure.
2901  */
2902 
2903 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2904 			       enum amd_clockgating_state state)
2905 {
2906 	int i, j, r;
2907 
2908 	if (amdgpu_emu_mode == 1)
2909 		return 0;
2910 
2911 	for (j = 0; j < adev->num_ip_blocks; j++) {
2912 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2913 		if (!adev->ip_blocks[i].status.late_initialized)
2914 			continue;
2915 		/* skip CG for GFX, SDMA on S0ix */
2916 		if (adev->in_s0ix &&
2917 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2918 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2919 			continue;
2920 		/* skip CG for VCE/UVD, it's handled specially */
2921 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2922 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2923 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2924 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2925 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2926 			/* enable clockgating to save power */
2927 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2928 										     state);
2929 			if (r) {
2930 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2931 					  adev->ip_blocks[i].version->funcs->name, r);
2932 				return r;
2933 			}
2934 		}
2935 	}
2936 
2937 	return 0;
2938 }
2939 
2940 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2941 			       enum amd_powergating_state state)
2942 {
2943 	int i, j, r;
2944 
2945 	if (amdgpu_emu_mode == 1)
2946 		return 0;
2947 
2948 	for (j = 0; j < adev->num_ip_blocks; j++) {
2949 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2950 		if (!adev->ip_blocks[i].status.late_initialized)
2951 			continue;
2952 		/* skip PG for GFX, SDMA on S0ix */
2953 		if (adev->in_s0ix &&
2954 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2955 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2956 			continue;
2957 		/* skip PG for VCE/UVD, it's handled specially */
2958 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2959 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2960 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2961 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2962 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2963 			/* enable powergating to save power */
2964 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2965 											state);
2966 			if (r) {
2967 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2968 					  adev->ip_blocks[i].version->funcs->name, r);
2969 				return r;
2970 			}
2971 		}
2972 	}
2973 	return 0;
2974 }
2975 
2976 static int amdgpu_device_enable_mgpu_fan_boost(void)
2977 {
2978 	struct amdgpu_gpu_instance *gpu_ins;
2979 	struct amdgpu_device *adev;
2980 	int i, ret = 0;
2981 
2982 	mutex_lock(&mgpu_info.mutex);
2983 
2984 	/*
2985 	 * MGPU fan boost feature should be enabled
2986 	 * only when there are two or more dGPUs in
2987 	 * the system
2988 	 */
2989 	if (mgpu_info.num_dgpu < 2)
2990 		goto out;
2991 
2992 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2993 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2994 		adev = gpu_ins->adev;
2995 		if (!(adev->flags & AMD_IS_APU) &&
2996 		    !gpu_ins->mgpu_fan_enabled) {
2997 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2998 			if (ret)
2999 				break;
3000 
3001 			gpu_ins->mgpu_fan_enabled = 1;
3002 		}
3003 	}
3004 
3005 out:
3006 	mutex_unlock(&mgpu_info.mutex);
3007 
3008 	return ret;
3009 }
3010 
3011 /**
3012  * amdgpu_device_ip_late_init - run late init for hardware IPs
3013  *
3014  * @adev: amdgpu_device pointer
3015  *
3016  * Late initialization pass for hardware IPs.  The list of all the hardware
3017  * IPs that make up the asic is walked and the late_init callbacks are run.
3018  * late_init covers any special initialization that an IP requires
3019  * after all of the other IPs have been initialized or something that needs to happen
3020  * late in the init process.
3021  * Returns 0 on success, negative error code on failure.
3022  */
3023 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3024 {
3025 	struct amdgpu_gpu_instance *gpu_instance;
3026 	int i = 0, r;
3027 
3028 	for (i = 0; i < adev->num_ip_blocks; i++) {
3029 		if (!adev->ip_blocks[i].status.hw)
3030 			continue;
3031 		if (adev->ip_blocks[i].version->funcs->late_init) {
3032 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3033 			if (r) {
3034 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3035 					  adev->ip_blocks[i].version->funcs->name, r);
3036 				return r;
3037 			}
3038 		}
3039 		adev->ip_blocks[i].status.late_initialized = true;
3040 	}
3041 
3042 	r = amdgpu_ras_late_init(adev);
3043 	if (r) {
3044 		DRM_ERROR("amdgpu_ras_late_init failed %d\n", r);
3045 		return r;
3046 	}
3047 
3048 	amdgpu_ras_set_error_query_ready(adev, true);
3049 
3050 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3051 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3052 
3053 	amdgpu_device_fill_reset_magic(adev);
3054 
3055 	r = amdgpu_device_enable_mgpu_fan_boost();
3056 	if (r)
3057 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3058 
3059 	/* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
3060 	if (amdgpu_passthrough(adev) &&
3061 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3062 	     adev->asic_type == CHIP_ALDEBARAN))
3063 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3064 
3065 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3066 		mutex_lock(&mgpu_info.mutex);
3067 
3068 		/*
3069 		 * Reset the device p-state to low, as it was booted with high.
3070 		 *
3071 		 * This should be performed only after all devices from the same
3072 		 * hive have been initialized.
3073 		 *
3074 		 * However, the number of devices in the hive is not known in
3075 		 * advance; it is counted one by one as devices are initialized.
3076 		 *
3077 		 * So we wait until all XGMI interlinked devices have been
3078 		 * initialized. This may add some delay as those devices may come
3079 		 * from different hives. But that should be OK.
3080 		 */
3081 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3082 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3083 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3084 				if (gpu_instance->adev->flags & AMD_IS_APU)
3085 					continue;
3086 
3087 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3088 						AMDGPU_XGMI_PSTATE_MIN);
3089 				if (r) {
3090 					DRM_ERROR("pstate setting failed (%d).\n", r);
3091 					break;
3092 				}
3093 			}
3094 		}
3095 
3096 		mutex_unlock(&mgpu_info.mutex);
3097 	}
3098 
3099 	return 0;
3100 }
3101 
3102 /**
3103  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3104  *
3105  * @adev: amdgpu_device pointer
3106  *
3107  * For ASICs that need to disable the SMC first
3108  */
3109 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3110 {
3111 	int i, r;
3112 
3113 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3114 		return;
3115 
3116 	for (i = 0; i < adev->num_ip_blocks; i++) {
3117 		if (!adev->ip_blocks[i].status.hw)
3118 			continue;
3119 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3120 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3121 			/* XXX handle errors */
3122 			if (r) {
3123 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3124 					  adev->ip_blocks[i].version->funcs->name, r);
3125 			}
3126 			adev->ip_blocks[i].status.hw = false;
3127 			break;
3128 		}
3129 	}
3130 }
3131 
3132 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3133 {
3134 	int i, r;
3135 
3136 	for (i = 0; i < adev->num_ip_blocks; i++) {
3137 		if (!adev->ip_blocks[i].version->funcs->early_fini)
3138 			continue;
3139 
3140 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3141 		if (r) {
3142 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3143 				  adev->ip_blocks[i].version->funcs->name, r);
3144 		}
3145 	}
3146 
3147 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3148 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3149 
3150 	amdgpu_amdkfd_suspend(adev, false);
3151 
3152 	/* Workaround for ASICs that need to disable the SMC first */
3153 	amdgpu_device_smu_fini_early(adev);
3154 
3155 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3156 		if (!adev->ip_blocks[i].status.hw)
3157 			continue;
3158 
3159 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3160 		/* XXX handle errors */
3161 		if (r) {
3162 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3163 				  adev->ip_blocks[i].version->funcs->name, r);
3164 		}
3165 
3166 		adev->ip_blocks[i].status.hw = false;
3167 	}
3168 
3169 	if (amdgpu_sriov_vf(adev)) {
3170 		if (amdgpu_virt_release_full_gpu(adev, false))
3171 			DRM_ERROR("failed to release exclusive mode on fini\n");
3172 	}
3173 
3174 	return 0;
3175 }
3176 
3177 /**
3178  * amdgpu_device_ip_fini - run fini for hardware IPs
3179  *
3180  * @adev: amdgpu_device pointer
3181  *
3182  * Main teardown pass for hardware IPs.  The list of all the hardware
3183  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3184  * are run.  hw_fini tears down the hardware associated with each IP
3185  * and sw_fini tears down any software state associated with each IP.
3186  * Returns 0 on success, negative error code on failure.
3187  */
3188 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3189 {
3190 	int i, r;
3191 
3192 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3193 		amdgpu_virt_release_ras_err_handler_data(adev);
3194 
3195 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3196 		amdgpu_xgmi_remove_device(adev);
3197 
3198 	amdgpu_amdkfd_device_fini_sw(adev);
3199 
3200 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3201 		if (!adev->ip_blocks[i].status.sw)
3202 			continue;
3203 
3204 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3205 			amdgpu_ucode_free_bo(adev);
3206 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3207 			amdgpu_device_wb_fini(adev);
3208 			amdgpu_device_mem_scratch_fini(adev);
3209 			amdgpu_ib_pool_fini(adev);
3210 			amdgpu_seq64_fini(adev);
3211 		}
3212 
3213 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3214 		/* XXX handle errors */
3215 		if (r) {
3216 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3217 				  adev->ip_blocks[i].version->funcs->name, r);
3218 		}
3219 		adev->ip_blocks[i].status.sw = false;
3220 		adev->ip_blocks[i].status.valid = false;
3221 	}
3222 
3223 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3224 		if (!adev->ip_blocks[i].status.late_initialized)
3225 			continue;
3226 		if (adev->ip_blocks[i].version->funcs->late_fini)
3227 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3228 		adev->ip_blocks[i].status.late_initialized = false;
3229 	}
3230 
3231 	amdgpu_ras_fini(adev);
3232 
3233 	return 0;
3234 }
3235 
3236 /**
3237  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3238  *
3239  * @work: work_struct.
3240  */
3241 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3242 {
3243 	struct amdgpu_device *adev =
3244 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3245 	int r;
3246 
3247 	r = amdgpu_ib_ring_tests(adev);
3248 	if (r)
3249 		DRM_ERROR("ib ring test failed (%d).\n", r);
3250 }
3251 
3252 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3253 {
3254 	struct amdgpu_device *adev =
3255 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3256 
3257 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3258 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3259 
3260 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3261 		adev->gfx.gfx_off_state = true;
3262 }
3263 
3264 /**
3265  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3266  *
3267  * @adev: amdgpu_device pointer
3268  *
3269  * Main suspend function for hardware IPs.  The list of all the hardware
3270  * IPs that make up the asic is walked, clockgating is disabled and the
3271  * suspend callbacks are run.  suspend puts the hardware and software state
3272  * in each IP into a state suitable for suspend.
3273  * Returns 0 on success, negative error code on failure.
3274  */
3275 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3276 {
3277 	int i, r;
3278 
3279 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3280 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3281 
3282 	/*
3283 	 * Per the PMFW team's suggestion, the driver needs to disable the gfxoff
3284 	 * and df cstate features for the gpu reset (e.g. Mode1Reset)
3285 	 * scenario. Add the missing df cstate disablement here.
3286 	 */
3287 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3288 		dev_warn(adev->dev, "Failed to disallow df cstate");
3289 
3290 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3291 		if (!adev->ip_blocks[i].status.valid)
3292 			continue;
3293 
3294 		/* displays are handled separately */
3295 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3296 			continue;
3297 
3298 		/* XXX handle errors */
3299 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3300 		/* XXX handle errors */
3301 		if (r) {
3302 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3303 				  adev->ip_blocks[i].version->funcs->name, r);
3304 			return r;
3305 		}
3306 
3307 		adev->ip_blocks[i].status.hw = false;
3308 	}
3309 
3310 	return 0;
3311 }
3312 
3313 /**
3314  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3315  *
3316  * @adev: amdgpu_device pointer
3317  *
3318  * Main suspend function for hardware IPs.  The list of all the hardware
3319  * IPs that make up the asic is walked, clockgating is disabled and the
3320  * suspend callbacks are run.  suspend puts the hardware and software state
3321  * in each IP into a state suitable for suspend.
3322  * Returns 0 on success, negative error code on failure.
3323  */
3324 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3325 {
3326 	int i, r;
3327 
3328 	if (adev->in_s0ix)
3329 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3330 
3331 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3332 		if (!adev->ip_blocks[i].status.valid)
3333 			continue;
3334 		/* displays are handled in phase1 */
3335 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3336 			continue;
3337 		/* PSP lost connection when err_event_athub occurs */
3338 		if (amdgpu_ras_intr_triggered() &&
3339 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3340 			adev->ip_blocks[i].status.hw = false;
3341 			continue;
3342 		}
3343 
3344 		/* skip unnecessary suspend if we have not initialized them yet */
3345 		if (adev->gmc.xgmi.pending_reset &&
3346 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3347 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3348 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3349 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3350 			adev->ip_blocks[i].status.hw = false;
3351 			continue;
3352 		}
3353 
3354 		/* skip suspend of gfx/mes and psp for S0ix:
3355 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3356 		 * like at runtime. PSP is also part of the always-on hardware,
3357 		 * so there is no need to suspend it.
3358 		 */
3359 		if (adev->in_s0ix &&
3360 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3361 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3362 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3363 			continue;
3364 
3365 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3366 		if (adev->in_s0ix &&
3367 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3368 		     IP_VERSION(5, 0, 0)) &&
3369 		    (adev->ip_blocks[i].version->type ==
3370 		     AMD_IP_BLOCK_TYPE_SDMA))
3371 			continue;
3372 
3373 		/* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3374 		 * These live in TMR and are expected to be reused by PSP-TOS to reload
3375 		 * them from that location, and RLC Autoload also gets loaded from there
3376 		 * based on the PMFW -> PSP message during the re-init sequence.
3377 		 * Therefore, psp suspend & resume should be skipped to avoid destroying
3378 		 * the TMR and reloading the FWs again for IMU-enabled APU ASICs.
3379 		 */
3380 		if (amdgpu_in_reset(adev) &&
3381 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3382 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3383 			continue;
3384 
3385 		/* XXX handle errors */
3386 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3387 		/* XXX handle errors */
3388 		if (r) {
3389 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3390 				  adev->ip_blocks[i].version->funcs->name, r);
3391 		}
3392 		adev->ip_blocks[i].status.hw = false;
3393 		/* handle putting the SMC in the appropriate state */
3394 		if (!amdgpu_sriov_vf(adev)) {
3395 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3396 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3397 				if (r) {
3398 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3399 							adev->mp1_state, r);
3400 					return r;
3401 				}
3402 			}
3403 		}
3404 	}
3405 
3406 	return 0;
3407 }
3408 
3409 /**
3410  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3411  *
3412  * @adev: amdgpu_device pointer
3413  *
3414  * Main suspend function for hardware IPs.  The list of all the hardware
3415  * IPs that make up the asic is walked, clockgating is disabled and the
3416  * suspend callbacks are run.  suspend puts the hardware and software state
3417  * in each IP into a state suitable for suspend.
3418  * Returns 0 on success, negative error code on failure.
3419  */
3420 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3421 {
3422 	int r;
3423 
3424 	if (amdgpu_sriov_vf(adev)) {
3425 		amdgpu_virt_fini_data_exchange(adev);
3426 		amdgpu_virt_request_full_gpu(adev, false);
3427 	}
3428 
3429 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3430 
3431 	r = amdgpu_device_ip_suspend_phase1(adev);
3432 	if (r)
3433 		return r;
3434 	r = amdgpu_device_ip_suspend_phase2(adev);
3435 
3436 	if (amdgpu_sriov_vf(adev))
3437 		amdgpu_virt_release_full_gpu(adev, false);
3438 
3439 	return r;
3440 }
3441 
3442 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3443 {
3444 	int i, r;
3445 
3446 	static enum amd_ip_block_type ip_order[] = {
3447 		AMD_IP_BLOCK_TYPE_COMMON,
3448 		AMD_IP_BLOCK_TYPE_GMC,
3449 		AMD_IP_BLOCK_TYPE_PSP,
3450 		AMD_IP_BLOCK_TYPE_IH,
3451 	};
3452 
3453 	for (i = 0; i < adev->num_ip_blocks; i++) {
3454 		int j;
3455 		struct amdgpu_ip_block *block;
3456 
3457 		block = &adev->ip_blocks[i];
3458 		block->status.hw = false;
3459 
3460 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3461 
3462 			if (block->version->type != ip_order[j] ||
3463 				!block->status.valid)
3464 				continue;
3465 
3466 			r = block->version->funcs->hw_init(adev);
3467 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3468 			if (r)
3469 				return r;
3470 			block->status.hw = true;
3471 		}
3472 	}
3473 
3474 	return 0;
3475 }
3476 
3477 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3478 {
3479 	int i, r;
3480 
3481 	static enum amd_ip_block_type ip_order[] = {
3482 		AMD_IP_BLOCK_TYPE_SMC,
3483 		AMD_IP_BLOCK_TYPE_DCE,
3484 		AMD_IP_BLOCK_TYPE_GFX,
3485 		AMD_IP_BLOCK_TYPE_SDMA,
3486 		AMD_IP_BLOCK_TYPE_MES,
3487 		AMD_IP_BLOCK_TYPE_UVD,
3488 		AMD_IP_BLOCK_TYPE_VCE,
3489 		AMD_IP_BLOCK_TYPE_VCN,
3490 		AMD_IP_BLOCK_TYPE_JPEG
3491 	};
3492 
3493 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3494 		int j;
3495 		struct amdgpu_ip_block *block;
3496 
3497 		for (j = 0; j < adev->num_ip_blocks; j++) {
3498 			block = &adev->ip_blocks[j];
3499 
3500 			if (block->version->type != ip_order[i] ||
3501 				!block->status.valid ||
3502 				block->status.hw)
3503 				continue;
3504 
3505 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3506 				r = block->version->funcs->resume(adev);
3507 			else
3508 				r = block->version->funcs->hw_init(adev);
3509 
3510 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3511 			if (r)
3512 				return r;
3513 			block->status.hw = true;
3514 		}
3515 	}
3516 
3517 	return 0;
3518 }
3519 
3520 /**
3521  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3522  *
3523  * @adev: amdgpu_device pointer
3524  *
3525  * First resume function for hardware IPs.  The list of all the hardware
3526  * IPs that make up the asic is walked and the resume callbacks are run for
3527  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3528  * after a suspend and updates the software state as necessary.  This
3529  * function is also used for restoring the GPU after a GPU reset.
3530  * Returns 0 on success, negative error code on failure.
3531  */
3532 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3533 {
3534 	int i, r;
3535 
3536 	for (i = 0; i < adev->num_ip_blocks; i++) {
3537 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3538 			continue;
3539 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3540 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3541 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3542 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3543 
3544 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3545 			if (r) {
3546 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3547 					  adev->ip_blocks[i].version->funcs->name, r);
3548 				return r;
3549 			}
3550 			adev->ip_blocks[i].status.hw = true;
3551 		}
3552 	}
3553 
3554 	return 0;
3555 }
3556 
3557 /**
3558  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3559  *
3560  * @adev: amdgpu_device pointer
3561  *
3562  * Second resume function for hardware IPs.  The list of all the hardware
3563  * IPs that make up the asic is walked and the resume callbacks are run for
3564  * all blocks except COMMON, GMC, IH, and PSP.  resume puts the hardware into a
3565  * functional state after a suspend and updates the software state as
3566  * necessary.  This function is also used for restoring the GPU after a GPU
3567  * reset.
3568  * Returns 0 on success, negative error code on failure.
3569  */
3570 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3571 {
3572 	int i, r;
3573 
3574 	for (i = 0; i < adev->num_ip_blocks; i++) {
3575 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3576 			continue;
3577 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3578 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3579 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3580 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3581 			continue;
3582 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3583 		if (r) {
3584 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3585 				  adev->ip_blocks[i].version->funcs->name, r);
3586 			return r;
3587 		}
3588 		adev->ip_blocks[i].status.hw = true;
3589 	}
3590 
3591 	return 0;
3592 }
3593 
3594 /**
3595  * amdgpu_device_ip_resume - run resume for hardware IPs
3596  *
3597  * @adev: amdgpu_device pointer
3598  *
3599  * Main resume function for hardware IPs.  The hardware IPs
3600  * are split into two resume functions because they are
3601  * also used in recovering from a GPU reset and some additional
3602  * steps need to be taken between them.  In this case (S3/S4) they are
3603  * run sequentially.
3604  * Returns 0 on success, negative error code on failure.
3605  */
3606 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3607 {
3608 	int r;
3609 
3610 	r = amdgpu_device_ip_resume_phase1(adev);
3611 	if (r)
3612 		return r;
3613 
3614 	r = amdgpu_device_fw_loading(adev);
3615 	if (r)
3616 		return r;
3617 
3618 	r = amdgpu_device_ip_resume_phase2(adev);
3619 
3620 	if (adev->mman.buffer_funcs_ring->sched.ready)
3621 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3622 
3623 	return r;
3624 }
3625 
3626 /**
3627  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3628  *
3629  * @adev: amdgpu_device pointer
3630  *
3631  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3632  */
3633 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3634 {
3635 	if (amdgpu_sriov_vf(adev)) {
3636 		if (adev->is_atom_fw) {
3637 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3638 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3639 		} else {
3640 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3641 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3642 		}
3643 
3644 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3645 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3646 	}
3647 }
3648 
3649 /**
3650  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3651  *
3652  * @asic_type: AMD asic type
3653  *
3654  * Check if there is DC (new modesetting infrastructure) support for an asic.
3655  * Returns true if DC has support, false if not.
3656  */
3657 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3658 {
3659 	switch (asic_type) {
3660 #ifdef CONFIG_DRM_AMDGPU_SI
3661 	case CHIP_HAINAN:
3662 #endif
3663 	case CHIP_TOPAZ:
3664 		/* chips with no display hardware */
3665 		return false;
3666 #if defined(CONFIG_DRM_AMD_DC)
3667 	case CHIP_TAHITI:
3668 	case CHIP_PITCAIRN:
3669 	case CHIP_VERDE:
3670 	case CHIP_OLAND:
3671 		/*
3672 		 * We have systems in the wild with these ASICs that require
3673 		 * LVDS and VGA support which is not supported with DC.
3674 		 *
3675 		 * Fallback to the non-DC driver here by default so as not to
3676 		 * cause regressions.
3677 		 */
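		/*
		 * Editorial note (illustrative, inferred from the check below,
		 * not from separate documentation): DC is only used on these SI
		 * parts when the user explicitly opts in, e.g. by booting with
		 *
		 *   amdgpu.dc=1
		 *
		 * and only when CONFIG_DRM_AMD_DC_SI is built; otherwise the
		 * legacy (non-DC) display path is kept.
		 */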
3678 #if defined(CONFIG_DRM_AMD_DC_SI)
3679 		return amdgpu_dc > 0;
3680 #else
3681 		return false;
3682 #endif
3683 	case CHIP_BONAIRE:
3684 	case CHIP_KAVERI:
3685 	case CHIP_KABINI:
3686 	case CHIP_MULLINS:
3687 		/*
3688 		 * We have systems in the wild with these ASICs that require
3689 		 * VGA support which is not supported with DC.
3690 		 *
3691 		 * Fallback to the non-DC driver here by default so as not to
3692 		 * cause regressions.
3693 		 */
3694 		return amdgpu_dc > 0;
3695 	default:
3696 		return amdgpu_dc != 0;
3697 #else
3698 	default:
3699 		if (amdgpu_dc > 0)
3700 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3701 		return false;
3702 #endif
3703 	}
3704 }
3705 
3706 /**
3707  * amdgpu_device_has_dc_support - check if dc is supported
3708  *
3709  * @adev: amdgpu_device pointer
3710  *
3711  * Returns true for supported, false for not supported
3712  */
3713 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3714 {
3715 	if (adev->enable_virtual_display ||
3716 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3717 		return false;
3718 
3719 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3720 }
3721 
3722 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3723 {
3724 	struct amdgpu_device *adev =
3725 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3726 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3727 
3728 	/* It's a bug to not have a hive within this function */
3729 	if (WARN_ON(!hive))
3730 		return;
3731 
3732 	/*
3733 	 * Use task barrier to synchronize all xgmi reset works across the
3734 	 * hive. task_barrier_enter and task_barrier_exit will block
3735 	 * until all the threads running the xgmi reset works reach
3736 	 * those points. task_barrier_full will do both blocks.
3737 	 */
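	/*
	 * Sketch of the intended sequencing (an assumption drawn from the code
	 * below, not from separate documentation): for BACO the two barriers
	 * split the reset into separate rendezvous points, roughly
	 *
	 *   task_barrier_enter(&hive->tb);   // all nodes arrive before entering BACO
	 *   amdgpu_device_baco_enter(...);
	 *   task_barrier_exit(&hive->tb);    // all nodes are in BACO before any exits
	 *   amdgpu_device_baco_exit(...);
	 *
	 * so no device leaves BACO until every device in the hive has entered it.
	 */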
3738 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3739 
3740 		task_barrier_enter(&hive->tb);
3741 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3742 
3743 		if (adev->asic_reset_res)
3744 			goto fail;
3745 
3746 		task_barrier_exit(&hive->tb);
3747 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3748 
3749 		if (adev->asic_reset_res)
3750 			goto fail;
3751 
3752 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3753 	} else {
3754 
3755 		task_barrier_full(&hive->tb);
3756 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3757 	}
3758 
3759 fail:
3760 	if (adev->asic_reset_res)
3761 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3762 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3763 	amdgpu_put_xgmi_hive(hive);
3764 }
3765 
3766 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3767 {
3768 	char *input = amdgpu_lockup_timeout;
3769 	char *timeout_setting = NULL;
3770 	int index = 0;
3771 	long timeout;
3772 	int ret = 0;
3773 
3774 	/*
3775 	 * By default the timeout for non-compute jobs is 10000 ms
3776 	 * and 60000 ms for compute jobs.
3777 	 * In SR-IOV or passthrough mode, the timeout for compute
3778 	 * jobs is 60000 ms by default.
3779 	 */
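	/*
	 * Illustrative example (assumed module-parameter format, based on the
	 * parsing below, where the comma-separated values are consumed in the
	 * order gfx, compute, sdma, video and are given in milliseconds):
	 *
	 *   amdgpu.lockup_timeout=10000,60000,10000,10000
	 *
	 * A single value applies to all non-compute engines (and to compute as
	 * well under SR-IOV or passthrough), 0 keeps the default for that
	 * position, and a negative value disables the timeout.
	 */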
3780 	adev->gfx_timeout = msecs_to_jiffies(10000);
3781 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3782 	if (amdgpu_sriov_vf(adev))
3783 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3784 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3785 	else
3786 		adev->compute_timeout =  msecs_to_jiffies(60000);
3787 
3788 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3789 		while ((timeout_setting = strsep(&input, ",")) &&
3790 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3791 			ret = kstrtol(timeout_setting, 0, &timeout);
3792 			if (ret)
3793 				return ret;
3794 
3795 			if (timeout == 0) {
3796 				index++;
3797 				continue;
3798 			} else if (timeout < 0) {
3799 				timeout = MAX_SCHEDULE_TIMEOUT;
3800 				dev_warn(adev->dev, "lockup timeout disabled");
3801 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3802 			} else {
3803 				timeout = msecs_to_jiffies(timeout);
3804 			}
3805 
3806 			switch (index++) {
3807 			case 0:
3808 				adev->gfx_timeout = timeout;
3809 				break;
3810 			case 1:
3811 				adev->compute_timeout = timeout;
3812 				break;
3813 			case 2:
3814 				adev->sdma_timeout = timeout;
3815 				break;
3816 			case 3:
3817 				adev->video_timeout = timeout;
3818 				break;
3819 			default:
3820 				break;
3821 			}
3822 		}
3823 		/*
3824 		 * If only one value is specified, it applies
3825 		 * to all non-compute jobs.
3826 		 */
3827 		if (index == 1) {
3828 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3829 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3830 				adev->compute_timeout = adev->gfx_timeout;
3831 		}
3832 	}
3833 
3834 	return ret;
3835 }
3836 
3837 /**
3838  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3839  *
3840  * @adev: amdgpu_device pointer
3841  *
3842  * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode.
3843  */
3844 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3845 {
3846 	struct iommu_domain *domain;
3847 
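	/*
	 * Assumption for illustration: an identity-type domain is what the
	 * IOMMU core reports for passthrough setups (e.g. booting with
	 * iommu=pt), in which case device DMA addresses equal physical
	 * addresses; a NULL domain means no IOMMU translation at all.
	 */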
3848 	domain = iommu_get_domain_for_dev(adev->dev);
3849 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3850 		adev->ram_is_direct_mapped = true;
3851 }
3852 
3853 static const struct attribute *amdgpu_dev_attributes[] = {
3854 	&dev_attr_pcie_replay_count.attr,
3855 	NULL
3856 };
3857 
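/*
 * Policy sketch for MCBP (Mid-Command-Buffer Preemption) selection, inferred
 * from the code below rather than from separate documentation: the amdgpu.mcbp
 * module parameter forces it on (1) or off (0); with the default auto setting
 * it is enabled on GFX9-generation parts that expose gfx rings, and it is
 * always enabled when running as an SR-IOV VF.
 */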
3858 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3859 {
3860 	if (amdgpu_mcbp == 1)
3861 		adev->gfx.mcbp = true;
3862 	else if (amdgpu_mcbp == 0)
3863 		adev->gfx.mcbp = false;
3864 	else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3865 		 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
3866 		 adev->gfx.num_gfx_rings)
3867 		adev->gfx.mcbp = true;
3868 
3869 	if (amdgpu_sriov_vf(adev))
3870 		adev->gfx.mcbp = true;
3871 
3872 	if (adev->gfx.mcbp)
3873 		DRM_INFO("MCBP is enabled\n");
3874 }
3875 
3876 /**
3877  * amdgpu_device_init - initialize the driver
3878  *
3879  * @adev: amdgpu_device pointer
3880  * @flags: driver flags
3881  *
3882  * Initializes the driver info and hw (all asics).
3883  * Returns 0 for success or an error on failure.
3884  * Called at driver startup.
3885  */
3886 int amdgpu_device_init(struct amdgpu_device *adev,
3887 		       uint32_t flags)
3888 {
3889 	struct drm_device *ddev = adev_to_drm(adev);
3890 	struct pci_dev *pdev = adev->pdev;
3891 	int r, i;
3892 	bool px = false;
3893 	u32 max_MBps;
3894 	int tmp;
3895 
3896 	adev->shutdown = false;
3897 	adev->flags = flags;
3898 
3899 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3900 		adev->asic_type = amdgpu_force_asic_type;
3901 	else
3902 		adev->asic_type = flags & AMD_ASIC_MASK;
3903 
3904 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3905 	if (amdgpu_emu_mode == 1)
3906 		adev->usec_timeout *= 10;
3907 	adev->gmc.gart_size = 512 * 1024 * 1024;
3908 	adev->accel_working = false;
3909 	adev->num_rings = 0;
3910 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3911 	adev->mman.buffer_funcs = NULL;
3912 	adev->mman.buffer_funcs_ring = NULL;
3913 	adev->vm_manager.vm_pte_funcs = NULL;
3914 	adev->vm_manager.vm_pte_num_scheds = 0;
3915 	adev->gmc.gmc_funcs = NULL;
3916 	adev->harvest_ip_mask = 0x0;
3917 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3918 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3919 
3920 	adev->smc_rreg = &amdgpu_invalid_rreg;
3921 	adev->smc_wreg = &amdgpu_invalid_wreg;
3922 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3923 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3924 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3925 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3926 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3927 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3928 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3929 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3930 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3931 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
3932 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3933 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3934 	adev->didt_rreg = &amdgpu_invalid_rreg;
3935 	adev->didt_wreg = &amdgpu_invalid_wreg;
3936 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3937 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3938 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3939 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3940 
3941 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3942 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3943 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3944 
3945 	/* Mutex initialization is all done here so we
3946 	 * can recall functions without having locking issues
3947 	 */
3948 	mutex_init(&adev->firmware.mutex);
3949 	mutex_init(&adev->pm.mutex);
3950 	mutex_init(&adev->gfx.gpu_clock_mutex);
3951 	mutex_init(&adev->srbm_mutex);
3952 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3953 	mutex_init(&adev->gfx.gfx_off_mutex);
3954 	mutex_init(&adev->gfx.partition_mutex);
3955 	mutex_init(&adev->grbm_idx_mutex);
3956 	mutex_init(&adev->mn_lock);
3957 	mutex_init(&adev->virt.vf_errors.lock);
3958 	hash_init(adev->mn_hash);
3959 	mutex_init(&adev->psp.mutex);
3960 	mutex_init(&adev->notifier_lock);
3961 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3962 	mutex_init(&adev->benchmark_mutex);
3963 
3964 	amdgpu_device_init_apu_flags(adev);
3965 
3966 	r = amdgpu_device_check_arguments(adev);
3967 	if (r)
3968 		return r;
3969 
3970 	spin_lock_init(&adev->mmio_idx_lock);
3971 	spin_lock_init(&adev->smc_idx_lock);
3972 	spin_lock_init(&adev->pcie_idx_lock);
3973 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3974 	spin_lock_init(&adev->didt_idx_lock);
3975 	spin_lock_init(&adev->gc_cac_idx_lock);
3976 	spin_lock_init(&adev->se_cac_idx_lock);
3977 	spin_lock_init(&adev->audio_endpt_idx_lock);
3978 	spin_lock_init(&adev->mm_stats.lock);
3979 
3980 	INIT_LIST_HEAD(&adev->shadow_list);
3981 	mutex_init(&adev->shadow_list_lock);
3982 
3983 	INIT_LIST_HEAD(&adev->reset_list);
3984 
3985 	INIT_LIST_HEAD(&adev->ras_list);
3986 
3987 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3988 
3989 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3990 			  amdgpu_device_delayed_init_work_handler);
3991 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3992 			  amdgpu_device_delay_enable_gfx_off);
3993 
3994 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3995 
3996 	adev->gfx.gfx_off_req_count = 1;
3997 	adev->gfx.gfx_off_residency = 0;
3998 	adev->gfx.gfx_off_entrycount = 0;
3999 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4000 
4001 	atomic_set(&adev->throttling_logging_enabled, 1);
4002 	/*
4003 	 * If throttling continues, logging will be performed every minute
4004 	 * to avoid log flooding. "-1" is subtracted since the thermal
4005 	 * throttling interrupt comes every second. Thus, the total logging
4006 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4007 	 * for the throttling interrupt) = 60 seconds.
4008 	 */
4009 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4010 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4011 
4012 	/* Registers mapping */
4013 	/* TODO: block userspace mapping of io register */
4014 	if (adev->asic_type >= CHIP_BONAIRE) {
4015 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4016 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4017 	} else {
4018 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4019 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4020 	}
4021 
4022 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4023 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4024 
4025 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4026 	if (!adev->rmmio)
4027 		return -ENOMEM;
4028 
4029 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4030 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4031 
4032 	/*
4033 	 * The reset domain needs to be present early, before the XGMI hive is
4034 	 * discovered (if any) and initialized, so that the reset semaphore and
4035 	 * in_gpu_reset flag can be used early during init and before calling RREG32.
4036 	 */
4037 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4038 	if (!adev->reset_domain)
4039 		return -ENOMEM;
4040 
4041 	/* detect hw virtualization here */
4042 	amdgpu_detect_virtualization(adev);
4043 
4044 	amdgpu_device_get_pcie_info(adev);
4045 
4046 	r = amdgpu_device_get_job_timeout_settings(adev);
4047 	if (r) {
4048 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4049 		return r;
4050 	}
4051 
4052 	/* early init functions */
4053 	r = amdgpu_device_ip_early_init(adev);
4054 	if (r)
4055 		return r;
4056 
4057 	amdgpu_device_set_mcbp(adev);
4058 
4059 	/* Get rid of things like offb */
4060 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4061 	if (r)
4062 		return r;
4063 
4064 	/* Enable TMZ based on IP_VERSION */
4065 	amdgpu_gmc_tmz_set(adev);
4066 
4067 	amdgpu_gmc_noretry_set(adev);
4068 	/* Need to get xgmi info early to decide the reset behavior */
4069 	if (adev->gmc.xgmi.supported) {
4070 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4071 		if (r)
4072 			return r;
4073 	}
4074 
4075 	/* enable PCIE atomic ops */
4076 	if (amdgpu_sriov_vf(adev)) {
4077 		if (adev->virt.fw_reserve.p_pf2vf)
4078 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4079 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4080 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4081 	/* APUs with GFX9 onwards don't rely on PCIe atomics; their internal
4082 	 * path natively supports atomics, so set have_atomics_support to true.
4083 	 */
4084 	} else if ((adev->flags & AMD_IS_APU) &&
4085 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4086 		    IP_VERSION(9, 0, 0))) {
4087 		adev->have_atomics_support = true;
4088 	} else {
4089 		adev->have_atomics_support =
4090 			!pci_enable_atomic_ops_to_root(adev->pdev,
4091 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4092 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4093 	}
4094 
4095 	if (!adev->have_atomics_support)
4096 		dev_info(adev->dev, "PCIe atomic ops are not supported\n");
4097 
4098 	/* doorbell bar mapping and doorbell index init */
4099 	amdgpu_doorbell_init(adev);
4100 
4101 	if (amdgpu_emu_mode == 1) {
4102 		/* post the asic on emulation mode */
4103 		emu_soc_asic_init(adev);
4104 		goto fence_driver_init;
4105 	}
4106 
4107 	amdgpu_reset_init(adev);
4108 
4109 	/* detect if we have an SR-IOV vBIOS */
4110 	if (adev->bios)
4111 		amdgpu_device_detect_sriov_bios(adev);
4112 
4113 	/* check if we need to reset the asic
4114 	 *  E.g., driver was not cleanly unloaded previously, etc.
4115 	 */
4116 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4117 		if (adev->gmc.xgmi.num_physical_nodes) {
4118 			dev_info(adev->dev, "Pending hive reset.\n");
4119 			adev->gmc.xgmi.pending_reset = true;
4120 			/* Only need to init the necessary blocks for SMU to handle the reset */
4121 			for (i = 0; i < adev->num_ip_blocks; i++) {
4122 				if (!adev->ip_blocks[i].status.valid)
4123 					continue;
4124 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4125 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4126 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4127 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4128 					DRM_DEBUG("IP %s disabled for hw_init.\n",
4129 						adev->ip_blocks[i].version->funcs->name);
4130 					adev->ip_blocks[i].status.hw = true;
4131 				}
4132 			}
4133 		} else {
4134 			switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
4135 			case IP_VERSION(13, 0, 0):
4136 			case IP_VERSION(13, 0, 7):
4137 			case IP_VERSION(13, 0, 10):
4138 				r = psp_gpu_reset(adev);
4139 				break;
4140 			default:
4141 				tmp = amdgpu_reset_method;
4142 				/* It should do a default reset when loading or reloading the driver,
4143 				 * regardless of the module parameter reset_method.
4144 				 */
4145 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4146 				r = amdgpu_asic_reset(adev);
4147 				amdgpu_reset_method = tmp;
4148 				break;
4149 			}
4150 
4151 			if (r) {
4152 				dev_err(adev->dev, "asic reset on init failed\n");
4153 				goto failed;
4154 			}
4155 		}
4156 	}
4157 
4158 	/* Post card if necessary */
4159 	if (amdgpu_device_need_post(adev)) {
4160 		if (!adev->bios) {
4161 			dev_err(adev->dev, "no vBIOS found\n");
4162 			r = -EINVAL;
4163 			goto failed;
4164 		}
4165 		DRM_INFO("GPU posting now...\n");
4166 		r = amdgpu_device_asic_init(adev);
4167 		if (r) {
4168 			dev_err(adev->dev, "gpu post error!\n");
4169 			goto failed;
4170 		}
4171 	}
4172 
4173 	if (adev->bios) {
4174 		if (adev->is_atom_fw) {
4175 			/* Initialize clocks */
4176 			r = amdgpu_atomfirmware_get_clock_info(adev);
4177 			if (r) {
4178 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4179 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4180 				goto failed;
4181 			}
4182 		} else {
4183 			/* Initialize clocks */
4184 			r = amdgpu_atombios_get_clock_info(adev);
4185 			if (r) {
4186 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4187 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4188 				goto failed;
4189 			}
4190 			/* init i2c buses */
4191 			if (!amdgpu_device_has_dc_support(adev))
4192 				amdgpu_atombios_i2c_init(adev);
4193 		}
4194 	}
4195 
4196 fence_driver_init:
4197 	/* Fence driver */
4198 	r = amdgpu_fence_driver_sw_init(adev);
4199 	if (r) {
4200 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4201 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4202 		goto failed;
4203 	}
4204 
4205 	/* init the mode config */
4206 	drm_mode_config_init(adev_to_drm(adev));
4207 
4208 	r = amdgpu_device_ip_init(adev);
4209 	if (r) {
4210 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4211 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4212 		goto release_ras_con;
4213 	}
4214 
4215 	amdgpu_fence_driver_hw_init(adev);
4216 
4217 	dev_info(adev->dev,
4218 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4219 			adev->gfx.config.max_shader_engines,
4220 			adev->gfx.config.max_sh_per_se,
4221 			adev->gfx.config.max_cu_per_sh,
4222 			adev->gfx.cu_info.number);
4223 
4224 	adev->accel_working = true;
4225 
4226 	amdgpu_vm_check_compute_bug(adev);
4227 
4228 	/* Initialize the buffer migration limit. */
4229 	if (amdgpu_moverate >= 0)
4230 		max_MBps = amdgpu_moverate;
4231 	else
4232 		max_MBps = 8; /* Allow 8 MB/s. */
4233 	/* Get a log2 for easy divisions. */
4234 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4235 
4236 	/*
4237 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4238 	 * Otherwise the mgpu fan boost feature will be skipped because the
4239 	 * gpu instance count would be too low.
4240 	 */
4241 	amdgpu_register_gpu_instance(adev);
4242 
4243 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4244 	 * explicit gating rather than handling it automatically.
4245 	 */
4246 	if (!adev->gmc.xgmi.pending_reset) {
4247 		r = amdgpu_device_ip_late_init(adev);
4248 		if (r) {
4249 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4250 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4251 			goto release_ras_con;
4252 		}
4253 		/* must succeed. */
4254 		amdgpu_ras_resume(adev);
4255 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4256 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4257 	}
4258 
4259 	if (amdgpu_sriov_vf(adev)) {
4260 		amdgpu_virt_release_full_gpu(adev, true);
4261 		flush_delayed_work(&adev->delayed_init_work);
4262 	}
4263 
4264 	/*
4265 	 * Register these sysfs interfaces after `late_init`, since some of the
4266 	 * operations performed in `late_init` may affect the creation of the
4267 	 * sysfs interfaces.
4268 	 */
4269 	r = amdgpu_atombios_sysfs_init(adev);
4270 	if (r)
4271 		drm_err(&adev->ddev,
4272 			"registering atombios sysfs failed (%d).\n", r);
4273 
4274 	r = amdgpu_pm_sysfs_init(adev);
4275 	if (r)
4276 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4277 
4278 	r = amdgpu_ucode_sysfs_init(adev);
4279 	if (r) {
4280 		adev->ucode_sysfs_en = false;
4281 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4282 	} else
4283 		adev->ucode_sysfs_en = true;
4284 
4285 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4286 	if (r)
4287 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4288 
4289 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4290 	if (r)
4291 		dev_err(adev->dev,
4292 			"Could not create amdgpu board attributes\n");
4293 
4294 	amdgpu_fru_sysfs_init(adev);
4295 	amdgpu_reg_state_sysfs_init(adev);
4296 
4297 	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
4298 		r = amdgpu_pmu_init(adev);
4299 		if (r)
4300 			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
4301 
4302 	/* Keep the stored PCI config space at hand for restore in case of a sudden PCI error */
4303 	if (amdgpu_device_cache_pci_state(adev->pdev))
4304 		pci_restore_state(pdev);
4305 
4306 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4307 	/* this will fail for cards that aren't VGA class devices, just
4308 	 * ignore it
4309 	 */
4310 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4311 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4312 
4313 	px = amdgpu_device_supports_px(ddev);
4314 
4315 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4316 				apple_gmux_detect(NULL, NULL)))
4317 		vga_switcheroo_register_client(adev->pdev,
4318 					       &amdgpu_switcheroo_ops, px);
4319 
4320 	if (px)
4321 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4322 
4323 	if (adev->gmc.xgmi.pending_reset)
4324 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4325 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4326 
4327 	amdgpu_device_check_iommu_direct_map(adev);
4328 
4329 	return 0;
4330 
4331 release_ras_con:
4332 	if (amdgpu_sriov_vf(adev))
4333 		amdgpu_virt_release_full_gpu(adev, true);
4334 
4335 	/* failed in exclusive mode due to timeout */
4336 	if (amdgpu_sriov_vf(adev) &&
4337 		!amdgpu_sriov_runtime(adev) &&
4338 		amdgpu_virt_mmio_blocked(adev) &&
4339 		!amdgpu_virt_wait_reset(adev)) {
4340 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4341 		/* Don't send request since VF is inactive. */
4342 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4343 		adev->virt.ops = NULL;
4344 		r = -EAGAIN;
4345 	}
4346 	amdgpu_release_ras_context(adev);
4347 
4348 failed:
4349 	amdgpu_vf_error_trans_all(adev);
4350 
4351 	return r;
4352 }
4353 
4354 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4355 {
4356 
4357 	/* Clear all CPU mappings pointing to this device */
4358 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4359 
4360 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4361 	amdgpu_doorbell_fini(adev);
4362 
4363 	iounmap(adev->rmmio);
4364 	adev->rmmio = NULL;
4365 	if (adev->mman.aper_base_kaddr)
4366 		iounmap(adev->mman.aper_base_kaddr);
4367 	adev->mman.aper_base_kaddr = NULL;
4368 
4369 	/* Memory manager related */
4370 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4371 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4372 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4373 	}
4374 }
4375 
4376 /**
4377  * amdgpu_device_fini_hw - tear down the driver
4378  *
4379  * @adev: amdgpu_device pointer
4380  *
4381  * Tear down the driver info (all asics).
4382  * Called at driver shutdown.
4383  */
4384 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4385 {
4386 	dev_info(adev->dev, "amdgpu: finishing device.\n");
4387 	flush_delayed_work(&adev->delayed_init_work);
4388 	adev->shutdown = true;
4389 
4390 	/* make sure IB test finished before entering exclusive mode
4391 	 * to avoid preemption on IB test
4392 	 */
4393 	if (amdgpu_sriov_vf(adev)) {
4394 		amdgpu_virt_request_full_gpu(adev, false);
4395 		amdgpu_virt_fini_data_exchange(adev);
4396 	}
4397 
4398 	/* disable all interrupts */
4399 	amdgpu_irq_disable_all(adev);
4400 	if (adev->mode_info.mode_config_initialized) {
4401 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4402 			drm_helper_force_disable_all(adev_to_drm(adev));
4403 		else
4404 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4405 	}
4406 	amdgpu_fence_driver_hw_fini(adev);
4407 
4408 	if (adev->mman.initialized)
4409 		drain_workqueue(adev->mman.bdev.wq);
4410 
4411 	if (adev->pm.sysfs_initialized)
4412 		amdgpu_pm_sysfs_fini(adev);
4413 	if (adev->ucode_sysfs_en)
4414 		amdgpu_ucode_sysfs_fini(adev);
4415 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4416 	amdgpu_fru_sysfs_fini(adev);
4417 
4418 	amdgpu_reg_state_sysfs_fini(adev);
4419 
4420 	/* disable ras feature must before hw fini */
4421 	amdgpu_ras_pre_fini(adev);
4422 
4423 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4424 
4425 	amdgpu_device_ip_fini_early(adev);
4426 
4427 	amdgpu_irq_fini_hw(adev);
4428 
4429 	if (adev->mman.initialized)
4430 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4431 
4432 	amdgpu_gart_dummy_page_fini(adev);
4433 
4434 	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4435 		amdgpu_device_unmap_mmio(adev);
4436 
4437 }
4438 
4439 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4440 {
4441 	int idx;
4442 	bool px;
4443 
4444 	amdgpu_fence_driver_sw_fini(adev);
4445 	amdgpu_device_ip_fini(adev);
4446 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4447 	adev->accel_working = false;
4448 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4449 
4450 	amdgpu_reset_fini(adev);
4451 
4452 	/* free i2c buses */
4453 	if (!amdgpu_device_has_dc_support(adev))
4454 		amdgpu_i2c_fini(adev);
4455 
4456 	if (amdgpu_emu_mode != 1)
4457 		amdgpu_atombios_fini(adev);
4458 
4459 	kfree(adev->bios);
4460 	adev->bios = NULL;
4461 
4462 	kfree(adev->fru_info);
4463 	adev->fru_info = NULL;
4464 
4465 	px = amdgpu_device_supports_px(adev_to_drm(adev));
4466 
4467 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4468 				apple_gmux_detect(NULL, NULL)))
4469 		vga_switcheroo_unregister_client(adev->pdev);
4470 
4471 	if (px)
4472 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4473 
4474 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4475 		vga_client_unregister(adev->pdev);
4476 
4477 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4478 
4479 		iounmap(adev->rmmio);
4480 		adev->rmmio = NULL;
4481 		amdgpu_doorbell_fini(adev);
4482 		drm_dev_exit(idx);
4483 	}
4484 
4485 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4486 		amdgpu_pmu_fini(adev);
4487 	if (adev->mman.discovery_bin)
4488 		amdgpu_discovery_fini(adev);
4489 
4490 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4491 	adev->reset_domain = NULL;
4492 
4493 	kfree(adev->pci_state);
4494 
4495 }
4496 
4497 /**
4498  * amdgpu_device_evict_resources - evict device resources
4499  * @adev: amdgpu device object
4500  *
4501  * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4502  * of the vram memory type. Mainly used for evicting device resources
4503  * at suspend time.
4504  *
4505  */
4506 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4507 {
4508 	int ret;
4509 
4510 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4511 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4512 		return 0;
4513 
4514 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4515 	if (ret)
4516 		DRM_WARN("evicting device resources failed\n");
4517 	return ret;
4518 }
4519 
4520 /*
4521  * Suspend & resume.
4522  */
4523 /**
4524  * amdgpu_device_prepare - prepare for device suspend
4525  *
4526  * @dev: drm dev pointer
4527  *
4528  * Prepare to put the hw in the suspend state (all asics).
4529  * Returns 0 for success or an error on failure.
4530  * Called at driver suspend.
4531  */
4532 int amdgpu_device_prepare(struct drm_device *dev)
4533 {
4534 	struct amdgpu_device *adev = drm_to_adev(dev);
4535 	int i, r;
4536 
4537 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4538 		return 0;
4539 
4540 	/* Evict the majority of BOs before starting suspend sequence */
4541 	r = amdgpu_device_evict_resources(adev);
4542 	if (r)
4543 		return r;
4544 
4545 	for (i = 0; i < adev->num_ip_blocks; i++) {
4546 		if (!adev->ip_blocks[i].status.valid)
4547 			continue;
4548 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4549 			continue;
4550 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4551 		if (r)
4552 			return r;
4553 	}
4554 
4555 	return 0;
4556 }
4557 
4558 /**
4559  * amdgpu_device_suspend - initiate device suspend
4560  *
4561  * @dev: drm dev pointer
4562  * @fbcon: notify the fbdev of suspend
4563  *
4564  * Puts the hw in the suspend state (all asics).
4565  * Returns 0 for success or an error on failure.
4566  * Called at driver suspend.
4567  */
4568 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4569 {
4570 	struct amdgpu_device *adev = drm_to_adev(dev);
4571 	int r = 0;
4572 
4573 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4574 		return 0;
4575 
4576 	adev->in_suspend = true;
4577 
4578 	if (amdgpu_sriov_vf(adev)) {
4579 		amdgpu_virt_fini_data_exchange(adev);
4580 		r = amdgpu_virt_request_full_gpu(adev, false);
4581 		if (r)
4582 			return r;
4583 	}
4584 
4585 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4586 		DRM_WARN("smart shift update failed\n");
4587 
4588 	if (fbcon)
4589 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4590 
4591 	cancel_delayed_work_sync(&adev->delayed_init_work);
4592 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4593 
4594 	amdgpu_ras_suspend(adev);
4595 
4596 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4597 
4598 	amdgpu_device_ip_suspend_phase1(adev);
4599 
4600 	if (!adev->in_s0ix)
4601 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4602 
4603 	r = amdgpu_device_evict_resources(adev);
4604 	if (r)
4605 		return r;
4606 
4607 	amdgpu_fence_driver_hw_fini(adev);
4608 
4609 	amdgpu_device_ip_suspend_phase2(adev);
4610 
4611 	if (amdgpu_sriov_vf(adev))
4612 		amdgpu_virt_release_full_gpu(adev, false);
4613 
4614 	r = amdgpu_dpm_notify_rlc_state(adev, false);
4615 	if (r)
4616 		return r;
4617 
4618 	return 0;
4619 }
4620 
4621 /**
4622  * amdgpu_device_resume - initiate device resume
4623  *
4624  * @dev: drm dev pointer
4625  * @fbcon: notify the fbdev of resume
4626  *
4627  * Bring the hw back to operating state (all asics).
4628  * Returns 0 for success or an error on failure.
4629  * Called at driver resume.
4630  */
4631 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4632 {
4633 	struct amdgpu_device *adev = drm_to_adev(dev);
4634 	int r = 0;
4635 
4636 	if (amdgpu_sriov_vf(adev)) {
4637 		r = amdgpu_virt_request_full_gpu(adev, true);
4638 		if (r)
4639 			return r;
4640 	}
4641 
4642 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4643 		return 0;
4644 
4645 	if (adev->in_s0ix)
4646 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4647 
4648 	/* post card */
4649 	if (amdgpu_device_need_post(adev)) {
4650 		r = amdgpu_device_asic_init(adev);
4651 		if (r)
4652 			dev_err(adev->dev, "amdgpu asic init failed\n");
4653 	}
4654 
4655 	r = amdgpu_device_ip_resume(adev);
4656 
4657 	if (r) {
4658 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4659 		goto exit;
4660 	}
4661 	amdgpu_fence_driver_hw_init(adev);
4662 
4663 	if (!adev->in_s0ix) {
4664 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4665 		if (r)
4666 			goto exit;
4667 	}
4668 
4669 	r = amdgpu_device_ip_late_init(adev);
4670 	if (r)
4671 		goto exit;
4672 
4673 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4674 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4675 exit:
4676 	if (amdgpu_sriov_vf(adev)) {
4677 		amdgpu_virt_init_data_exchange(adev);
4678 		amdgpu_virt_release_full_gpu(adev, true);
4679 	}
4680 
4681 	if (r)
4682 		return r;
4683 
4684 	/* Make sure IB tests flushed */
4685 	flush_delayed_work(&adev->delayed_init_work);
4686 
4687 	if (fbcon)
4688 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4689 
4690 	amdgpu_ras_resume(adev);
4691 
4692 	if (adev->mode_info.num_crtc) {
4693 		/*
4694 		 * Most of the connector probing functions try to acquire runtime pm
4695 		 * refs to ensure that the GPU is powered on when connector polling is
4696 		 * performed. Since we're calling this from a runtime PM callback,
4697 		 * trying to acquire rpm refs will cause us to deadlock.
4698 		 *
4699 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4700 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4701 		 */
4702 #ifdef CONFIG_PM
4703 		dev->dev->power.disable_depth++;
4704 #endif
4705 		if (!adev->dc_enabled)
4706 			drm_helper_hpd_irq_event(dev);
4707 		else
4708 			drm_kms_helper_hotplug_event(dev);
4709 #ifdef CONFIG_PM
4710 		dev->dev->power.disable_depth--;
4711 #endif
4712 	}
4713 	adev->in_suspend = false;
4714 
4715 	if (adev->enable_mes)
4716 		amdgpu_mes_self_test(adev);
4717 
4718 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4719 		DRM_WARN("smart shift update failed\n");
4720 
4721 	return 0;
4722 }
4723 
4724 /**
4725  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4726  *
4727  * @adev: amdgpu_device pointer
4728  *
4729  * The list of all the hardware IPs that make up the asic is walked and
4730  * the check_soft_reset callbacks are run.  check_soft_reset determines
4731  * if the asic is still hung or not.
4732  * Returns true if any of the IPs are still in a hung state, false if not.
4733  */
4734 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4735 {
4736 	int i;
4737 	bool asic_hang = false;
4738 
4739 	if (amdgpu_sriov_vf(adev))
4740 		return true;
4741 
4742 	if (amdgpu_asic_need_full_reset(adev))
4743 		return true;
4744 
4745 	for (i = 0; i < adev->num_ip_blocks; i++) {
4746 		if (!adev->ip_blocks[i].status.valid)
4747 			continue;
4748 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4749 			adev->ip_blocks[i].status.hang =
4750 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4751 		if (adev->ip_blocks[i].status.hang) {
4752 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4753 			asic_hang = true;
4754 		}
4755 	}
4756 	return asic_hang;
4757 }
4758 
4759 /**
4760  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4761  *
4762  * @adev: amdgpu_device pointer
4763  *
4764  * The list of all the hardware IPs that make up the asic is walked and the
4765  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4766  * handles any IP specific hardware or software state changes that are
4767  * necessary for a soft reset to succeed.
4768  * Returns 0 on success, negative error code on failure.
4769  */
4770 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4771 {
4772 	int i, r = 0;
4773 
4774 	for (i = 0; i < adev->num_ip_blocks; i++) {
4775 		if (!adev->ip_blocks[i].status.valid)
4776 			continue;
4777 		if (adev->ip_blocks[i].status.hang &&
4778 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4779 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4780 			if (r)
4781 				return r;
4782 		}
4783 	}
4784 
4785 	return 0;
4786 }
4787 
4788 /**
4789  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4790  *
4791  * @adev: amdgpu_device pointer
4792  *
4793  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4794  * reset is necessary to recover.
4795  * Returns true if a full asic reset is required, false if not.
4796  */
4797 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4798 {
4799 	int i;
4800 
4801 	if (amdgpu_asic_need_full_reset(adev))
4802 		return true;
4803 
4804 	for (i = 0; i < adev->num_ip_blocks; i++) {
4805 		if (!adev->ip_blocks[i].status.valid)
4806 			continue;
4807 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4808 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4809 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4810 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4811 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4812 			if (adev->ip_blocks[i].status.hang) {
4813 				dev_info(adev->dev, "Some blocks need a full reset!\n");
4814 				return true;
4815 			}
4816 		}
4817 	}
4818 	return false;
4819 }
4820 
4821 /**
4822  * amdgpu_device_ip_soft_reset - do a soft reset
4823  *
4824  * @adev: amdgpu_device pointer
4825  *
4826  * The list of all the hardware IPs that make up the asic is walked and the
4827  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4828  * IP specific hardware or software state changes that are necessary to soft
4829  * reset the IP.
4830  * Returns 0 on success, negative error code on failure.
4831  */
4832 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4833 {
4834 	int i, r = 0;
4835 
4836 	for (i = 0; i < adev->num_ip_blocks; i++) {
4837 		if (!adev->ip_blocks[i].status.valid)
4838 			continue;
4839 		if (adev->ip_blocks[i].status.hang &&
4840 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4841 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4842 			if (r)
4843 				return r;
4844 		}
4845 	}
4846 
4847 	return 0;
4848 }
4849 
4850 /**
4851  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4852  *
4853  * @adev: amdgpu_device pointer
4854  *
4855  * The list of all the hardware IPs that make up the asic is walked and the
4856  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4857  * handles any IP specific hardware or software state changes that are
4858  * necessary after the IP has been soft reset.
4859  * Returns 0 on success, negative error code on failure.
4860  */
4861 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4862 {
4863 	int i, r = 0;
4864 
4865 	for (i = 0; i < adev->num_ip_blocks; i++) {
4866 		if (!adev->ip_blocks[i].status.valid)
4867 			continue;
4868 		if (adev->ip_blocks[i].status.hang &&
4869 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4870 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4871 		if (r)
4872 			return r;
4873 	}
4874 
4875 	return 0;
4876 }
4877 
4878 /**
4879  * amdgpu_device_recover_vram - Recover some VRAM contents
4880  *
4881  * @adev: amdgpu_device pointer
4882  *
4883  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4884  * restore things like GPUVM page tables after a GPU reset where
4885  * the contents of VRAM might be lost.
4886  *
4887  * Returns:
4888  * 0 on success, negative error code on failure.
4889  */
4890 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4891 {
4892 	struct dma_fence *fence = NULL, *next = NULL;
4893 	struct amdgpu_bo *shadow;
4894 	struct amdgpu_bo_vm *vmbo;
4895 	long r = 1, tmo;
4896 
4897 	if (amdgpu_sriov_runtime(adev))
4898 		tmo = msecs_to_jiffies(8000);
4899 	else
4900 		tmo = msecs_to_jiffies(100);
4901 
4902 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4903 	mutex_lock(&adev->shadow_list_lock);
4904 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4905 		/* If the vm is a compute context or adev is an APU, shadow will be NULL */
4906 		if (!vmbo->shadow)
4907 			continue;
4908 		shadow = vmbo->shadow;
4909 
4910 		/* No need to recover an evicted BO */
4911 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4912 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4913 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4914 			continue;
4915 
4916 		r = amdgpu_bo_restore_shadow(shadow, &next);
4917 		if (r)
4918 			break;
4919 
4920 		if (fence) {
4921 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4922 			dma_fence_put(fence);
4923 			fence = next;
4924 			if (tmo == 0) {
4925 				r = -ETIMEDOUT;
4926 				break;
4927 			} else if (tmo < 0) {
4928 				r = tmo;
4929 				break;
4930 			}
4931 		} else {
4932 			fence = next;
4933 		}
4934 	}
4935 	mutex_unlock(&adev->shadow_list_lock);
4936 
4937 	if (fence)
4938 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4939 	dma_fence_put(fence);
4940 
4941 	if (r < 0 || tmo <= 0) {
4942 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4943 		return -EIO;
4944 	}
4945 
4946 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4947 	return 0;
4948 }
4949 
4950 
4951 /**
4952  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4953  *
4954  * @adev: amdgpu_device pointer
4955  * @from_hypervisor: request from hypervisor
4956  *
4957  * Do a VF FLR and reinitialize the ASIC.
4958  * Returns 0 on success, negative error code on failure.
4959  */
4960 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4961 				     bool from_hypervisor)
4962 {
4963 	int r;
4964 	struct amdgpu_hive_info *hive = NULL;
4965 	int retry_limit = 0;
4966 
4967 retry:
4968 	amdgpu_amdkfd_pre_reset(adev);
4969 
4970 	if (from_hypervisor)
4971 		r = amdgpu_virt_request_full_gpu(adev, true);
4972 	else
4973 		r = amdgpu_virt_reset_gpu(adev);
4974 	if (r)
4975 		return r;
4976 	amdgpu_irq_gpu_reset_resume_helper(adev);
4977 
4978 	/* some SW cleanup that the VF needs to do before recovery */
4979 	amdgpu_virt_post_reset(adev);
4980 
4981 	/* Resume IP prior to SMC */
4982 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4983 	if (r)
4984 		goto error;
4985 
4986 	amdgpu_virt_init_data_exchange(adev);
4987 
4988 	r = amdgpu_device_fw_loading(adev);
4989 	if (r)
4990 		return r;
4991 
4992 	/* now we are okay to resume SMC/CP/SDMA */
4993 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4994 	if (r)
4995 		goto error;
4996 
4997 	hive = amdgpu_get_xgmi_hive(adev);
4998 	/* Update PSP FW topology after reset */
4999 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5000 		r = amdgpu_xgmi_update_topology(hive, adev);
5001 
5002 	if (hive)
5003 		amdgpu_put_xgmi_hive(hive);
5004 
5005 	if (!r) {
5006 		r = amdgpu_ib_ring_tests(adev);
5007 
5008 		amdgpu_amdkfd_post_reset(adev);
5009 	}
5010 
5011 error:
5012 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5013 		amdgpu_inc_vram_lost(adev);
5014 		r = amdgpu_device_recover_vram(adev);
5015 	}
5016 	amdgpu_virt_release_full_gpu(adev, true);
5017 
5018 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5019 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5020 			retry_limit++;
5021 			goto retry;
5022 		} else
5023 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5024 	}
5025 
5026 	return r;
5027 }
5028 
5029 /**
5030  * amdgpu_device_has_job_running - check if there is any job in the pending list
5031  *
5032  * @adev: amdgpu_device pointer
5033  *
5034  * Check if there is any job in the scheduler pending list.
5035  */
5036 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5037 {
5038 	int i;
5039 	struct drm_sched_job *job;
5040 
5041 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5042 		struct amdgpu_ring *ring = adev->rings[i];
5043 
5044 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5045 			continue;
5046 
5047 		spin_lock(&ring->sched.job_list_lock);
5048 		job = list_first_entry_or_null(&ring->sched.pending_list,
5049 					       struct drm_sched_job, list);
5050 		spin_unlock(&ring->sched.job_list_lock);
5051 		if (job)
5052 			return true;
5053 	}
5054 	return false;
5055 }
5056 
5057 /**
5058  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5059  *
5060  * @adev: amdgpu_device pointer
5061  *
5062  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5063  * a hung GPU.
5064  */
5065 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5066 {
5067 
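	/*
	 * Recovery policy sketch (inferred from the checks below): the
	 * amdgpu.gpu_recovery parameter disables recovery entirely when 0;
	 * otherwise recovery is always attempted when RAS poison mode is not
	 * supported (fatal error mode) or when running as an SR-IOV VF, and the
	 * default auto value (-1) enables recovery except on the legacy ASICs
	 * listed in the switch statement.
	 */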
5068 	if (amdgpu_gpu_recovery == 0)
5069 		goto disabled;
5070 
5071 	/* Skip soft reset check in fatal error mode */
5072 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5073 		return true;
5074 
5075 	if (amdgpu_sriov_vf(adev))
5076 		return true;
5077 
5078 	if (amdgpu_gpu_recovery == -1) {
5079 		switch (adev->asic_type) {
5080 #ifdef CONFIG_DRM_AMDGPU_SI
5081 		case CHIP_VERDE:
5082 		case CHIP_TAHITI:
5083 		case CHIP_PITCAIRN:
5084 		case CHIP_OLAND:
5085 		case CHIP_HAINAN:
5086 #endif
5087 #ifdef CONFIG_DRM_AMDGPU_CIK
5088 		case CHIP_KAVERI:
5089 		case CHIP_KABINI:
5090 		case CHIP_MULLINS:
5091 #endif
5092 		case CHIP_CARRIZO:
5093 		case CHIP_STONEY:
5094 		case CHIP_CYAN_SKILLFISH:
5095 			goto disabled;
5096 		default:
5097 			break;
5098 		}
5099 	}
5100 
5101 	return true;
5102 
5103 disabled:
5104 	dev_info(adev->dev, "GPU recovery disabled.\n");
5105 	return false;
5106 }
5107 
5108 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5109 {
5110 	u32 i;
5111 	int ret = 0;
5112 
5113 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5114 
5115 	dev_info(adev->dev, "GPU mode1 reset\n");
5116 
5117 	/* disable BM */
5118 	pci_clear_master(adev->pdev);
5119 
5120 	amdgpu_device_cache_pci_state(adev->pdev);
5121 
5122 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5123 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5124 		ret = amdgpu_dpm_mode1_reset(adev);
5125 	} else {
5126 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5127 		ret = psp_gpu_reset(adev);
5128 	}
5129 
5130 	if (ret)
5131 		goto mode1_reset_failed;
5132 
5133 	amdgpu_device_load_pci_state(adev->pdev);
5134 	ret = amdgpu_psp_wait_for_bootloader(adev);
5135 	if (ret)
5136 		goto mode1_reset_failed;
5137 
5138 	/* wait for asic to come out of reset */
5139 	for (i = 0; i < adev->usec_timeout; i++) {
5140 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5141 
5142 		if (memsize != 0xffffffff)
5143 			break;
5144 		udelay(1);
5145 	}
5146 
5147 	if (i >= adev->usec_timeout) {
5148 		ret = -ETIMEDOUT;
5149 		goto mode1_reset_failed;
5150 	}
5151 
5152 	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5153 
5154 	return 0;
5155 
5156 mode1_reset_failed:
5157 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5158 	return ret;
5159 }
5160 
5161 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5162 				 struct amdgpu_reset_context *reset_context)
5163 {
5164 	int i, r = 0;
5165 	struct amdgpu_job *job = NULL;
5166 	bool need_full_reset =
5167 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5168 
5169 	if (reset_context->reset_req_dev == adev)
5170 		job = reset_context->job;
5171 
5172 	if (amdgpu_sriov_vf(adev)) {
5173 		/* stop the data exchange thread */
5174 		amdgpu_virt_fini_data_exchange(adev);
5175 	}
5176 
5177 	amdgpu_fence_driver_isr_toggle(adev, true);
5178 
5179 	/* block all schedulers and reset given job's ring */
5180 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5181 		struct amdgpu_ring *ring = adev->rings[i];
5182 
5183 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5184 			continue;
5185 
5186 		/* Clear job fences from the fence driver so that force_completion
5187 		 * doesn't leave NULL and vm flush fences in the fence driver
5188 		 */
5189 		amdgpu_fence_driver_clear_job_fences(ring);
5190 
5191 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5192 		amdgpu_fence_driver_force_completion(ring);
5193 	}
5194 
5195 	amdgpu_fence_driver_isr_toggle(adev, false);
5196 
5197 	if (job && job->vm)
5198 		drm_sched_increase_karma(&job->base);
5199 
5200 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5201 	/* If reset handler not implemented, continue; otherwise return */
5202 	if (r == -EOPNOTSUPP)
5203 		r = 0;
5204 	else
5205 		return r;
5206 
5207 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5208 	if (!amdgpu_sriov_vf(adev)) {
5209 
5210 		if (!need_full_reset)
5211 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5212 
5213 		if (!need_full_reset && amdgpu_gpu_recovery &&
5214 		    amdgpu_device_ip_check_soft_reset(adev)) {
5215 			amdgpu_device_ip_pre_soft_reset(adev);
5216 			r = amdgpu_device_ip_soft_reset(adev);
5217 			amdgpu_device_ip_post_soft_reset(adev);
5218 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5219 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5220 				need_full_reset = true;
5221 			}
5222 		}
5223 
5224 		if (need_full_reset)
5225 			r = amdgpu_device_ip_suspend(adev);
5226 		if (need_full_reset)
5227 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5228 		else
5229 			clear_bit(AMDGPU_NEED_FULL_RESET,
5230 				  &reset_context->flags);
5231 	}
5232 
5233 	return r;
5234 }
5235 
5236 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5237 {
5238 	int i;
5239 
5240 	lockdep_assert_held(&adev->reset_domain->sem);
5241 
5242 	for (i = 0; i < adev->reset_info.num_regs; i++) {
5243 		adev->reset_info.reset_dump_reg_value[i] =
5244 			RREG32(adev->reset_info.reset_dump_reg_list[i]);
5245 
5246 		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5247 					     adev->reset_info.reset_dump_reg_value[i]);
5248 	}
5249 
5250 	return 0;
5251 }
5252 
5253 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5254 			 struct amdgpu_reset_context *reset_context)
5255 {
5256 	struct amdgpu_device *tmp_adev = NULL;
5257 	bool need_full_reset, skip_hw_reset, vram_lost = false;
5258 	int r = 0;
5259 	bool gpu_reset_for_dev_remove = false;
5260 
5261 	/* Try reset handler method first */
5262 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5263 				    reset_list);
5264 	amdgpu_reset_reg_dumps(tmp_adev);
5265 
5266 	reset_context->reset_device_list = device_list_handle;
5267 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5268 	/* If reset handler not implemented, continue; otherwise return */
5269 	if (r == -EOPNOTSUPP)
5270 		r = 0;
5271 	else
5272 		return r;
5273 
5274 	/* Reset handler not implemented, use the default method */
5275 	need_full_reset =
5276 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5277 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5278 
5279 	gpu_reset_for_dev_remove =
5280 		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5281 			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5282 
5283 	/*
5284 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5285 	 * to allow proper link negotiation in FW (within 1 sec)
5286 	 */
5287 	if (!skip_hw_reset && need_full_reset) {
5288 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5289 			/* For XGMI run all resets in parallel to speed up the process */
5290 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5291 				tmp_adev->gmc.xgmi.pending_reset = false;
5292 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5293 					r = -EALREADY;
5294 			} else
5295 				r = amdgpu_asic_reset(tmp_adev);
5296 
5297 			if (r) {
5298 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
5299 					 r, adev_to_drm(tmp_adev)->unique);
5300 				goto out;
5301 			}
5302 		}
5303 
5304 		/* For XGMI wait for all resets to complete before proceeding */
5305 		if (!r) {
5306 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5307 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5308 					flush_work(&tmp_adev->xgmi_reset_work);
5309 					r = tmp_adev->asic_reset_res;
5310 					if (r)
5311 						break;
5312 				}
5313 			}
5314 		}
5315 	}
5316 
5317 	if (!r && amdgpu_ras_intr_triggered()) {
5318 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5319 			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5320 		}
5321 
5322 		amdgpu_ras_intr_cleared();
5323 	}
5324 
5325 	/* Since the mode1 reset affects the base IP blocks, the
5326 	 * phase1 IP blocks need to be resumed. Otherwise there
5327 	 * will be a BIOS signature error and the PSP bootloader
5328 	 * won't be able to load its kdb the next time the driver is probed.
5329 	 */
5330 	if (gpu_reset_for_dev_remove) {
5331 		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5332 			amdgpu_device_ip_resume_phase1(tmp_adev);
5333 
5334 		goto end;
5335 	}
5336 
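	/*
	 * For a full reset, each device is re-initialized roughly in this
	 * order: atom BIOS post, IP resume phase1, firmware loading, XCP
	 * partition restore, IP resume phase2 and finally IP late init.
	 */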
5337 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5338 		if (need_full_reset) {
5339 			/* post card */
5340 			r = amdgpu_device_asic_init(tmp_adev);
5341 			if (r) {
5342 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5343 			} else {
5344 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5345 
5346 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5347 				if (r)
5348 					goto out;
5349 
5350 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5351 
5352 				amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5353 
5354 				if (vram_lost) {
5355 					DRM_INFO("VRAM is lost due to GPU reset!\n");
5356 					amdgpu_inc_vram_lost(tmp_adev);
5357 				}
5358 
5359 				r = amdgpu_device_fw_loading(tmp_adev);
5360 				if (r)
5361 					return r;
5362 
5363 				r = amdgpu_xcp_restore_partition_mode(
5364 					tmp_adev->xcp_mgr);
5365 				if (r)
5366 					goto out;
5367 
5368 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5369 				if (r)
5370 					goto out;
5371 
5372 				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5373 					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5374 
5375 				if (vram_lost)
5376 					amdgpu_device_fill_reset_magic(tmp_adev);
5377 
5378 				/*
5379 				 * Add this ASIC back as tracked since the reset
5380 				 * already completed successfully.
5381 				 */
5382 				amdgpu_register_gpu_instance(tmp_adev);
5383 
5384 				if (!reset_context->hive &&
5385 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5386 					amdgpu_xgmi_add_device(tmp_adev);
5387 
5388 				r = amdgpu_device_ip_late_init(tmp_adev);
5389 				if (r)
5390 					goto out;
5391 
5392 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5393 
5394 				/*
5395 				 * The GPU enters a bad state once the number of
5396 				 * faulty pages retired by ECC reaches the
5397 				 * threshold, and RAS recovery is scheduled next.
5398 				 * So add a check here to break recovery if the
5399 				 * bad page threshold has indeed been exceeded,
5400 				 * and remind the user to either retire this GPU
5401 				 * or set a bigger bad_page_threshold value when
5402 				 * probing the driver again.
5403 				 */
5404 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5405 					/* must succeed. */
5406 					amdgpu_ras_resume(tmp_adev);
5407 				} else {
5408 					r = -EINVAL;
5409 					goto out;
5410 				}
5411 
5412 				/* Update PSP FW topology after reset */
5413 				if (reset_context->hive &&
5414 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5415 					r = amdgpu_xgmi_update_topology(
5416 						reset_context->hive, tmp_adev);
5417 			}
5418 		}
5419 
5420 out:
5421 		if (!r) {
5422 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5423 			r = amdgpu_ib_ring_tests(tmp_adev);
5424 			if (r) {
5425 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5426 				need_full_reset = true;
5427 				r = -EAGAIN;
5428 				goto end;
5429 			}
5430 		}
5431 
5432 		if (!r)
5433 			r = amdgpu_device_recover_vram(tmp_adev);
5434 		else
5435 			tmp_adev->asic_reset_res = r;
5436 	}
5437 
5438 end:
5439 	if (need_full_reset)
5440 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5441 	else
5442 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5443 	return r;
5444 }
5445 
5446 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5447 {
5448 
5449 	switch (amdgpu_asic_reset_method(adev)) {
5450 	case AMD_RESET_METHOD_MODE1:
5451 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5452 		break;
5453 	case AMD_RESET_METHOD_MODE2:
5454 		adev->mp1_state = PP_MP1_STATE_RESET;
5455 		break;
5456 	default:
5457 		adev->mp1_state = PP_MP1_STATE_NONE;
5458 		break;
5459 	}
5460 }
5461 
5462 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5463 {
5464 	amdgpu_vf_error_trans_all(adev);
5465 	adev->mp1_state = PP_MP1_STATE_NONE;
5466 }
5467 
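/*
 * The GPU's HDMI/DP audio controller is typically exposed as PCI function 1
 * of the same device, which is why the helpers below look up bus/slot
 * function 1 to resume/suspend it around a GPU reset.
 */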
5468 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5469 {
5470 	struct pci_dev *p = NULL;
5471 
5472 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5473 			adev->pdev->bus->number, 1);
5474 	if (p) {
5475 		pm_runtime_enable(&(p->dev));
5476 		pm_runtime_resume(&(p->dev));
5477 	}
5478 
5479 	pci_dev_put(p);
5480 }
5481 
5482 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5483 {
5484 	enum amd_reset_method reset_method;
5485 	struct pci_dev *p = NULL;
5486 	u64 expires;
5487 
5488 	/*
5489 	 * For now, only BACO and mode1 reset are confirmed to suffer
5490 	 * from the audio issue if the audio device is not properly suspended.
5491 	 */
5492 	reset_method = amdgpu_asic_reset_method(adev);
5493 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5494 	     (reset_method != AMD_RESET_METHOD_MODE1))
5495 		return -EINVAL;
5496 
5497 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5498 			adev->pdev->bus->number, 1);
5499 	if (!p)
5500 		return -ENODEV;
5501 
5502 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5503 	if (!expires)
5504 		/*
5505 		 * If we cannot get the audio device autosuspend delay,
5506 		 * a fixed 4s interval is used. Since 3s is the audio
5507 		 * controller's default autosuspend delay setting, the
5508 		 * 4s used here is guaranteed to cover it.
5509 		 */
5510 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5511 
5512 	while (!pm_runtime_status_suspended(&(p->dev))) {
5513 		if (!pm_runtime_suspend(&(p->dev)))
5514 			break;
5515 
5516 		if (expires < ktime_get_mono_fast_ns()) {
5517 			dev_warn(adev->dev, "failed to suspend display audio\n");
5518 			pci_dev_put(p);
5519 			/* TODO: abort the succeeding gpu reset? */
5520 			return -ETIMEDOUT;
5521 		}
5522 	}
5523 
5524 	pm_runtime_disable(&(p->dev));
5525 
5526 	pci_dev_put(p);
5527 	return 0;
5528 }
5529 
5530 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5531 {
5532 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5533 
5534 #if defined(CONFIG_DEBUG_FS)
5535 	if (!amdgpu_sriov_vf(adev))
5536 		cancel_work(&adev->reset_work);
5537 #endif
5538 
5539 	if (adev->kfd.dev)
5540 		cancel_work(&adev->kfd.reset_work);
5541 
5542 	if (amdgpu_sriov_vf(adev))
5543 		cancel_work(&adev->virt.flr_work);
5544 
5545 	if (con && adev->ras_enabled)
5546 		cancel_work(&con->recovery_work);
5547 
5548 }
5549 
5550 /**
5551  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5552  *
5553  * @adev: amdgpu_device pointer
5554  * @job: which job triggered the hang
5555  * @reset_context: amdgpu reset context pointer
5556  *
5557  * Attempt to reset the GPU if it has hung (all ASICs).
5558  * Attempt a soft reset or full reset and reinitialize the ASIC.
5559  * Returns 0 for success or an error on failure.
5560  */
5561 
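/*
 * In outline, the recovery below locks the reset domain, stops the
 * schedulers and suspends display audio/KFD/RAS on every affected device,
 * performs the per-IP pre-reset and the ASIC reset itself (host-driven for
 * SRIOV), then resumes the devices, restarts the schedulers and releases
 * the locks again.
 */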
5562 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5563 			      struct amdgpu_job *job,
5564 			      struct amdgpu_reset_context *reset_context)
5565 {
5566 	struct list_head device_list, *device_list_handle =  NULL;
5567 	bool job_signaled = false;
5568 	struct amdgpu_hive_info *hive = NULL;
5569 	struct amdgpu_device *tmp_adev = NULL;
5570 	int i, r = 0;
5571 	bool need_emergency_restart = false;
5572 	bool audio_suspended = false;
5573 	bool gpu_reset_for_dev_remove = false;
5574 
5575 	gpu_reset_for_dev_remove =
5576 			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5577 				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5578 
5579 	/*
5580 	 * Special case: RAS triggered and full reset isn't supported
5581 	 */
5582 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5583 
5584 	/*
5585 	 * Flush RAM to disk so that after reboot
5586 	 * the user can read the log and see why the system rebooted.
5587 	 */
5588 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5589 		amdgpu_ras_get_context(adev)->reboot) {
5590 		DRM_WARN("Emergency reboot.");
5591 
5592 		ksys_sync_helper();
5593 		emergency_restart();
5594 	}
5595 
5596 	dev_info(adev->dev, "GPU %s begin!\n",
5597 		need_emergency_restart ? "jobs stop":"reset");
5598 
5599 	if (!amdgpu_sriov_vf(adev))
5600 		hive = amdgpu_get_xgmi_hive(adev);
5601 	if (hive)
5602 		mutex_lock(&hive->hive_lock);
5603 
5604 	reset_context->job = job;
5605 	reset_context->hive = hive;
5606 	/*
5607 	 * Build list of devices to reset.
5608 	 * In case we are in XGMI hive mode, re-sort the device list
5609 	 * to put adev in the first position.
5610 	 */
5611 	INIT_LIST_HEAD(&device_list);
5612 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5613 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5614 			list_add_tail(&tmp_adev->reset_list, &device_list);
5615 			if (gpu_reset_for_dev_remove && adev->shutdown)
5616 				tmp_adev->shutdown = true;
5617 		}
5618 		if (!list_is_first(&adev->reset_list, &device_list))
5619 			list_rotate_to_front(&adev->reset_list, &device_list);
5620 		device_list_handle = &device_list;
5621 	} else {
5622 		list_add_tail(&adev->reset_list, &device_list);
5623 		device_list_handle = &device_list;
5624 	}
5625 
5626 	/* We need to lock reset domain only once both for XGMI and single device */
5627 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5628 				    reset_list);
5629 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5630 
5631 	/* block all schedulers and reset given job's ring */
5632 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5633 
5634 		amdgpu_device_set_mp1_state(tmp_adev);
5635 
5636 		/*
5637 		 * Try to put the audio codec into suspend state
5638 		 * before the GPU reset starts.
5639 		 *
5640 		 * Because the power domain of the graphics device
5641 		 * is shared with the AZ power domain, we might
5642 		 * otherwise change the audio hardware behind the
5643 		 * audio driver's back, which would trigger some
5644 		 * audio codec errors.
5645 		 */
5646 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5647 			audio_suspended = true;
5648 
5649 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5650 
5651 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5652 
5653 		if (!amdgpu_sriov_vf(tmp_adev))
5654 			amdgpu_amdkfd_pre_reset(tmp_adev);
5655 
5656 		/*
5657 		 * Mark these ASICs to be reset as untracked first
5658 		 * and add them back after the reset completes
5659 		 */
5660 		amdgpu_unregister_gpu_instance(tmp_adev);
5661 
5662 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5663 
5664 		/* disable ras on ALL IPs */
5665 		if (!need_emergency_restart &&
5666 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5667 			amdgpu_ras_suspend(tmp_adev);
5668 
5669 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5670 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5671 
5672 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5673 				continue;
5674 
5675 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5676 
5677 			if (need_emergency_restart)
5678 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5679 		}
5680 		atomic_inc(&tmp_adev->gpu_reset_counter);
5681 	}
5682 
5683 	if (need_emergency_restart)
5684 		goto skip_sched_resume;
5685 
5686 	/*
5687 	 * Must check guilty signal here since after this point all old
5688 	 * HW fences are force signaled.
5689 	 *
5690 	 * job->base holds a reference to parent fence
5691 	 */
5692 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5693 		job_signaled = true;
5694 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5695 		goto skip_hw_reset;
5696 	}
5697 
5698 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5699 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5700 		if (gpu_reset_for_dev_remove) {
5701 			/* Workaround for ASICs that need to disable SMC first */
5702 			amdgpu_device_smu_fini_early(tmp_adev);
5703 		}
5704 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5705 		/* TODO: Should we stop? */
5706 		if (r) {
5707 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err %d for drm dev %s",
5708 				  r, adev_to_drm(tmp_adev)->unique);
5709 			tmp_adev->asic_reset_res = r;
5710 		}
5711 
5712 		/*
5713 		 * Drop all pending non-scheduler resets. Scheduler resets
5714 		 * were already dropped during drm_sched_stop.
5715 		 */
5716 		amdgpu_device_stop_pending_resets(tmp_adev);
5717 	}
5718 
5719 	/* Actual ASIC resets if needed. */
5720 	/* Host driver will handle XGMI hive reset for SRIOV */
5721 	if (amdgpu_sriov_vf(adev)) {
5722 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
5723 		if (r)
5724 			adev->asic_reset_res = r;
5725 
5726 		/* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
5727 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5728 			    IP_VERSION(9, 4, 2) ||
5729 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5730 			amdgpu_ras_resume(adev);
5731 	} else {
5732 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5733 		if (r == -EAGAIN)
5734 			goto retry;
5735 
5736 		if (!r && gpu_reset_for_dev_remove)
5737 			goto recover_end;
5738 	}
5739 
5740 skip_hw_reset:
5741 
5742 	/* Post ASIC reset for all devs. */
5743 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5744 
5745 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5746 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5747 
5748 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5749 				continue;
5750 
5751 			drm_sched_start(&ring->sched, true);
5752 		}
5753 
5754 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5755 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5756 
5757 		if (tmp_adev->asic_reset_res)
5758 			r = tmp_adev->asic_reset_res;
5759 
5760 		tmp_adev->asic_reset_res = 0;
5761 
5762 		if (r) {
5763 			/* bad news, how to tell it to userspace ? */
5764 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5765 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5766 		} else {
5767 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5768 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5769 				DRM_WARN("smart shift update failed\n");
5770 		}
5771 	}
5772 
5773 skip_sched_resume:
5774 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5775 		/* unlock kfd: SRIOV would do it separately */
5776 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5777 			amdgpu_amdkfd_post_reset(tmp_adev);
5778 
5779 		/* kfd_post_reset will do nothing if the kfd device is not initialized;
5780 		 * bring up kfd here if it was not initialized before
5781 		 */
5782 		if (!adev->kfd.init_complete)
5783 			amdgpu_amdkfd_device_init(adev);
5784 
5785 		if (audio_suspended)
5786 			amdgpu_device_resume_display_audio(tmp_adev);
5787 
5788 		amdgpu_device_unset_mp1_state(tmp_adev);
5789 
5790 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5791 	}
5792 
5793 recover_end:
5794 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5795 					    reset_list);
5796 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5797 
5798 	if (hive) {
5799 		mutex_unlock(&hive->hive_lock);
5800 		amdgpu_put_xgmi_hive(hive);
5801 	}
5802 
5803 	if (r)
5804 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5805 
5806 	atomic_set(&adev->reset_domain->reset_res, r);
5807 	return r;
5808 }
5809 
5810 /**
5811  * amdgpu_device_partner_bandwidth - find the bandwidth of the appropriate partner
5812  *
5813  * @adev: amdgpu_device pointer
5814  * @speed: pointer to the speed of the link
5815  * @width: pointer to the width of the link
5816  *
5817  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5818  * first physical partner to an AMD dGPU.
5819  * This will exclude any virtual switches and links.
5820  */
5821 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5822 					    enum pci_bus_speed *speed,
5823 					    enum pcie_link_width *width)
5824 {
5825 	struct pci_dev *parent = adev->pdev;
5826 
5827 	if (!speed || !width)
5828 		return;
5829 
5830 	*speed = PCI_SPEED_UNKNOWN;
5831 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5832 
5833 	while ((parent = pci_upstream_bridge(parent))) {
5834 		/* skip upstream/downstream switches internal to dGPU */
5835 		if (parent->vendor == PCI_VENDOR_ID_ATI)
5836 			continue;
5837 		*speed = pcie_get_speed_cap(parent);
5838 		*width = pcie_get_width_cap(parent);
5839 		break;
5840 	}
5841 }
5842 
5843 /**
5844  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5845  *
5846  * @adev: amdgpu_device pointer
5847  *
5848  * Fetches and stores in the driver the PCIE capabilities (gen speed
5849  * and lanes) of the slot the device is in. Handles APUs and
5850  * virtualized environments where PCIE config space may not be available.
5851  */
5852 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5853 {
5854 	struct pci_dev *pdev;
5855 	enum pci_bus_speed speed_cap, platform_speed_cap;
5856 	enum pcie_link_width platform_link_width;
5857 
5858 	if (amdgpu_pcie_gen_cap)
5859 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5860 
5861 	if (amdgpu_pcie_lane_cap)
5862 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5863 
5864 	/* covers APUs as well */
5865 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5866 		if (adev->pm.pcie_gen_mask == 0)
5867 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5868 		if (adev->pm.pcie_mlw_mask == 0)
5869 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5870 		return;
5871 	}
5872 
5873 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5874 		return;
5875 
5876 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5877 					&platform_link_width);
5878 
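
	/*
	 * The masks computed below are cumulative: when a higher PCIe
	 * generation or a wider link is supported, all lower generations and
	 * widths are set as well, so power management can pick from any
	 * supported level.
	 */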
5879 	if (adev->pm.pcie_gen_mask == 0) {
5880 		/* asic caps */
5881 		pdev = adev->pdev;
5882 		speed_cap = pcie_get_speed_cap(pdev);
5883 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5884 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5885 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5886 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5887 		} else {
5888 			if (speed_cap == PCIE_SPEED_32_0GT)
5889 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5890 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5891 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5892 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5893 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5894 			else if (speed_cap == PCIE_SPEED_16_0GT)
5895 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5896 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5897 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5898 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5899 			else if (speed_cap == PCIE_SPEED_8_0GT)
5900 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5901 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5902 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5903 			else if (speed_cap == PCIE_SPEED_5_0GT)
5904 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5905 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5906 			else
5907 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5908 		}
5909 		/* platform caps */
5910 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5911 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5912 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5913 		} else {
5914 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5915 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5916 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5917 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5918 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5919 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5920 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5921 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5922 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5923 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5924 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5925 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5926 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5927 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5928 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5929 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5930 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5931 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5932 			else
5933 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5934 
5935 		}
5936 	}
5937 	if (adev->pm.pcie_mlw_mask == 0) {
5938 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5939 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5940 		} else {
5941 			switch (platform_link_width) {
5942 			case PCIE_LNK_X32:
5943 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5944 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5945 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5946 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5947 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5948 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5949 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5950 				break;
5951 			case PCIE_LNK_X16:
5952 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5953 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5954 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5955 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5956 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5957 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5958 				break;
5959 			case PCIE_LNK_X12:
5960 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5961 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5962 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5963 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5964 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5965 				break;
5966 			case PCIE_LNK_X8:
5967 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5968 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5969 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5970 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5971 				break;
5972 			case PCIE_LNK_X4:
5973 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5974 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5975 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5976 				break;
5977 			case PCIE_LNK_X2:
5978 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5979 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5980 				break;
5981 			case PCIE_LNK_X1:
5982 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5983 				break;
5984 			default:
5985 				break;
5986 			}
5987 		}
5988 	}
5989 }
5990 
5991 /**
5992  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5993  *
5994  * @adev: amdgpu_device pointer
5995  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5996  *
5997  * Return true if @peer_adev can access (DMA) @adev through the PCIe
5998  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5999  * @peer_adev.
6000  */
6001 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6002 				      struct amdgpu_device *peer_adev)
6003 {
6004 #ifdef CONFIG_HSA_AMD_P2P
6005 	uint64_t address_mask = peer_adev->dev->dma_mask ?
6006 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6007 	resource_size_t aper_limit =
6008 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
6009 	bool p2p_access =
6010 		!adev->gmc.xgmi.connected_to_cpu &&
6011 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6012 
6013 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6014 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6015 		!(adev->gmc.aper_base & address_mask ||
6016 		  aper_limit & address_mask));
6017 #else
6018 	return false;
6019 #endif
6020 }
6021 
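/*
 * BACO ("Bus Active, Chip Off") keeps the PCIe link alive while most of the
 * chip is powered down. Doorbell interrupts are gated around the transition
 * when RAS is enabled.
 */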
6022 int amdgpu_device_baco_enter(struct drm_device *dev)
6023 {
6024 	struct amdgpu_device *adev = drm_to_adev(dev);
6025 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6026 
6027 	if (!amdgpu_device_supports_baco(dev))
6028 		return -ENOTSUPP;
6029 
6030 	if (ras && adev->ras_enabled &&
6031 	    adev->nbio.funcs->enable_doorbell_interrupt)
6032 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6033 
6034 	return amdgpu_dpm_baco_enter(adev);
6035 }
6036 
6037 int amdgpu_device_baco_exit(struct drm_device *dev)
6038 {
6039 	struct amdgpu_device *adev = drm_to_adev(dev);
6040 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6041 	int ret = 0;
6042 
6043 	if (!amdgpu_device_supports_baco(dev))
6044 		return -ENOTSUPP;
6045 
6046 	ret = amdgpu_dpm_baco_exit(adev);
6047 	if (ret)
6048 		return ret;
6049 
6050 	if (ras && adev->ras_enabled &&
6051 	    adev->nbio.funcs->enable_doorbell_interrupt)
6052 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6053 
6054 	if (amdgpu_passthrough(adev) &&
6055 	    adev->nbio.funcs->clear_doorbell_interrupt)
6056 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6057 
6058 	return 0;
6059 }
6060 
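/*
 * PCI error recovery callbacks. The PCI core invokes them roughly in the
 * order error_detected -> mmio_enabled (only for recoverable channel
 * states) -> slot_reset -> resume; see
 * Documentation/PCI/pci-error-recovery.rst for the full state machine.
 */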
6061 /**
6062  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6063  * @pdev: PCI device struct
6064  * @state: PCI channel state
6065  *
6066  * Description: Called when a PCI error is detected.
6067  *
6068  * Return: PCI_ERS_RESULT_CAN_RECOVER, PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6069  */
6070 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6071 {
6072 	struct drm_device *dev = pci_get_drvdata(pdev);
6073 	struct amdgpu_device *adev = drm_to_adev(dev);
6074 	int i;
6075 
6076 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6077 
6078 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6079 		DRM_WARN("No support for XGMI hive yet...");
6080 		return PCI_ERS_RESULT_DISCONNECT;
6081 	}
6082 
6083 	adev->pci_channel_state = state;
6084 
6085 	switch (state) {
6086 	case pci_channel_io_normal:
6087 		return PCI_ERS_RESULT_CAN_RECOVER;
6088 	/* Fatal error, prepare for slot reset */
6089 	case pci_channel_io_frozen:
6090 		/*
6091 		 * Locking adev->reset_domain->sem will prevent any external access
6092 		 * to GPU during PCI error recovery
6093 		 */
6094 		amdgpu_device_lock_reset_domain(adev->reset_domain);
6095 		amdgpu_device_set_mp1_state(adev);
6096 
6097 		/*
6098 		 * Block any work scheduling as we do for regular GPU reset
6099 		 * for the duration of the recovery
6100 		 */
6101 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6102 			struct amdgpu_ring *ring = adev->rings[i];
6103 
6104 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
6105 				continue;
6106 
6107 			drm_sched_stop(&ring->sched, NULL);
6108 		}
6109 		atomic_inc(&adev->gpu_reset_counter);
6110 		return PCI_ERS_RESULT_NEED_RESET;
6111 	case pci_channel_io_perm_failure:
6112 		/* Permanent error, prepare for device removal */
6113 		return PCI_ERS_RESULT_DISCONNECT;
6114 	}
6115 
6116 	return PCI_ERS_RESULT_NEED_RESET;
6117 }
6118 
6119 /**
6120  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6121  * @pdev: pointer to PCI device
6122  */
6123 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6124 {
6125 
6126 	DRM_INFO("PCI error: mmio enabled callback!!\n");
6127 
6128 	/* TODO - dump whatever for debugging purposes */
6129 
6130 	/* This is called only if amdgpu_pci_error_detected returns
6131 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6132 	 * works, no need to reset slot.
6133 	 */
6134 
6135 	return PCI_ERS_RESULT_RECOVERED;
6136 }
6137 
6138 /**
6139  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6140  * @pdev: PCI device struct
6141  *
6142  * Description: This routine is called by the pci error recovery
6143  * code after the PCI slot has been reset, just before we
6144  * should resume normal operations.
6145  */
6146 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6147 {
6148 	struct drm_device *dev = pci_get_drvdata(pdev);
6149 	struct amdgpu_device *adev = drm_to_adev(dev);
6150 	int r, i;
6151 	struct amdgpu_reset_context reset_context;
6152 	u32 memsize;
6153 	struct list_head device_list;
6154 
6155 	DRM_INFO("PCI error: slot reset callback!!\n");
6156 
6157 	memset(&reset_context, 0, sizeof(reset_context));
6158 
6159 	INIT_LIST_HEAD(&device_list);
6160 	list_add_tail(&adev->reset_list, &device_list);
6161 
6162 	/* wait for asic to come out of reset */
6163 	msleep(500);
6164 
6165 	/* Restore PCI confspace */
6166 	amdgpu_device_load_pci_state(pdev);
6167 
6168 	/* confirm ASIC came out of reset */
6169 	for (i = 0; i < adev->usec_timeout; i++) {
6170 		memsize = amdgpu_asic_get_config_memsize(adev);
6171 
6172 		if (memsize != 0xffffffff)
6173 			break;
6174 		udelay(1);
6175 	}
6176 	if (memsize == 0xffffffff) {
6177 		r = -ETIME;
6178 		goto out;
6179 	}
6180 
6181 	reset_context.method = AMD_RESET_METHOD_NONE;
6182 	reset_context.reset_req_dev = adev;
6183 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6184 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6185 
6186 	adev->no_hw_access = true;
6187 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6188 	adev->no_hw_access = false;
6189 	if (r)
6190 		goto out;
6191 
6192 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6193 
6194 out:
6195 	if (!r) {
6196 		if (amdgpu_device_cache_pci_state(adev->pdev))
6197 			pci_restore_state(adev->pdev);
6198 
6199 		DRM_INFO("PCIe error recovery succeeded\n");
6200 	} else {
6201 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
6202 		amdgpu_device_unset_mp1_state(adev);
6203 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6204 	}
6205 
6206 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6207 }
6208 
6209 /**
6210  * amdgpu_pci_resume() - resume normal ops after PCI reset
6211  * @pdev: pointer to PCI device
6212  *
6213  * Called when the error recovery driver tells us that it's
6214  * OK to resume normal operation.
6215  */
6216 void amdgpu_pci_resume(struct pci_dev *pdev)
6217 {
6218 	struct drm_device *dev = pci_get_drvdata(pdev);
6219 	struct amdgpu_device *adev = drm_to_adev(dev);
6220 	int i;
6221 
6222 
6223 	DRM_INFO("PCI error: resume callback!!\n");
6224 
6225 	/* Only continue execution for the case of pci_channel_io_frozen */
6226 	if (adev->pci_channel_state != pci_channel_io_frozen)
6227 		return;
6228 
6229 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6230 		struct amdgpu_ring *ring = adev->rings[i];
6231 
6232 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
6233 			continue;
6234 
6235 		drm_sched_start(&ring->sched, true);
6236 	}
6237 
6238 	amdgpu_device_unset_mp1_state(adev);
6239 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6240 }
6241 
6242 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6243 {
6244 	struct drm_device *dev = pci_get_drvdata(pdev);
6245 	struct amdgpu_device *adev = drm_to_adev(dev);
6246 	int r;
6247 
6248 	r = pci_save_state(pdev);
6249 	if (!r) {
6250 		kfree(adev->pci_state);
6251 
6252 		adev->pci_state = pci_store_saved_state(pdev);
6253 
6254 		if (!adev->pci_state) {
6255 			DRM_ERROR("Failed to store PCI saved state");
6256 			return false;
6257 		}
6258 	} else {
6259 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6260 		return false;
6261 	}
6262 
6263 	return true;
6264 }
6265 
6266 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6267 {
6268 	struct drm_device *dev = pci_get_drvdata(pdev);
6269 	struct amdgpu_device *adev = drm_to_adev(dev);
6270 	int r;
6271 
6272 	if (!adev->pci_state)
6273 		return false;
6274 
6275 	r = pci_load_saved_state(pdev, adev->pci_state);
6276 
6277 	if (!r) {
6278 		pci_restore_state(pdev);
6279 	} else {
6280 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6281 		return false;
6282 	}
6283 
6284 	return true;
6285 }
6286 
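/*
 * HDP (Host Data Path) cache maintenance: a flush makes CPU writes through
 * the BAR visible to the GPU, an invalidate makes GPU writes visible to
 * subsequent CPU reads. Both are skipped where no HDP cache sits in the
 * path (non-passthrough APUs, or devices connected to the CPU via XGMI).
 */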
6287 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6288 		struct amdgpu_ring *ring)
6289 {
6290 #ifdef CONFIG_X86_64
6291 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6292 		return;
6293 #endif
6294 	if (adev->gmc.xgmi.connected_to_cpu)
6295 		return;
6296 
6297 	if (ring && ring->funcs->emit_hdp_flush)
6298 		amdgpu_ring_emit_hdp_flush(ring);
6299 	else
6300 		amdgpu_asic_flush_hdp(adev, ring);
6301 }
6302 
6303 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6304 		struct amdgpu_ring *ring)
6305 {
6306 #ifdef CONFIG_X86_64
6307 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6308 		return;
6309 #endif
6310 	if (adev->gmc.xgmi.connected_to_cpu)
6311 		return;
6312 
6313 	amdgpu_asic_invalidate_hdp(adev, ring);
6314 }
6315 
6316 int amdgpu_in_reset(struct amdgpu_device *adev)
6317 {
6318 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6319 }
6320 
6321 /**
6322  * amdgpu_device_halt() - bring hardware to some kind of halt state
6323  *
6324  * @adev: amdgpu_device pointer
6325  *
6326  * Bring hardware to some kind of halt state so that no one can touch it
6327  * any more. It helps to maintain the error context when an error occurs.
6328  * Compared to a simple hang, the system will stay stable at least for SSH
6329  * access. Then it should be trivial to inspect the hardware state and
6330  * see what's going on. Implemented as follows:
6331  *
6332  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6333  *    clears all CPU mappings to device, disallows remappings through page faults
6334  * 2. amdgpu_irq_disable_all() disables all interrupts
6335  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6336  * 4. set adev->no_hw_access to avoid potential crashes after step 5
6337  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6338  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6339  *    flush any in flight DMA operations
6340  */
6341 void amdgpu_device_halt(struct amdgpu_device *adev)
6342 {
6343 	struct pci_dev *pdev = adev->pdev;
6344 	struct drm_device *ddev = adev_to_drm(adev);
6345 
6346 	amdgpu_xcp_dev_unplug(adev);
6347 	drm_dev_unplug(ddev);
6348 
6349 	amdgpu_irq_disable_all(adev);
6350 
6351 	amdgpu_fence_driver_hw_fini(adev);
6352 
6353 	adev->no_hw_access = true;
6354 
6355 	amdgpu_device_unmap_mmio(adev);
6356 
6357 	pci_disable_device(pdev);
6358 	pci_wait_for_pending_transaction(pdev);
6359 }
6360 
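/*
 * Indirect PCIe port register access: the register offset is written to the
 * NBIO-provided index register and the value is then read from or written
 * to the matching data register, all under pcie_idx_lock so that index/data
 * pairs from different callers cannot interleave.
 */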
6361 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6362 				u32 reg)
6363 {
6364 	unsigned long flags, address, data;
6365 	u32 r;
6366 
6367 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6368 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6369 
6370 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6371 	WREG32(address, reg * 4);
6372 	(void)RREG32(address);
6373 	r = RREG32(data);
6374 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6375 	return r;
6376 }
6377 
6378 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6379 				u32 reg, u32 v)
6380 {
6381 	unsigned long flags, address, data;
6382 
6383 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6384 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6385 
6386 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6387 	WREG32(address, reg * 4);
6388 	(void)RREG32(address);
6389 	WREG32(data, v);
6390 	(void)RREG32(data);
6391 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6392 }
6393 
6394 /**
6395  * amdgpu_device_switch_gang - switch to a new gang
6396  * @adev: amdgpu_device pointer
6397  * @gang: the gang to switch to
6398  *
6399  * Try to switch to a new gang.
6400  * Returns: NULL if we switched to the new gang or a reference to the current
6401  * gang leader.
6402  */
6403 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6404 					    struct dma_fence *gang)
6405 {
6406 	struct dma_fence *old = NULL;
6407 
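	/*
	 * Lock-free update of adev->gang_submit: grab the current gang under
	 * RCU, bail out with a reference if it hasn't signaled yet, and
	 * otherwise install the new gang with cmpxchg(), retrying if another
	 * thread swapped it in the meantime.
	 */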
6408 	do {
6409 		dma_fence_put(old);
6410 		rcu_read_lock();
6411 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
6412 		rcu_read_unlock();
6413 
6414 		if (old == gang)
6415 			break;
6416 
6417 		if (!dma_fence_is_signaled(old))
6418 			return old;
6419 
6420 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6421 			 old, gang) != old);
6422 
6423 	dma_fence_put(old);
6424 	return NULL;
6425 }
6426 
6427 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6428 {
6429 	switch (adev->asic_type) {
6430 #ifdef CONFIG_DRM_AMDGPU_SI
6431 	case CHIP_HAINAN:
6432 #endif
6433 	case CHIP_TOPAZ:
6434 		/* chips with no display hardware */
6435 		return false;
6436 #ifdef CONFIG_DRM_AMDGPU_SI
6437 	case CHIP_TAHITI:
6438 	case CHIP_PITCAIRN:
6439 	case CHIP_VERDE:
6440 	case CHIP_OLAND:
6441 #endif
6442 #ifdef CONFIG_DRM_AMDGPU_CIK
6443 	case CHIP_BONAIRE:
6444 	case CHIP_HAWAII:
6445 	case CHIP_KAVERI:
6446 	case CHIP_KABINI:
6447 	case CHIP_MULLINS:
6448 #endif
6449 	case CHIP_TONGA:
6450 	case CHIP_FIJI:
6451 	case CHIP_POLARIS10:
6452 	case CHIP_POLARIS11:
6453 	case CHIP_POLARIS12:
6454 	case CHIP_VEGAM:
6455 	case CHIP_CARRIZO:
6456 	case CHIP_STONEY:
6457 		/* chips with display hardware */
6458 		return true;
6459 	default:
6460 		/* IP discovery */
6461 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6462 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6463 			return false;
6464 		return true;
6465 	}
6466 }
6467 
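/*
 * Poll @reg_addr until (value & mask) == expected_value. The timeout counter
 * (adev->usec_timeout iterations, with ~1us delays while the value is
 * unchanged) restarts whenever the register value changes, so only a truly
 * stuck register times out. A hypothetical caller (register names made up
 * for illustration) might look like:
 *
 *	r = amdgpu_device_wait_on_rreg(adev, 0, regSTATUS, "STATUS",
 *				       STATUS_READY, STATUS_READY_MASK);
 */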
6468 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6469 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6470 		uint32_t expected_value, uint32_t mask)
6471 {
6472 	uint32_t ret = 0;
6473 	uint32_t old_ = 0;
6474 	uint32_t tmp_ = RREG32(reg_addr);
6475 	uint32_t loop = adev->usec_timeout;
6476 
6477 	while ((tmp_ & (mask)) != (expected_value)) {
6478 		if (old_ != tmp_) {
6479 			loop = adev->usec_timeout;
6480 			old_ = tmp_;
6481 		} else
6482 			udelay(1);
6483 		tmp_ = RREG32(reg_addr);
6484 		loop--;
6485 		if (!loop) {
6486 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6487 				  inst, reg_name, (uint32_t)expected_value,
6488 				  (uint32_t)(tmp_ & (mask)));
6489 			ret = -ETIMEDOUT;
6490 			break;
6491 		}
6492 	}
6493 	return ret;
6494 }
6495