xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision c435bce6af9b2a277662698875a689c389358f17)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
37 
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 #include "amdgpu_dev_coredump.h"
78 
79 #include <linux/suspend.h>
80 #include <drm/task_barrier.h>
81 #include <linux/pm_runtime.h>
82 
83 #include <drm/drm_drv.h>
84 
85 #if IS_ENABLED(CONFIG_X86)
86 #include <asm/intel-family.h>
87 #endif
88 
89 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
96 
97 #define AMDGPU_RESUME_MS		2000
98 #define AMDGPU_MAX_RETRY_LIMIT		2
99 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
100 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
101 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
102 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
103 
104 static const struct drm_driver amdgpu_kms_driver;
105 
106 const char *amdgpu_asic_name[] = {
107 	"TAHITI",
108 	"PITCAIRN",
109 	"VERDE",
110 	"OLAND",
111 	"HAINAN",
112 	"BONAIRE",
113 	"KAVERI",
114 	"KABINI",
115 	"HAWAII",
116 	"MULLINS",
117 	"TOPAZ",
118 	"TONGA",
119 	"FIJI",
120 	"CARRIZO",
121 	"STONEY",
122 	"POLARIS10",
123 	"POLARIS11",
124 	"POLARIS12",
125 	"VEGAM",
126 	"VEGA10",
127 	"VEGA12",
128 	"VEGA20",
129 	"RAVEN",
130 	"ARCTURUS",
131 	"RENOIR",
132 	"ALDEBARAN",
133 	"NAVI10",
134 	"CYAN_SKILLFISH",
135 	"NAVI14",
136 	"NAVI12",
137 	"SIENNA_CICHLID",
138 	"NAVY_FLOUNDER",
139 	"VANGOGH",
140 	"DIMGREY_CAVEFISH",
141 	"BEIGE_GOBY",
142 	"YELLOW_CARP",
143 	"IP DISCOVERY",
144 	"LAST",
145 };
146 
147 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
148 
149 /**
150  * DOC: pcie_replay_count
151  *
152  * The amdgpu driver provides a sysfs API for reporting the total number
153  * of PCIe replays (NAKs)
154  * The file pcie_replay_count is used for this and returns the total
155  * number of replays as a sum of the NAKs generated and NAKs received
156  */
157 
158 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
159 		struct device_attribute *attr, char *buf)
160 {
161 	struct drm_device *ddev = dev_get_drvdata(dev);
162 	struct amdgpu_device *adev = drm_to_adev(ddev);
163 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
164 
165 	return sysfs_emit(buf, "%llu\n", cnt);
166 }
167 
168 static DEVICE_ATTR(pcie_replay_count, 0444,
169 		amdgpu_device_get_pcie_replay_count, NULL);
170 
171 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
172 					  struct bin_attribute *attr, char *buf,
173 					  loff_t ppos, size_t count)
174 {
175 	struct device *dev = kobj_to_dev(kobj);
176 	struct drm_device *ddev = dev_get_drvdata(dev);
177 	struct amdgpu_device *adev = drm_to_adev(ddev);
178 	ssize_t bytes_read;
179 
180 	switch (ppos) {
181 	case AMDGPU_SYS_REG_STATE_XGMI:
182 		bytes_read = amdgpu_asic_get_reg_state(
183 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
184 		break;
185 	case AMDGPU_SYS_REG_STATE_WAFL:
186 		bytes_read = amdgpu_asic_get_reg_state(
187 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
188 		break;
189 	case AMDGPU_SYS_REG_STATE_PCIE:
190 		bytes_read = amdgpu_asic_get_reg_state(
191 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
192 		break;
193 	case AMDGPU_SYS_REG_STATE_USR:
194 		bytes_read = amdgpu_asic_get_reg_state(
195 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
196 		break;
197 	case AMDGPU_SYS_REG_STATE_USR_1:
198 		bytes_read = amdgpu_asic_get_reg_state(
199 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
200 		break;
201 	default:
202 		return -EINVAL;
203 	}
204 
205 	return bytes_read;
206 }
207 
208 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
209 	 AMDGPU_SYS_REG_STATE_END);
210 
211 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
212 {
213 	int ret;
214 
215 	if (!amdgpu_asic_get_reg_state_supported(adev))
216 		return 0;
217 
218 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
219 
220 	return ret;
221 }
222 
223 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
224 {
225 	if (!amdgpu_asic_get_reg_state_supported(adev))
226 		return;
227 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
228 }
229 
230 /**
231  * DOC: board_info
232  *
233  * The amdgpu driver provides a sysfs API for giving board related information.
234  * It provides the form factor information in the format
235  *
236  *   type : form factor
237  *
238  * Possible form factor values
239  *
240  * - "cem"		- PCIE CEM card
241  * - "oam"		- Open Compute Accelerator Module
242  * - "unknown"	- Not known
243  *
244  */
245 
246 static ssize_t amdgpu_device_get_board_info(struct device *dev,
247 					    struct device_attribute *attr,
248 					    char *buf)
249 {
250 	struct drm_device *ddev = dev_get_drvdata(dev);
251 	struct amdgpu_device *adev = drm_to_adev(ddev);
252 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
253 	const char *pkg;
254 
255 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
256 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
257 
258 	switch (pkg_type) {
259 	case AMDGPU_PKG_TYPE_CEM:
260 		pkg = "cem";
261 		break;
262 	case AMDGPU_PKG_TYPE_OAM:
263 		pkg = "oam";
264 		break;
265 	default:
266 		pkg = "unknown";
267 		break;
268 	}
269 
270 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
271 }
272 
273 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
274 
275 static struct attribute *amdgpu_board_attrs[] = {
276 	&dev_attr_board_info.attr,
277 	NULL,
278 };
279 
280 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
281 					     struct attribute *attr, int n)
282 {
283 	struct device *dev = kobj_to_dev(kobj);
284 	struct drm_device *ddev = dev_get_drvdata(dev);
285 	struct amdgpu_device *adev = drm_to_adev(ddev);
286 
287 	if (adev->flags & AMD_IS_APU)
288 		return 0;
289 
290 	return attr->mode;
291 }
292 
293 static const struct attribute_group amdgpu_board_attrs_group = {
294 	.attrs = amdgpu_board_attrs,
295 	.is_visible = amdgpu_board_attrs_is_visible
296 };
297 
298 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
299 
300 
301 /**
302  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
303  *
304  * @dev: drm_device pointer
305  *
306  * Returns true if the device is a dGPU with ATPX power control,
307  * otherwise return false.
308  */
309 bool amdgpu_device_supports_px(struct drm_device *dev)
310 {
311 	struct amdgpu_device *adev = drm_to_adev(dev);
312 
313 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
314 		return true;
315 	return false;
316 }
317 
318 /**
319  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
320  *
321  * @dev: drm_device pointer
322  *
323  * Returns true if the device is a dGPU with ACPI power control,
324  * otherwise return false.
325  */
326 bool amdgpu_device_supports_boco(struct drm_device *dev)
327 {
328 	struct amdgpu_device *adev = drm_to_adev(dev);
329 
330 	if (adev->has_pr3 ||
331 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
332 		return true;
333 	return false;
334 }
335 
336 /**
337  * amdgpu_device_supports_baco - Does the device support BACO
338  *
339  * @dev: drm_device pointer
340  *
341  * Returns true if the device supporte BACO,
342  * otherwise return false.
343  */
344 bool amdgpu_device_supports_baco(struct drm_device *dev)
345 {
346 	struct amdgpu_device *adev = drm_to_adev(dev);
347 
348 	return amdgpu_asic_supports_baco(adev);
349 }
350 
351 /**
352  * amdgpu_device_supports_smart_shift - Is the device dGPU with
353  * smart shift support
354  *
355  * @dev: drm_device pointer
356  *
357  * Returns true if the device is a dGPU with Smart Shift support,
358  * otherwise returns false.
359  */
360 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
361 {
362 	return (amdgpu_device_supports_boco(dev) &&
363 		amdgpu_acpi_is_power_shift_control_supported());
364 }
365 
366 /*
367  * VRAM access helper functions
368  */
369 
370 /**
371  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
372  *
373  * @adev: amdgpu_device pointer
374  * @pos: offset of the buffer in vram
375  * @buf: virtual address of the buffer in system memory
376  * @size: read/write size, sizeof(@buf) must > @size
377  * @write: true - write to vram, otherwise - read from vram
378  */
379 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
380 			     void *buf, size_t size, bool write)
381 {
382 	unsigned long flags;
383 	uint32_t hi = ~0, tmp = 0;
384 	uint32_t *data = buf;
385 	uint64_t last;
386 	int idx;
387 
388 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
389 		return;
390 
391 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
392 
393 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
394 	for (last = pos + size; pos < last; pos += 4) {
395 		tmp = pos >> 31;
396 
397 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
398 		if (tmp != hi) {
399 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
400 			hi = tmp;
401 		}
402 		if (write)
403 			WREG32_NO_KIQ(mmMM_DATA, *data++);
404 		else
405 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
406 	}
407 
408 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
409 	drm_dev_exit(idx);
410 }
411 
412 /**
413  * amdgpu_device_aper_access - access vram by vram aperature
414  *
415  * @adev: amdgpu_device pointer
416  * @pos: offset of the buffer in vram
417  * @buf: virtual address of the buffer in system memory
418  * @size: read/write size, sizeof(@buf) must > @size
419  * @write: true - write to vram, otherwise - read from vram
420  *
421  * The return value means how many bytes have been transferred.
422  */
423 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
424 				 void *buf, size_t size, bool write)
425 {
426 #ifdef CONFIG_64BIT
427 	void __iomem *addr;
428 	size_t count = 0;
429 	uint64_t last;
430 
431 	if (!adev->mman.aper_base_kaddr)
432 		return 0;
433 
434 	last = min(pos + size, adev->gmc.visible_vram_size);
435 	if (last > pos) {
436 		addr = adev->mman.aper_base_kaddr + pos;
437 		count = last - pos;
438 
439 		if (write) {
440 			memcpy_toio(addr, buf, count);
441 			/* Make sure HDP write cache flush happens without any reordering
442 			 * after the system memory contents are sent over PCIe device
443 			 */
444 			mb();
445 			amdgpu_device_flush_hdp(adev, NULL);
446 		} else {
447 			amdgpu_device_invalidate_hdp(adev, NULL);
448 			/* Make sure HDP read cache is invalidated before issuing a read
449 			 * to the PCIe device
450 			 */
451 			mb();
452 			memcpy_fromio(buf, addr, count);
453 		}
454 
455 	}
456 
457 	return count;
458 #else
459 	return 0;
460 #endif
461 }
462 
463 /**
464  * amdgpu_device_vram_access - read/write a buffer in vram
465  *
466  * @adev: amdgpu_device pointer
467  * @pos: offset of the buffer in vram
468  * @buf: virtual address of the buffer in system memory
469  * @size: read/write size, sizeof(@buf) must > @size
470  * @write: true - write to vram, otherwise - read from vram
471  */
472 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
473 			       void *buf, size_t size, bool write)
474 {
475 	size_t count;
476 
477 	/* try to using vram apreature to access vram first */
478 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
479 	size -= count;
480 	if (size) {
481 		/* using MM to access rest vram */
482 		pos += count;
483 		buf += count;
484 		amdgpu_device_mm_access(adev, pos, buf, size, write);
485 	}
486 }
487 
488 /*
489  * register access helper functions.
490  */
491 
492 /* Check if hw access should be skipped because of hotplug or device error */
493 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
494 {
495 	if (adev->no_hw_access)
496 		return true;
497 
498 #ifdef CONFIG_LOCKDEP
499 	/*
500 	 * This is a bit complicated to understand, so worth a comment. What we assert
501 	 * here is that the GPU reset is not running on another thread in parallel.
502 	 *
503 	 * For this we trylock the read side of the reset semaphore, if that succeeds
504 	 * we know that the reset is not running in paralell.
505 	 *
506 	 * If the trylock fails we assert that we are either already holding the read
507 	 * side of the lock or are the reset thread itself and hold the write side of
508 	 * the lock.
509 	 */
510 	if (in_task()) {
511 		if (down_read_trylock(&adev->reset_domain->sem))
512 			up_read(&adev->reset_domain->sem);
513 		else
514 			lockdep_assert_held(&adev->reset_domain->sem);
515 	}
516 #endif
517 	return false;
518 }
519 
520 /**
521  * amdgpu_device_rreg - read a memory mapped IO or indirect register
522  *
523  * @adev: amdgpu_device pointer
524  * @reg: dword aligned register offset
525  * @acc_flags: access flags which require special behavior
526  *
527  * Returns the 32 bit value from the offset specified.
528  */
529 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
530 			    uint32_t reg, uint32_t acc_flags)
531 {
532 	uint32_t ret;
533 
534 	if (amdgpu_device_skip_hw_access(adev))
535 		return 0;
536 
537 	if ((reg * 4) < adev->rmmio_size) {
538 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
539 		    amdgpu_sriov_runtime(adev) &&
540 		    down_read_trylock(&adev->reset_domain->sem)) {
541 			ret = amdgpu_kiq_rreg(adev, reg, 0);
542 			up_read(&adev->reset_domain->sem);
543 		} else {
544 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
545 		}
546 	} else {
547 		ret = adev->pcie_rreg(adev, reg * 4);
548 	}
549 
550 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
551 
552 	return ret;
553 }
554 
555 /*
556  * MMIO register read with bytes helper functions
557  * @offset:bytes offset from MMIO start
558  */
559 
560 /**
561  * amdgpu_mm_rreg8 - read a memory mapped IO register
562  *
563  * @adev: amdgpu_device pointer
564  * @offset: byte aligned register offset
565  *
566  * Returns the 8 bit value from the offset specified.
567  */
568 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
569 {
570 	if (amdgpu_device_skip_hw_access(adev))
571 		return 0;
572 
573 	if (offset < adev->rmmio_size)
574 		return (readb(adev->rmmio + offset));
575 	BUG();
576 }
577 
578 
579 /**
580  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
581  *
582  * @adev: amdgpu_device pointer
583  * @reg: dword aligned register offset
584  * @acc_flags: access flags which require special behavior
585  * @xcc_id: xcc accelerated compute core id
586  *
587  * Returns the 32 bit value from the offset specified.
588  */
589 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
590 				uint32_t reg, uint32_t acc_flags,
591 				uint32_t xcc_id)
592 {
593 	uint32_t ret, rlcg_flag;
594 
595 	if (amdgpu_device_skip_hw_access(adev))
596 		return 0;
597 
598 	if ((reg * 4) < adev->rmmio_size) {
599 		if (amdgpu_sriov_vf(adev) &&
600 		    !amdgpu_sriov_runtime(adev) &&
601 		    adev->gfx.rlc.rlcg_reg_access_supported &&
602 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
603 							 GC_HWIP, false,
604 							 &rlcg_flag)) {
605 			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
606 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
607 		    amdgpu_sriov_runtime(adev) &&
608 		    down_read_trylock(&adev->reset_domain->sem)) {
609 			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
610 			up_read(&adev->reset_domain->sem);
611 		} else {
612 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
613 		}
614 	} else {
615 		ret = adev->pcie_rreg(adev, reg * 4);
616 	}
617 
618 	return ret;
619 }
620 
621 /*
622  * MMIO register write with bytes helper functions
623  * @offset:bytes offset from MMIO start
624  * @value: the value want to be written to the register
625  */
626 
627 /**
628  * amdgpu_mm_wreg8 - read a memory mapped IO register
629  *
630  * @adev: amdgpu_device pointer
631  * @offset: byte aligned register offset
632  * @value: 8 bit value to write
633  *
634  * Writes the value specified to the offset specified.
635  */
636 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
637 {
638 	if (amdgpu_device_skip_hw_access(adev))
639 		return;
640 
641 	if (offset < adev->rmmio_size)
642 		writeb(value, adev->rmmio + offset);
643 	else
644 		BUG();
645 }
646 
647 /**
648  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
649  *
650  * @adev: amdgpu_device pointer
651  * @reg: dword aligned register offset
652  * @v: 32 bit value to write to the register
653  * @acc_flags: access flags which require special behavior
654  *
655  * Writes the value specified to the offset specified.
656  */
657 void amdgpu_device_wreg(struct amdgpu_device *adev,
658 			uint32_t reg, uint32_t v,
659 			uint32_t acc_flags)
660 {
661 	if (amdgpu_device_skip_hw_access(adev))
662 		return;
663 
664 	if ((reg * 4) < adev->rmmio_size) {
665 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
666 		    amdgpu_sriov_runtime(adev) &&
667 		    down_read_trylock(&adev->reset_domain->sem)) {
668 			amdgpu_kiq_wreg(adev, reg, v, 0);
669 			up_read(&adev->reset_domain->sem);
670 		} else {
671 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
672 		}
673 	} else {
674 		adev->pcie_wreg(adev, reg * 4, v);
675 	}
676 
677 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
678 }
679 
680 /**
681  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
682  *
683  * @adev: amdgpu_device pointer
684  * @reg: mmio/rlc register
685  * @v: value to write
686  * @xcc_id: xcc accelerated compute core id
687  *
688  * this function is invoked only for the debugfs register access
689  */
690 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
691 			     uint32_t reg, uint32_t v,
692 			     uint32_t xcc_id)
693 {
694 	if (amdgpu_device_skip_hw_access(adev))
695 		return;
696 
697 	if (amdgpu_sriov_fullaccess(adev) &&
698 	    adev->gfx.rlc.funcs &&
699 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
700 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
701 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
702 	} else if ((reg * 4) >= adev->rmmio_size) {
703 		adev->pcie_wreg(adev, reg * 4, v);
704 	} else {
705 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
706 	}
707 }
708 
709 /**
710  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
711  *
712  * @adev: amdgpu_device pointer
713  * @reg: dword aligned register offset
714  * @v: 32 bit value to write to the register
715  * @acc_flags: access flags which require special behavior
716  * @xcc_id: xcc accelerated compute core id
717  *
718  * Writes the value specified to the offset specified.
719  */
720 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
721 			uint32_t reg, uint32_t v,
722 			uint32_t acc_flags, uint32_t xcc_id)
723 {
724 	uint32_t rlcg_flag;
725 
726 	if (amdgpu_device_skip_hw_access(adev))
727 		return;
728 
729 	if ((reg * 4) < adev->rmmio_size) {
730 		if (amdgpu_sriov_vf(adev) &&
731 		    !amdgpu_sriov_runtime(adev) &&
732 		    adev->gfx.rlc.rlcg_reg_access_supported &&
733 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
734 							 GC_HWIP, true,
735 							 &rlcg_flag)) {
736 			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
737 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
738 		    amdgpu_sriov_runtime(adev) &&
739 		    down_read_trylock(&adev->reset_domain->sem)) {
740 			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
741 			up_read(&adev->reset_domain->sem);
742 		} else {
743 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
744 		}
745 	} else {
746 		adev->pcie_wreg(adev, reg * 4, v);
747 	}
748 }
749 
750 /**
751  * amdgpu_device_indirect_rreg - read an indirect register
752  *
753  * @adev: amdgpu_device pointer
754  * @reg_addr: indirect register address to read from
755  *
756  * Returns the value of indirect register @reg_addr
757  */
758 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
759 				u32 reg_addr)
760 {
761 	unsigned long flags, pcie_index, pcie_data;
762 	void __iomem *pcie_index_offset;
763 	void __iomem *pcie_data_offset;
764 	u32 r;
765 
766 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
767 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
768 
769 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
770 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
771 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
772 
773 	writel(reg_addr, pcie_index_offset);
774 	readl(pcie_index_offset);
775 	r = readl(pcie_data_offset);
776 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
777 
778 	return r;
779 }
780 
781 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
782 				    u64 reg_addr)
783 {
784 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
785 	u32 r;
786 	void __iomem *pcie_index_offset;
787 	void __iomem *pcie_index_hi_offset;
788 	void __iomem *pcie_data_offset;
789 
790 	if (unlikely(!adev->nbio.funcs)) {
791 		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
792 		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
793 	} else {
794 		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
795 		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
796 	}
797 
798 	if (reg_addr >> 32) {
799 		if (unlikely(!adev->nbio.funcs))
800 			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
801 		else
802 			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
803 	} else {
804 		pcie_index_hi = 0;
805 	}
806 
807 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
808 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
809 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
810 	if (pcie_index_hi != 0)
811 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
812 				pcie_index_hi * 4;
813 
814 	writel(reg_addr, pcie_index_offset);
815 	readl(pcie_index_offset);
816 	if (pcie_index_hi != 0) {
817 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
818 		readl(pcie_index_hi_offset);
819 	}
820 	r = readl(pcie_data_offset);
821 
822 	/* clear the high bits */
823 	if (pcie_index_hi != 0) {
824 		writel(0, pcie_index_hi_offset);
825 		readl(pcie_index_hi_offset);
826 	}
827 
828 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
829 
830 	return r;
831 }
832 
833 /**
834  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
835  *
836  * @adev: amdgpu_device pointer
837  * @reg_addr: indirect register address to read from
838  *
839  * Returns the value of indirect register @reg_addr
840  */
841 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
842 				  u32 reg_addr)
843 {
844 	unsigned long flags, pcie_index, pcie_data;
845 	void __iomem *pcie_index_offset;
846 	void __iomem *pcie_data_offset;
847 	u64 r;
848 
849 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
850 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
851 
852 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
853 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
854 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
855 
856 	/* read low 32 bits */
857 	writel(reg_addr, pcie_index_offset);
858 	readl(pcie_index_offset);
859 	r = readl(pcie_data_offset);
860 	/* read high 32 bits */
861 	writel(reg_addr + 4, pcie_index_offset);
862 	readl(pcie_index_offset);
863 	r |= ((u64)readl(pcie_data_offset) << 32);
864 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
865 
866 	return r;
867 }
868 
869 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
870 				  u64 reg_addr)
871 {
872 	unsigned long flags, pcie_index, pcie_data;
873 	unsigned long pcie_index_hi = 0;
874 	void __iomem *pcie_index_offset;
875 	void __iomem *pcie_index_hi_offset;
876 	void __iomem *pcie_data_offset;
877 	u64 r;
878 
879 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
880 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
881 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
882 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
883 
884 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
885 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
886 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
887 	if (pcie_index_hi != 0)
888 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
889 			pcie_index_hi * 4;
890 
891 	/* read low 32 bits */
892 	writel(reg_addr, pcie_index_offset);
893 	readl(pcie_index_offset);
894 	if (pcie_index_hi != 0) {
895 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
896 		readl(pcie_index_hi_offset);
897 	}
898 	r = readl(pcie_data_offset);
899 	/* read high 32 bits */
900 	writel(reg_addr + 4, pcie_index_offset);
901 	readl(pcie_index_offset);
902 	if (pcie_index_hi != 0) {
903 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
904 		readl(pcie_index_hi_offset);
905 	}
906 	r |= ((u64)readl(pcie_data_offset) << 32);
907 
908 	/* clear the high bits */
909 	if (pcie_index_hi != 0) {
910 		writel(0, pcie_index_hi_offset);
911 		readl(pcie_index_hi_offset);
912 	}
913 
914 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
915 
916 	return r;
917 }
918 
919 /**
920  * amdgpu_device_indirect_wreg - write an indirect register address
921  *
922  * @adev: amdgpu_device pointer
923  * @reg_addr: indirect register offset
924  * @reg_data: indirect register data
925  *
926  */
927 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
928 				 u32 reg_addr, u32 reg_data)
929 {
930 	unsigned long flags, pcie_index, pcie_data;
931 	void __iomem *pcie_index_offset;
932 	void __iomem *pcie_data_offset;
933 
934 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
935 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
936 
937 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
938 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
939 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
940 
941 	writel(reg_addr, pcie_index_offset);
942 	readl(pcie_index_offset);
943 	writel(reg_data, pcie_data_offset);
944 	readl(pcie_data_offset);
945 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
946 }
947 
948 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
949 				     u64 reg_addr, u32 reg_data)
950 {
951 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
952 	void __iomem *pcie_index_offset;
953 	void __iomem *pcie_index_hi_offset;
954 	void __iomem *pcie_data_offset;
955 
956 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
957 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
958 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
959 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
960 	else
961 		pcie_index_hi = 0;
962 
963 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
964 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
965 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
966 	if (pcie_index_hi != 0)
967 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
968 				pcie_index_hi * 4;
969 
970 	writel(reg_addr, pcie_index_offset);
971 	readl(pcie_index_offset);
972 	if (pcie_index_hi != 0) {
973 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
974 		readl(pcie_index_hi_offset);
975 	}
976 	writel(reg_data, pcie_data_offset);
977 	readl(pcie_data_offset);
978 
979 	/* clear the high bits */
980 	if (pcie_index_hi != 0) {
981 		writel(0, pcie_index_hi_offset);
982 		readl(pcie_index_hi_offset);
983 	}
984 
985 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
986 }
987 
988 /**
989  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
990  *
991  * @adev: amdgpu_device pointer
992  * @reg_addr: indirect register offset
993  * @reg_data: indirect register data
994  *
995  */
996 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
997 				   u32 reg_addr, u64 reg_data)
998 {
999 	unsigned long flags, pcie_index, pcie_data;
1000 	void __iomem *pcie_index_offset;
1001 	void __iomem *pcie_data_offset;
1002 
1003 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1004 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1005 
1006 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1007 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1008 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1009 
1010 	/* write low 32 bits */
1011 	writel(reg_addr, pcie_index_offset);
1012 	readl(pcie_index_offset);
1013 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1014 	readl(pcie_data_offset);
1015 	/* write high 32 bits */
1016 	writel(reg_addr + 4, pcie_index_offset);
1017 	readl(pcie_index_offset);
1018 	writel((u32)(reg_data >> 32), pcie_data_offset);
1019 	readl(pcie_data_offset);
1020 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1021 }
1022 
1023 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1024 				   u64 reg_addr, u64 reg_data)
1025 {
1026 	unsigned long flags, pcie_index, pcie_data;
1027 	unsigned long pcie_index_hi = 0;
1028 	void __iomem *pcie_index_offset;
1029 	void __iomem *pcie_index_hi_offset;
1030 	void __iomem *pcie_data_offset;
1031 
1032 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1033 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1034 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1035 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1036 
1037 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1038 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1039 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1040 	if (pcie_index_hi != 0)
1041 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1042 				pcie_index_hi * 4;
1043 
1044 	/* write low 32 bits */
1045 	writel(reg_addr, pcie_index_offset);
1046 	readl(pcie_index_offset);
1047 	if (pcie_index_hi != 0) {
1048 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1049 		readl(pcie_index_hi_offset);
1050 	}
1051 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1052 	readl(pcie_data_offset);
1053 	/* write high 32 bits */
1054 	writel(reg_addr + 4, pcie_index_offset);
1055 	readl(pcie_index_offset);
1056 	if (pcie_index_hi != 0) {
1057 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1058 		readl(pcie_index_hi_offset);
1059 	}
1060 	writel((u32)(reg_data >> 32), pcie_data_offset);
1061 	readl(pcie_data_offset);
1062 
1063 	/* clear the high bits */
1064 	if (pcie_index_hi != 0) {
1065 		writel(0, pcie_index_hi_offset);
1066 		readl(pcie_index_hi_offset);
1067 	}
1068 
1069 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1070 }
1071 
1072 /**
1073  * amdgpu_device_get_rev_id - query device rev_id
1074  *
1075  * @adev: amdgpu_device pointer
1076  *
1077  * Return device rev_id
1078  */
1079 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1080 {
1081 	return adev->nbio.funcs->get_rev_id(adev);
1082 }
1083 
1084 /**
1085  * amdgpu_invalid_rreg - dummy reg read function
1086  *
1087  * @adev: amdgpu_device pointer
1088  * @reg: offset of register
1089  *
1090  * Dummy register read function.  Used for register blocks
1091  * that certain asics don't have (all asics).
1092  * Returns the value in the register.
1093  */
1094 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1095 {
1096 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1097 	BUG();
1098 	return 0;
1099 }
1100 
1101 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1102 {
1103 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1104 	BUG();
1105 	return 0;
1106 }
1107 
1108 /**
1109  * amdgpu_invalid_wreg - dummy reg write function
1110  *
1111  * @adev: amdgpu_device pointer
1112  * @reg: offset of register
1113  * @v: value to write to the register
1114  *
1115  * Dummy register read function.  Used for register blocks
1116  * that certain asics don't have (all asics).
1117  */
1118 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1119 {
1120 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1121 		  reg, v);
1122 	BUG();
1123 }
1124 
1125 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1126 {
1127 	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1128 		  reg, v);
1129 	BUG();
1130 }
1131 
1132 /**
1133  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1134  *
1135  * @adev: amdgpu_device pointer
1136  * @reg: offset of register
1137  *
1138  * Dummy register read function.  Used for register blocks
1139  * that certain asics don't have (all asics).
1140  * Returns the value in the register.
1141  */
1142 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1143 {
1144 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1145 	BUG();
1146 	return 0;
1147 }
1148 
1149 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1150 {
1151 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1152 	BUG();
1153 	return 0;
1154 }
1155 
1156 /**
1157  * amdgpu_invalid_wreg64 - dummy reg write function
1158  *
1159  * @adev: amdgpu_device pointer
1160  * @reg: offset of register
1161  * @v: value to write to the register
1162  *
1163  * Dummy register read function.  Used for register blocks
1164  * that certain asics don't have (all asics).
1165  */
1166 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1167 {
1168 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1169 		  reg, v);
1170 	BUG();
1171 }
1172 
1173 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1174 {
1175 	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1176 		  reg, v);
1177 	BUG();
1178 }
1179 
1180 /**
1181  * amdgpu_block_invalid_rreg - dummy reg read function
1182  *
1183  * @adev: amdgpu_device pointer
1184  * @block: offset of instance
1185  * @reg: offset of register
1186  *
1187  * Dummy register read function.  Used for register blocks
1188  * that certain asics don't have (all asics).
1189  * Returns the value in the register.
1190  */
1191 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1192 					  uint32_t block, uint32_t reg)
1193 {
1194 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1195 		  reg, block);
1196 	BUG();
1197 	return 0;
1198 }
1199 
1200 /**
1201  * amdgpu_block_invalid_wreg - dummy reg write function
1202  *
1203  * @adev: amdgpu_device pointer
1204  * @block: offset of instance
1205  * @reg: offset of register
1206  * @v: value to write to the register
1207  *
1208  * Dummy register read function.  Used for register blocks
1209  * that certain asics don't have (all asics).
1210  */
1211 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1212 				      uint32_t block,
1213 				      uint32_t reg, uint32_t v)
1214 {
1215 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1216 		  reg, block, v);
1217 	BUG();
1218 }
1219 
1220 /**
1221  * amdgpu_device_asic_init - Wrapper for atom asic_init
1222  *
1223  * @adev: amdgpu_device pointer
1224  *
1225  * Does any asic specific work and then calls atom asic init.
1226  */
1227 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1228 {
1229 	int ret;
1230 
1231 	amdgpu_asic_pre_asic_init(adev);
1232 
1233 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1234 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1235 		amdgpu_psp_wait_for_bootloader(adev);
1236 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1237 		return ret;
1238 	} else {
1239 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1240 	}
1241 
1242 	return 0;
1243 }
1244 
1245 /**
1246  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1247  *
1248  * @adev: amdgpu_device pointer
1249  *
1250  * Allocates a scratch page of VRAM for use by various things in the
1251  * driver.
1252  */
1253 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1254 {
1255 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1256 				       AMDGPU_GEM_DOMAIN_VRAM |
1257 				       AMDGPU_GEM_DOMAIN_GTT,
1258 				       &adev->mem_scratch.robj,
1259 				       &adev->mem_scratch.gpu_addr,
1260 				       (void **)&adev->mem_scratch.ptr);
1261 }
1262 
1263 /**
1264  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1265  *
1266  * @adev: amdgpu_device pointer
1267  *
1268  * Frees the VRAM scratch page.
1269  */
1270 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1271 {
1272 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1273 }
1274 
1275 /**
1276  * amdgpu_device_program_register_sequence - program an array of registers.
1277  *
1278  * @adev: amdgpu_device pointer
1279  * @registers: pointer to the register array
1280  * @array_size: size of the register array
1281  *
1282  * Programs an array or registers with and or masks.
1283  * This is a helper for setting golden registers.
1284  */
1285 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1286 					     const u32 *registers,
1287 					     const u32 array_size)
1288 {
1289 	u32 tmp, reg, and_mask, or_mask;
1290 	int i;
1291 
1292 	if (array_size % 3)
1293 		return;
1294 
1295 	for (i = 0; i < array_size; i += 3) {
1296 		reg = registers[i + 0];
1297 		and_mask = registers[i + 1];
1298 		or_mask = registers[i + 2];
1299 
1300 		if (and_mask == 0xffffffff) {
1301 			tmp = or_mask;
1302 		} else {
1303 			tmp = RREG32(reg);
1304 			tmp &= ~and_mask;
1305 			if (adev->family >= AMDGPU_FAMILY_AI)
1306 				tmp |= (or_mask & and_mask);
1307 			else
1308 				tmp |= or_mask;
1309 		}
1310 		WREG32(reg, tmp);
1311 	}
1312 }
1313 
1314 /**
1315  * amdgpu_device_pci_config_reset - reset the GPU
1316  *
1317  * @adev: amdgpu_device pointer
1318  *
1319  * Resets the GPU using the pci config reset sequence.
1320  * Only applicable to asics prior to vega10.
1321  */
1322 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1323 {
1324 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1325 }
1326 
1327 /**
1328  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1329  *
1330  * @adev: amdgpu_device pointer
1331  *
1332  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1333  */
1334 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1335 {
1336 	return pci_reset_function(adev->pdev);
1337 }
1338 
1339 /*
1340  * amdgpu_device_wb_*()
1341  * Writeback is the method by which the GPU updates special pages in memory
1342  * with the status of certain GPU events (fences, ring pointers,etc.).
1343  */
1344 
1345 /**
1346  * amdgpu_device_wb_fini - Disable Writeback and free memory
1347  *
1348  * @adev: amdgpu_device pointer
1349  *
1350  * Disables Writeback and frees the Writeback memory (all asics).
1351  * Used at driver shutdown.
1352  */
1353 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1354 {
1355 	if (adev->wb.wb_obj) {
1356 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1357 				      &adev->wb.gpu_addr,
1358 				      (void **)&adev->wb.wb);
1359 		adev->wb.wb_obj = NULL;
1360 	}
1361 }
1362 
1363 /**
1364  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1365  *
1366  * @adev: amdgpu_device pointer
1367  *
1368  * Initializes writeback and allocates writeback memory (all asics).
1369  * Used at driver startup.
1370  * Returns 0 on success or an -error on failure.
1371  */
1372 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1373 {
1374 	int r;
1375 
1376 	if (adev->wb.wb_obj == NULL) {
1377 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1378 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1379 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1380 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1381 					    (void **)&adev->wb.wb);
1382 		if (r) {
1383 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1384 			return r;
1385 		}
1386 
1387 		adev->wb.num_wb = AMDGPU_MAX_WB;
1388 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1389 
1390 		/* clear wb memory */
1391 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1392 	}
1393 
1394 	return 0;
1395 }
1396 
1397 /**
1398  * amdgpu_device_wb_get - Allocate a wb entry
1399  *
1400  * @adev: amdgpu_device pointer
1401  * @wb: wb index
1402  *
1403  * Allocate a wb slot for use by the driver (all asics).
1404  * Returns 0 on success or -EINVAL on failure.
1405  */
1406 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1407 {
1408 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1409 
1410 	if (offset < adev->wb.num_wb) {
1411 		__set_bit(offset, adev->wb.used);
1412 		*wb = offset << 3; /* convert to dw offset */
1413 		return 0;
1414 	} else {
1415 		return -EINVAL;
1416 	}
1417 }
1418 
1419 /**
1420  * amdgpu_device_wb_free - Free a wb entry
1421  *
1422  * @adev: amdgpu_device pointer
1423  * @wb: wb index
1424  *
1425  * Free a wb slot allocated for use by the driver (all asics)
1426  */
1427 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1428 {
1429 	wb >>= 3;
1430 	if (wb < adev->wb.num_wb)
1431 		__clear_bit(wb, adev->wb.used);
1432 }
1433 
1434 /**
1435  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1436  *
1437  * @adev: amdgpu_device pointer
1438  *
1439  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1440  * to fail, but if any of the BARs is not accessible after the size we abort
1441  * driver loading by returning -ENODEV.
1442  */
1443 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1444 {
1445 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1446 	struct pci_bus *root;
1447 	struct resource *res;
1448 	unsigned int i;
1449 	u16 cmd;
1450 	int r;
1451 
1452 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1453 		return 0;
1454 
1455 	/* Bypass for VF */
1456 	if (amdgpu_sriov_vf(adev))
1457 		return 0;
1458 
1459 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1460 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1461 		DRM_WARN("System can't access extended configuration space,please check!!\n");
1462 
1463 	/* skip if the bios has already enabled large BAR */
1464 	if (adev->gmc.real_vram_size &&
1465 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1466 		return 0;
1467 
1468 	/* Check if the root BUS has 64bit memory resources */
1469 	root = adev->pdev->bus;
1470 	while (root->parent)
1471 		root = root->parent;
1472 
1473 	pci_bus_for_each_resource(root, res, i) {
1474 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1475 		    res->start > 0x100000000ull)
1476 			break;
1477 	}
1478 
1479 	/* Trying to resize is pointless without a root hub window above 4GB */
1480 	if (!res)
1481 		return 0;
1482 
1483 	/* Limit the BAR size to what is available */
1484 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1485 			rbar_size);
1486 
1487 	/* Disable memory decoding while we change the BAR addresses and size */
1488 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1489 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1490 			      cmd & ~PCI_COMMAND_MEMORY);
1491 
1492 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1493 	amdgpu_doorbell_fini(adev);
1494 	if (adev->asic_type >= CHIP_BONAIRE)
1495 		pci_release_resource(adev->pdev, 2);
1496 
1497 	pci_release_resource(adev->pdev, 0);
1498 
1499 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1500 	if (r == -ENOSPC)
1501 		DRM_INFO("Not enough PCI address space for a large BAR.");
1502 	else if (r && r != -ENOTSUPP)
1503 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1504 
1505 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1506 
1507 	/* When the doorbell or fb BAR isn't available we have no chance of
1508 	 * using the device.
1509 	 */
1510 	r = amdgpu_doorbell_init(adev);
1511 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1512 		return -ENODEV;
1513 
1514 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1515 
1516 	return 0;
1517 }
1518 
1519 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1520 {
1521 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1522 		return false;
1523 
1524 	return true;
1525 }
1526 
1527 /*
1528  * GPU helpers function.
1529  */
1530 /**
1531  * amdgpu_device_need_post - check if the hw need post or not
1532  *
1533  * @adev: amdgpu_device pointer
1534  *
1535  * Check if the asic has been initialized (all asics) at driver startup
1536  * or post is needed if  hw reset is performed.
1537  * Returns true if need or false if not.
1538  */
1539 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1540 {
1541 	uint32_t reg;
1542 
1543 	if (amdgpu_sriov_vf(adev))
1544 		return false;
1545 
1546 	if (!amdgpu_device_read_bios(adev))
1547 		return false;
1548 
1549 	if (amdgpu_passthrough(adev)) {
1550 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1551 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1552 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1553 		 * vpost executed for smc version below 22.15
1554 		 */
1555 		if (adev->asic_type == CHIP_FIJI) {
1556 			int err;
1557 			uint32_t fw_ver;
1558 
1559 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1560 			/* force vPost if error occured */
1561 			if (err)
1562 				return true;
1563 
1564 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1565 			release_firmware(adev->pm.fw);
1566 			if (fw_ver < 0x00160e00)
1567 				return true;
1568 		}
1569 	}
1570 
1571 	/* Don't post if we need to reset whole hive on init */
1572 	if (adev->gmc.xgmi.pending_reset)
1573 		return false;
1574 
1575 	if (adev->has_hw_reset) {
1576 		adev->has_hw_reset = false;
1577 		return true;
1578 	}
1579 
1580 	/* bios scratch used on CIK+ */
1581 	if (adev->asic_type >= CHIP_BONAIRE)
1582 		return amdgpu_atombios_scratch_need_asic_init(adev);
1583 
1584 	/* check MEM_SIZE for older asics */
1585 	reg = amdgpu_asic_get_config_memsize(adev);
1586 
1587 	if ((reg != 0) && (reg != 0xffffffff))
1588 		return false;
1589 
1590 	return true;
1591 }
1592 
1593 /*
1594  * Check whether seamless boot is supported.
1595  *
1596  * So far we only support seamless boot on DCE 3.0 or later.
1597  * If users report that it works on older ASICS as well, we may
1598  * loosen this.
1599  */
1600 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1601 {
1602 	switch (amdgpu_seamless) {
1603 	case -1:
1604 		break;
1605 	case 1:
1606 		return true;
1607 	case 0:
1608 		return false;
1609 	default:
1610 		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1611 			  amdgpu_seamless);
1612 		return false;
1613 	}
1614 
1615 	if (!(adev->flags & AMD_IS_APU))
1616 		return false;
1617 
1618 	if (adev->mman.keep_stolen_vga_memory)
1619 		return false;
1620 
1621 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1622 }
1623 
1624 /*
1625  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1626  * don't support dynamic speed switching. Until we have confirmation from Intel
1627  * that a specific host supports it, it's safer that we keep it disabled for all.
1628  *
1629  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1630  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1631  */
1632 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1633 {
1634 #if IS_ENABLED(CONFIG_X86)
1635 	struct cpuinfo_x86 *c = &cpu_data(0);
1636 
1637 	/* eGPU change speeds based on USB4 fabric conditions */
1638 	if (dev_is_removable(adev->dev))
1639 		return true;
1640 
1641 	if (c->x86_vendor == X86_VENDOR_INTEL)
1642 		return false;
1643 #endif
1644 	return true;
1645 }
1646 
1647 /**
1648  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1649  *
1650  * @adev: amdgpu_device pointer
1651  *
1652  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1653  * be set for this device.
1654  *
1655  * Returns true if it should be used or false if not.
1656  */
1657 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1658 {
1659 	switch (amdgpu_aspm) {
1660 	case -1:
1661 		break;
1662 	case 0:
1663 		return false;
1664 	case 1:
1665 		return true;
1666 	default:
1667 		return false;
1668 	}
1669 	if (adev->flags & AMD_IS_APU)
1670 		return false;
1671 	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1672 		return false;
1673 	return pcie_aspm_enabled(adev->pdev);
1674 }
1675 
1676 /* if we get transitioned to only one device, take VGA back */
1677 /**
1678  * amdgpu_device_vga_set_decode - enable/disable vga decode
1679  *
1680  * @pdev: PCI device pointer
1681  * @state: enable/disable vga decode
1682  *
1683  * Enable/disable vga decode (all asics).
1684  * Returns VGA resource flags.
1685  */
1686 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1687 		bool state)
1688 {
1689 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1690 
1691 	amdgpu_asic_set_vga_state(adev, state);
1692 	if (state)
1693 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1694 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1695 	else
1696 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1697 }
1698 
1699 /**
1700  * amdgpu_device_check_block_size - validate the vm block size
1701  *
1702  * @adev: amdgpu_device pointer
1703  *
1704  * Validates the vm block size specified via module parameter.
1705  * The vm block size defines number of bits in page table versus page directory,
1706  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1707  * page table and the remaining bits are in the page directory.
1708  */
1709 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1710 {
1711 	/* defines number of bits in page table versus page directory,
1712 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1713 	 * page table and the remaining bits are in the page directory
1714 	 */
1715 	if (amdgpu_vm_block_size == -1)
1716 		return;
1717 
1718 	if (amdgpu_vm_block_size < 9) {
1719 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1720 			 amdgpu_vm_block_size);
1721 		amdgpu_vm_block_size = -1;
1722 	}
1723 }
1724 
1725 /**
1726  * amdgpu_device_check_vm_size - validate the vm size
1727  *
1728  * @adev: amdgpu_device pointer
1729  *
1730  * Validates the vm size in GB specified via module parameter.
1731  * The VM size is the size of the GPU virtual memory space in GB.
1732  */
1733 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1734 {
1735 	/* no need to check the default value */
1736 	if (amdgpu_vm_size == -1)
1737 		return;
1738 
1739 	if (amdgpu_vm_size < 1) {
1740 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1741 			 amdgpu_vm_size);
1742 		amdgpu_vm_size = -1;
1743 	}
1744 }
1745 
1746 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1747 {
1748 	struct sysinfo si;
1749 	bool is_os_64 = (sizeof(void *) == 8);
1750 	uint64_t total_memory;
1751 	uint64_t dram_size_seven_GB = 0x1B8000000;
1752 	uint64_t dram_size_three_GB = 0xB8000000;
1753 
1754 	if (amdgpu_smu_memory_pool_size == 0)
1755 		return;
1756 
1757 	if (!is_os_64) {
1758 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1759 		goto def_value;
1760 	}
1761 	si_meminfo(&si);
1762 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1763 
1764 	if ((amdgpu_smu_memory_pool_size == 1) ||
1765 		(amdgpu_smu_memory_pool_size == 2)) {
1766 		if (total_memory < dram_size_three_GB)
1767 			goto def_value1;
1768 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1769 		(amdgpu_smu_memory_pool_size == 8)) {
1770 		if (total_memory < dram_size_seven_GB)
1771 			goto def_value1;
1772 	} else {
1773 		DRM_WARN("Smu memory pool size not supported\n");
1774 		goto def_value;
1775 	}
1776 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1777 
1778 	return;
1779 
1780 def_value1:
1781 	DRM_WARN("No enough system memory\n");
1782 def_value:
1783 	adev->pm.smu_prv_buffer_size = 0;
1784 }
1785 
1786 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1787 {
1788 	if (!(adev->flags & AMD_IS_APU) ||
1789 	    adev->asic_type < CHIP_RAVEN)
1790 		return 0;
1791 
1792 	switch (adev->asic_type) {
1793 	case CHIP_RAVEN:
1794 		if (adev->pdev->device == 0x15dd)
1795 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1796 		if (adev->pdev->device == 0x15d8)
1797 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1798 		break;
1799 	case CHIP_RENOIR:
1800 		if ((adev->pdev->device == 0x1636) ||
1801 		    (adev->pdev->device == 0x164c))
1802 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1803 		else
1804 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1805 		break;
1806 	case CHIP_VANGOGH:
1807 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1808 		break;
1809 	case CHIP_YELLOW_CARP:
1810 		break;
1811 	case CHIP_CYAN_SKILLFISH:
1812 		if ((adev->pdev->device == 0x13FE) ||
1813 		    (adev->pdev->device == 0x143F))
1814 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1815 		break;
1816 	default:
1817 		break;
1818 	}
1819 
1820 	return 0;
1821 }
1822 
1823 /**
1824  * amdgpu_device_check_arguments - validate module params
1825  *
1826  * @adev: amdgpu_device pointer
1827  *
1828  * Validates certain module parameters and updates
1829  * the associated values used by the driver (all asics).
1830  */
1831 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1832 {
1833 	if (amdgpu_sched_jobs < 4) {
1834 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1835 			 amdgpu_sched_jobs);
1836 		amdgpu_sched_jobs = 4;
1837 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1838 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1839 			 amdgpu_sched_jobs);
1840 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1841 	}
1842 
1843 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1844 		/* gart size must be greater or equal to 32M */
1845 		dev_warn(adev->dev, "gart size (%d) too small\n",
1846 			 amdgpu_gart_size);
1847 		amdgpu_gart_size = -1;
1848 	}
1849 
1850 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1851 		/* gtt size must be greater or equal to 32M */
1852 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1853 				 amdgpu_gtt_size);
1854 		amdgpu_gtt_size = -1;
1855 	}
1856 
1857 	/* valid range is between 4 and 9 inclusive */
1858 	if (amdgpu_vm_fragment_size != -1 &&
1859 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1860 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1861 		amdgpu_vm_fragment_size = -1;
1862 	}
1863 
1864 	if (amdgpu_sched_hw_submission < 2) {
1865 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1866 			 amdgpu_sched_hw_submission);
1867 		amdgpu_sched_hw_submission = 2;
1868 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1869 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1870 			 amdgpu_sched_hw_submission);
1871 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1872 	}
1873 
1874 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1875 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1876 		amdgpu_reset_method = -1;
1877 	}
1878 
1879 	amdgpu_device_check_smu_prv_buffer_size(adev);
1880 
1881 	amdgpu_device_check_vm_size(adev);
1882 
1883 	amdgpu_device_check_block_size(adev);
1884 
1885 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1886 
1887 	return 0;
1888 }
1889 
1890 /**
1891  * amdgpu_switcheroo_set_state - set switcheroo state
1892  *
1893  * @pdev: pci dev pointer
1894  * @state: vga_switcheroo state
1895  *
1896  * Callback for the switcheroo driver.  Suspends or resumes
1897  * the asics before or after it is powered up using ACPI methods.
1898  */
1899 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1900 					enum vga_switcheroo_state state)
1901 {
1902 	struct drm_device *dev = pci_get_drvdata(pdev);
1903 	int r;
1904 
1905 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1906 		return;
1907 
1908 	if (state == VGA_SWITCHEROO_ON) {
1909 		pr_info("switched on\n");
1910 		/* don't suspend or resume card normally */
1911 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1912 
1913 		pci_set_power_state(pdev, PCI_D0);
1914 		amdgpu_device_load_pci_state(pdev);
1915 		r = pci_enable_device(pdev);
1916 		if (r)
1917 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1918 		amdgpu_device_resume(dev, true);
1919 
1920 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1921 	} else {
1922 		pr_info("switched off\n");
1923 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1924 		amdgpu_device_prepare(dev);
1925 		amdgpu_device_suspend(dev, true);
1926 		amdgpu_device_cache_pci_state(pdev);
1927 		/* Shut down the device */
1928 		pci_disable_device(pdev);
1929 		pci_set_power_state(pdev, PCI_D3cold);
1930 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1931 	}
1932 }
1933 
1934 /**
1935  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1936  *
1937  * @pdev: pci dev pointer
1938  *
1939  * Callback for the switcheroo driver.  Check of the switcheroo
1940  * state can be changed.
1941  * Returns true if the state can be changed, false if not.
1942  */
1943 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1944 {
1945 	struct drm_device *dev = pci_get_drvdata(pdev);
1946 
1947        /*
1948 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1949 	* locking inversion with the driver load path. And the access here is
1950 	* completely racy anyway. So don't bother with locking for now.
1951 	*/
1952 	return atomic_read(&dev->open_count) == 0;
1953 }
1954 
1955 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1956 	.set_gpu_state = amdgpu_switcheroo_set_state,
1957 	.reprobe = NULL,
1958 	.can_switch = amdgpu_switcheroo_can_switch,
1959 };
1960 
1961 /**
1962  * amdgpu_device_ip_set_clockgating_state - set the CG state
1963  *
1964  * @dev: amdgpu_device pointer
1965  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1966  * @state: clockgating state (gate or ungate)
1967  *
1968  * Sets the requested clockgating state for all instances of
1969  * the hardware IP specified.
1970  * Returns the error code from the last instance.
1971  */
1972 int amdgpu_device_ip_set_clockgating_state(void *dev,
1973 					   enum amd_ip_block_type block_type,
1974 					   enum amd_clockgating_state state)
1975 {
1976 	struct amdgpu_device *adev = dev;
1977 	int i, r = 0;
1978 
1979 	for (i = 0; i < adev->num_ip_blocks; i++) {
1980 		if (!adev->ip_blocks[i].status.valid)
1981 			continue;
1982 		if (adev->ip_blocks[i].version->type != block_type)
1983 			continue;
1984 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1985 			continue;
1986 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1987 			(void *)adev, state);
1988 		if (r)
1989 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1990 				  adev->ip_blocks[i].version->funcs->name, r);
1991 	}
1992 	return r;
1993 }
1994 
1995 /**
1996  * amdgpu_device_ip_set_powergating_state - set the PG state
1997  *
1998  * @dev: amdgpu_device pointer
1999  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2000  * @state: powergating state (gate or ungate)
2001  *
2002  * Sets the requested powergating state for all instances of
2003  * the hardware IP specified.
2004  * Returns the error code from the last instance.
2005  */
2006 int amdgpu_device_ip_set_powergating_state(void *dev,
2007 					   enum amd_ip_block_type block_type,
2008 					   enum amd_powergating_state state)
2009 {
2010 	struct amdgpu_device *adev = dev;
2011 	int i, r = 0;
2012 
2013 	for (i = 0; i < adev->num_ip_blocks; i++) {
2014 		if (!adev->ip_blocks[i].status.valid)
2015 			continue;
2016 		if (adev->ip_blocks[i].version->type != block_type)
2017 			continue;
2018 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2019 			continue;
2020 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2021 			(void *)adev, state);
2022 		if (r)
2023 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2024 				  adev->ip_blocks[i].version->funcs->name, r);
2025 	}
2026 	return r;
2027 }
2028 
2029 /**
2030  * amdgpu_device_ip_get_clockgating_state - get the CG state
2031  *
2032  * @adev: amdgpu_device pointer
2033  * @flags: clockgating feature flags
2034  *
2035  * Walks the list of IPs on the device and updates the clockgating
2036  * flags for each IP.
2037  * Updates @flags with the feature flags for each hardware IP where
2038  * clockgating is enabled.
2039  */
2040 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2041 					    u64 *flags)
2042 {
2043 	int i;
2044 
2045 	for (i = 0; i < adev->num_ip_blocks; i++) {
2046 		if (!adev->ip_blocks[i].status.valid)
2047 			continue;
2048 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2049 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2050 	}
2051 }
2052 
2053 /**
2054  * amdgpu_device_ip_wait_for_idle - wait for idle
2055  *
2056  * @adev: amdgpu_device pointer
2057  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2058  *
2059  * Waits for the request hardware IP to be idle.
2060  * Returns 0 for success or a negative error code on failure.
2061  */
2062 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2063 				   enum amd_ip_block_type block_type)
2064 {
2065 	int i, r;
2066 
2067 	for (i = 0; i < adev->num_ip_blocks; i++) {
2068 		if (!adev->ip_blocks[i].status.valid)
2069 			continue;
2070 		if (adev->ip_blocks[i].version->type == block_type) {
2071 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2072 			if (r)
2073 				return r;
2074 			break;
2075 		}
2076 	}
2077 	return 0;
2078 
2079 }
2080 
2081 /**
2082  * amdgpu_device_ip_is_idle - is the hardware IP idle
2083  *
2084  * @adev: amdgpu_device pointer
2085  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2086  *
2087  * Check if the hardware IP is idle or not.
2088  * Returns true if it the IP is idle, false if not.
2089  */
2090 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2091 			      enum amd_ip_block_type block_type)
2092 {
2093 	int i;
2094 
2095 	for (i = 0; i < adev->num_ip_blocks; i++) {
2096 		if (!adev->ip_blocks[i].status.valid)
2097 			continue;
2098 		if (adev->ip_blocks[i].version->type == block_type)
2099 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2100 	}
2101 	return true;
2102 
2103 }
2104 
2105 /**
2106  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2107  *
2108  * @adev: amdgpu_device pointer
2109  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2110  *
2111  * Returns a pointer to the hardware IP block structure
2112  * if it exists for the asic, otherwise NULL.
2113  */
2114 struct amdgpu_ip_block *
2115 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2116 			      enum amd_ip_block_type type)
2117 {
2118 	int i;
2119 
2120 	for (i = 0; i < adev->num_ip_blocks; i++)
2121 		if (adev->ip_blocks[i].version->type == type)
2122 			return &adev->ip_blocks[i];
2123 
2124 	return NULL;
2125 }
2126 
2127 /**
2128  * amdgpu_device_ip_block_version_cmp
2129  *
2130  * @adev: amdgpu_device pointer
2131  * @type: enum amd_ip_block_type
2132  * @major: major version
2133  * @minor: minor version
2134  *
2135  * return 0 if equal or greater
2136  * return 1 if smaller or the ip_block doesn't exist
2137  */
2138 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2139 				       enum amd_ip_block_type type,
2140 				       u32 major, u32 minor)
2141 {
2142 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2143 
2144 	if (ip_block && ((ip_block->version->major > major) ||
2145 			((ip_block->version->major == major) &&
2146 			(ip_block->version->minor >= minor))))
2147 		return 0;
2148 
2149 	return 1;
2150 }
2151 
2152 /**
2153  * amdgpu_device_ip_block_add
2154  *
2155  * @adev: amdgpu_device pointer
2156  * @ip_block_version: pointer to the IP to add
2157  *
2158  * Adds the IP block driver information to the collection of IPs
2159  * on the asic.
2160  */
2161 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2162 			       const struct amdgpu_ip_block_version *ip_block_version)
2163 {
2164 	if (!ip_block_version)
2165 		return -EINVAL;
2166 
2167 	switch (ip_block_version->type) {
2168 	case AMD_IP_BLOCK_TYPE_VCN:
2169 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2170 			return 0;
2171 		break;
2172 	case AMD_IP_BLOCK_TYPE_JPEG:
2173 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2174 			return 0;
2175 		break;
2176 	default:
2177 		break;
2178 	}
2179 
2180 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2181 		  ip_block_version->funcs->name);
2182 
2183 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2184 
2185 	return 0;
2186 }
2187 
2188 /**
2189  * amdgpu_device_enable_virtual_display - enable virtual display feature
2190  *
2191  * @adev: amdgpu_device pointer
2192  *
2193  * Enabled the virtual display feature if the user has enabled it via
2194  * the module parameter virtual_display.  This feature provides a virtual
2195  * display hardware on headless boards or in virtualized environments.
2196  * This function parses and validates the configuration string specified by
2197  * the user and configues the virtual display configuration (number of
2198  * virtual connectors, crtcs, etc.) specified.
2199  */
2200 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2201 {
2202 	adev->enable_virtual_display = false;
2203 
2204 	if (amdgpu_virtual_display) {
2205 		const char *pci_address_name = pci_name(adev->pdev);
2206 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2207 
2208 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2209 		pciaddstr_tmp = pciaddstr;
2210 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2211 			pciaddname = strsep(&pciaddname_tmp, ",");
2212 			if (!strcmp("all", pciaddname)
2213 			    || !strcmp(pci_address_name, pciaddname)) {
2214 				long num_crtc;
2215 				int res = -1;
2216 
2217 				adev->enable_virtual_display = true;
2218 
2219 				if (pciaddname_tmp)
2220 					res = kstrtol(pciaddname_tmp, 10,
2221 						      &num_crtc);
2222 
2223 				if (!res) {
2224 					if (num_crtc < 1)
2225 						num_crtc = 1;
2226 					if (num_crtc > 6)
2227 						num_crtc = 6;
2228 					adev->mode_info.num_crtc = num_crtc;
2229 				} else {
2230 					adev->mode_info.num_crtc = 1;
2231 				}
2232 				break;
2233 			}
2234 		}
2235 
2236 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2237 			 amdgpu_virtual_display, pci_address_name,
2238 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2239 
2240 		kfree(pciaddstr);
2241 	}
2242 }
2243 
2244 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2245 {
2246 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2247 		adev->mode_info.num_crtc = 1;
2248 		adev->enable_virtual_display = true;
2249 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2250 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2251 	}
2252 }
2253 
2254 /**
2255  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2256  *
2257  * @adev: amdgpu_device pointer
2258  *
2259  * Parses the asic configuration parameters specified in the gpu info
2260  * firmware and makes them availale to the driver for use in configuring
2261  * the asic.
2262  * Returns 0 on success, -EINVAL on failure.
2263  */
2264 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2265 {
2266 	const char *chip_name;
2267 	char fw_name[40];
2268 	int err;
2269 	const struct gpu_info_firmware_header_v1_0 *hdr;
2270 
2271 	adev->firmware.gpu_info_fw = NULL;
2272 
2273 	if (adev->mman.discovery_bin)
2274 		return 0;
2275 
2276 	switch (adev->asic_type) {
2277 	default:
2278 		return 0;
2279 	case CHIP_VEGA10:
2280 		chip_name = "vega10";
2281 		break;
2282 	case CHIP_VEGA12:
2283 		chip_name = "vega12";
2284 		break;
2285 	case CHIP_RAVEN:
2286 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2287 			chip_name = "raven2";
2288 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2289 			chip_name = "picasso";
2290 		else
2291 			chip_name = "raven";
2292 		break;
2293 	case CHIP_ARCTURUS:
2294 		chip_name = "arcturus";
2295 		break;
2296 	case CHIP_NAVI12:
2297 		chip_name = "navi12";
2298 		break;
2299 	}
2300 
2301 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2302 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2303 	if (err) {
2304 		dev_err(adev->dev,
2305 			"Failed to get gpu_info firmware \"%s\"\n",
2306 			fw_name);
2307 		goto out;
2308 	}
2309 
2310 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2311 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2312 
2313 	switch (hdr->version_major) {
2314 	case 1:
2315 	{
2316 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2317 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2318 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2319 
2320 		/*
2321 		 * Should be droped when DAL no longer needs it.
2322 		 */
2323 		if (adev->asic_type == CHIP_NAVI12)
2324 			goto parse_soc_bounding_box;
2325 
2326 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2327 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2328 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2329 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2330 		adev->gfx.config.max_texture_channel_caches =
2331 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2332 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2333 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2334 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2335 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2336 		adev->gfx.config.double_offchip_lds_buf =
2337 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2338 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2339 		adev->gfx.cu_info.max_waves_per_simd =
2340 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2341 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2342 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2343 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2344 		if (hdr->version_minor >= 1) {
2345 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2346 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2347 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2348 			adev->gfx.config.num_sc_per_sh =
2349 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2350 			adev->gfx.config.num_packer_per_sc =
2351 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2352 		}
2353 
2354 parse_soc_bounding_box:
2355 		/*
2356 		 * soc bounding box info is not integrated in disocovery table,
2357 		 * we always need to parse it from gpu info firmware if needed.
2358 		 */
2359 		if (hdr->version_minor == 2) {
2360 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2361 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2362 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2363 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2364 		}
2365 		break;
2366 	}
2367 	default:
2368 		dev_err(adev->dev,
2369 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2370 		err = -EINVAL;
2371 		goto out;
2372 	}
2373 out:
2374 	return err;
2375 }
2376 
2377 /**
2378  * amdgpu_device_ip_early_init - run early init for hardware IPs
2379  *
2380  * @adev: amdgpu_device pointer
2381  *
2382  * Early initialization pass for hardware IPs.  The hardware IPs that make
2383  * up each asic are discovered each IP's early_init callback is run.  This
2384  * is the first stage in initializing the asic.
2385  * Returns 0 on success, negative error code on failure.
2386  */
2387 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2388 {
2389 	struct pci_dev *parent;
2390 	int i, r;
2391 	bool total;
2392 
2393 	amdgpu_device_enable_virtual_display(adev);
2394 
2395 	if (amdgpu_sriov_vf(adev)) {
2396 		r = amdgpu_virt_request_full_gpu(adev, true);
2397 		if (r)
2398 			return r;
2399 	}
2400 
2401 	switch (adev->asic_type) {
2402 #ifdef CONFIG_DRM_AMDGPU_SI
2403 	case CHIP_VERDE:
2404 	case CHIP_TAHITI:
2405 	case CHIP_PITCAIRN:
2406 	case CHIP_OLAND:
2407 	case CHIP_HAINAN:
2408 		adev->family = AMDGPU_FAMILY_SI;
2409 		r = si_set_ip_blocks(adev);
2410 		if (r)
2411 			return r;
2412 		break;
2413 #endif
2414 #ifdef CONFIG_DRM_AMDGPU_CIK
2415 	case CHIP_BONAIRE:
2416 	case CHIP_HAWAII:
2417 	case CHIP_KAVERI:
2418 	case CHIP_KABINI:
2419 	case CHIP_MULLINS:
2420 		if (adev->flags & AMD_IS_APU)
2421 			adev->family = AMDGPU_FAMILY_KV;
2422 		else
2423 			adev->family = AMDGPU_FAMILY_CI;
2424 
2425 		r = cik_set_ip_blocks(adev);
2426 		if (r)
2427 			return r;
2428 		break;
2429 #endif
2430 	case CHIP_TOPAZ:
2431 	case CHIP_TONGA:
2432 	case CHIP_FIJI:
2433 	case CHIP_POLARIS10:
2434 	case CHIP_POLARIS11:
2435 	case CHIP_POLARIS12:
2436 	case CHIP_VEGAM:
2437 	case CHIP_CARRIZO:
2438 	case CHIP_STONEY:
2439 		if (adev->flags & AMD_IS_APU)
2440 			adev->family = AMDGPU_FAMILY_CZ;
2441 		else
2442 			adev->family = AMDGPU_FAMILY_VI;
2443 
2444 		r = vi_set_ip_blocks(adev);
2445 		if (r)
2446 			return r;
2447 		break;
2448 	default:
2449 		r = amdgpu_discovery_set_ip_blocks(adev);
2450 		if (r)
2451 			return r;
2452 		break;
2453 	}
2454 
2455 	if (amdgpu_has_atpx() &&
2456 	    (amdgpu_is_atpx_hybrid() ||
2457 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2458 	    ((adev->flags & AMD_IS_APU) == 0) &&
2459 	    !dev_is_removable(&adev->pdev->dev))
2460 		adev->flags |= AMD_IS_PX;
2461 
2462 	if (!(adev->flags & AMD_IS_APU)) {
2463 		parent = pcie_find_root_port(adev->pdev);
2464 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2465 	}
2466 
2467 
2468 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2469 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2470 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2471 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2472 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2473 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2474 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2475 
2476 	total = true;
2477 	for (i = 0; i < adev->num_ip_blocks; i++) {
2478 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2479 			DRM_WARN("disabled ip block: %d <%s>\n",
2480 				  i, adev->ip_blocks[i].version->funcs->name);
2481 			adev->ip_blocks[i].status.valid = false;
2482 		} else {
2483 			if (adev->ip_blocks[i].version->funcs->early_init) {
2484 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2485 				if (r == -ENOENT) {
2486 					adev->ip_blocks[i].status.valid = false;
2487 				} else if (r) {
2488 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2489 						  adev->ip_blocks[i].version->funcs->name, r);
2490 					total = false;
2491 				} else {
2492 					adev->ip_blocks[i].status.valid = true;
2493 				}
2494 			} else {
2495 				adev->ip_blocks[i].status.valid = true;
2496 			}
2497 		}
2498 		/* get the vbios after the asic_funcs are set up */
2499 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2500 			r = amdgpu_device_parse_gpu_info_fw(adev);
2501 			if (r)
2502 				return r;
2503 
2504 			/* Read BIOS */
2505 			if (amdgpu_device_read_bios(adev)) {
2506 				if (!amdgpu_get_bios(adev))
2507 					return -EINVAL;
2508 
2509 				r = amdgpu_atombios_init(adev);
2510 				if (r) {
2511 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2512 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2513 					return r;
2514 				}
2515 			}
2516 
2517 			/*get pf2vf msg info at it's earliest time*/
2518 			if (amdgpu_sriov_vf(adev))
2519 				amdgpu_virt_init_data_exchange(adev);
2520 
2521 		}
2522 	}
2523 	if (!total)
2524 		return -ENODEV;
2525 
2526 	amdgpu_amdkfd_device_probe(adev);
2527 	adev->cg_flags &= amdgpu_cg_mask;
2528 	adev->pg_flags &= amdgpu_pg_mask;
2529 
2530 	return 0;
2531 }
2532 
2533 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2534 {
2535 	int i, r;
2536 
2537 	for (i = 0; i < adev->num_ip_blocks; i++) {
2538 		if (!adev->ip_blocks[i].status.sw)
2539 			continue;
2540 		if (adev->ip_blocks[i].status.hw)
2541 			continue;
2542 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2543 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2544 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2545 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2546 			if (r) {
2547 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2548 					  adev->ip_blocks[i].version->funcs->name, r);
2549 				return r;
2550 			}
2551 			adev->ip_blocks[i].status.hw = true;
2552 		}
2553 	}
2554 
2555 	return 0;
2556 }
2557 
2558 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2559 {
2560 	int i, r;
2561 
2562 	for (i = 0; i < adev->num_ip_blocks; i++) {
2563 		if (!adev->ip_blocks[i].status.sw)
2564 			continue;
2565 		if (adev->ip_blocks[i].status.hw)
2566 			continue;
2567 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2568 		if (r) {
2569 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2570 				  adev->ip_blocks[i].version->funcs->name, r);
2571 			return r;
2572 		}
2573 		adev->ip_blocks[i].status.hw = true;
2574 	}
2575 
2576 	return 0;
2577 }
2578 
2579 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2580 {
2581 	int r = 0;
2582 	int i;
2583 	uint32_t smu_version;
2584 
2585 	if (adev->asic_type >= CHIP_VEGA10) {
2586 		for (i = 0; i < adev->num_ip_blocks; i++) {
2587 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2588 				continue;
2589 
2590 			if (!adev->ip_blocks[i].status.sw)
2591 				continue;
2592 
2593 			/* no need to do the fw loading again if already done*/
2594 			if (adev->ip_blocks[i].status.hw == true)
2595 				break;
2596 
2597 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2598 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2599 				if (r) {
2600 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2601 							  adev->ip_blocks[i].version->funcs->name, r);
2602 					return r;
2603 				}
2604 			} else {
2605 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2606 				if (r) {
2607 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2608 							  adev->ip_blocks[i].version->funcs->name, r);
2609 					return r;
2610 				}
2611 			}
2612 
2613 			adev->ip_blocks[i].status.hw = true;
2614 			break;
2615 		}
2616 	}
2617 
2618 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2619 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2620 
2621 	return r;
2622 }
2623 
2624 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2625 {
2626 	long timeout;
2627 	int r, i;
2628 
2629 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2630 		struct amdgpu_ring *ring = adev->rings[i];
2631 
2632 		/* No need to setup the GPU scheduler for rings that don't need it */
2633 		if (!ring || ring->no_scheduler)
2634 			continue;
2635 
2636 		switch (ring->funcs->type) {
2637 		case AMDGPU_RING_TYPE_GFX:
2638 			timeout = adev->gfx_timeout;
2639 			break;
2640 		case AMDGPU_RING_TYPE_COMPUTE:
2641 			timeout = adev->compute_timeout;
2642 			break;
2643 		case AMDGPU_RING_TYPE_SDMA:
2644 			timeout = adev->sdma_timeout;
2645 			break;
2646 		default:
2647 			timeout = adev->video_timeout;
2648 			break;
2649 		}
2650 
2651 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2652 				   DRM_SCHED_PRIORITY_COUNT,
2653 				   ring->num_hw_submission, 0,
2654 				   timeout, adev->reset_domain->wq,
2655 				   ring->sched_score, ring->name,
2656 				   adev->dev);
2657 		if (r) {
2658 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2659 				  ring->name);
2660 			return r;
2661 		}
2662 		r = amdgpu_uvd_entity_init(adev, ring);
2663 		if (r) {
2664 			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2665 				  ring->name);
2666 			return r;
2667 		}
2668 		r = amdgpu_vce_entity_init(adev, ring);
2669 		if (r) {
2670 			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2671 				  ring->name);
2672 			return r;
2673 		}
2674 	}
2675 
2676 	amdgpu_xcp_update_partition_sched_list(adev);
2677 
2678 	return 0;
2679 }
2680 
2681 
2682 /**
2683  * amdgpu_device_ip_init - run init for hardware IPs
2684  *
2685  * @adev: amdgpu_device pointer
2686  *
2687  * Main initialization pass for hardware IPs.  The list of all the hardware
2688  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2689  * are run.  sw_init initializes the software state associated with each IP
2690  * and hw_init initializes the hardware associated with each IP.
2691  * Returns 0 on success, negative error code on failure.
2692  */
2693 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2694 {
2695 	int i, r;
2696 
2697 	r = amdgpu_ras_init(adev);
2698 	if (r)
2699 		return r;
2700 
2701 	for (i = 0; i < adev->num_ip_blocks; i++) {
2702 		if (!adev->ip_blocks[i].status.valid)
2703 			continue;
2704 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2705 		if (r) {
2706 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2707 				  adev->ip_blocks[i].version->funcs->name, r);
2708 			goto init_failed;
2709 		}
2710 		adev->ip_blocks[i].status.sw = true;
2711 
2712 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2713 			/* need to do common hw init early so everything is set up for gmc */
2714 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2715 			if (r) {
2716 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2717 				goto init_failed;
2718 			}
2719 			adev->ip_blocks[i].status.hw = true;
2720 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2721 			/* need to do gmc hw init early so we can allocate gpu mem */
2722 			/* Try to reserve bad pages early */
2723 			if (amdgpu_sriov_vf(adev))
2724 				amdgpu_virt_exchange_data(adev);
2725 
2726 			r = amdgpu_device_mem_scratch_init(adev);
2727 			if (r) {
2728 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2729 				goto init_failed;
2730 			}
2731 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2732 			if (r) {
2733 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2734 				goto init_failed;
2735 			}
2736 			r = amdgpu_device_wb_init(adev);
2737 			if (r) {
2738 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2739 				goto init_failed;
2740 			}
2741 			adev->ip_blocks[i].status.hw = true;
2742 
2743 			/* right after GMC hw init, we create CSA */
2744 			if (adev->gfx.mcbp) {
2745 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2746 							       AMDGPU_GEM_DOMAIN_VRAM |
2747 							       AMDGPU_GEM_DOMAIN_GTT,
2748 							       AMDGPU_CSA_SIZE);
2749 				if (r) {
2750 					DRM_ERROR("allocate CSA failed %d\n", r);
2751 					goto init_failed;
2752 				}
2753 			}
2754 
2755 			r = amdgpu_seq64_init(adev);
2756 			if (r) {
2757 				DRM_ERROR("allocate seq64 failed %d\n", r);
2758 				goto init_failed;
2759 			}
2760 		}
2761 	}
2762 
2763 	if (amdgpu_sriov_vf(adev))
2764 		amdgpu_virt_init_data_exchange(adev);
2765 
2766 	r = amdgpu_ib_pool_init(adev);
2767 	if (r) {
2768 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2769 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2770 		goto init_failed;
2771 	}
2772 
2773 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2774 	if (r)
2775 		goto init_failed;
2776 
2777 	r = amdgpu_device_ip_hw_init_phase1(adev);
2778 	if (r)
2779 		goto init_failed;
2780 
2781 	r = amdgpu_device_fw_loading(adev);
2782 	if (r)
2783 		goto init_failed;
2784 
2785 	r = amdgpu_device_ip_hw_init_phase2(adev);
2786 	if (r)
2787 		goto init_failed;
2788 
2789 	/*
2790 	 * retired pages will be loaded from eeprom and reserved here,
2791 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2792 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2793 	 * for I2C communication which only true at this point.
2794 	 *
2795 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2796 	 * failure from bad gpu situation and stop amdgpu init process
2797 	 * accordingly. For other failed cases, it will still release all
2798 	 * the resource and print error message, rather than returning one
2799 	 * negative value to upper level.
2800 	 *
2801 	 * Note: theoretically, this should be called before all vram allocations
2802 	 * to protect retired page from abusing
2803 	 */
2804 	r = amdgpu_ras_recovery_init(adev);
2805 	if (r)
2806 		goto init_failed;
2807 
2808 	/**
2809 	 * In case of XGMI grab extra reference for reset domain for this device
2810 	 */
2811 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2812 		if (amdgpu_xgmi_add_device(adev) == 0) {
2813 			if (!amdgpu_sriov_vf(adev)) {
2814 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2815 
2816 				if (WARN_ON(!hive)) {
2817 					r = -ENOENT;
2818 					goto init_failed;
2819 				}
2820 
2821 				if (!hive->reset_domain ||
2822 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2823 					r = -ENOENT;
2824 					amdgpu_put_xgmi_hive(hive);
2825 					goto init_failed;
2826 				}
2827 
2828 				/* Drop the early temporary reset domain we created for device */
2829 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2830 				adev->reset_domain = hive->reset_domain;
2831 				amdgpu_put_xgmi_hive(hive);
2832 			}
2833 		}
2834 	}
2835 
2836 	r = amdgpu_device_init_schedulers(adev);
2837 	if (r)
2838 		goto init_failed;
2839 
2840 	if (adev->mman.buffer_funcs_ring->sched.ready)
2841 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2842 
2843 	/* Don't init kfd if whole hive need to be reset during init */
2844 	if (!adev->gmc.xgmi.pending_reset) {
2845 		kgd2kfd_init_zone_device(adev);
2846 		amdgpu_amdkfd_device_init(adev);
2847 	}
2848 
2849 	amdgpu_fru_get_product_info(adev);
2850 
2851 init_failed:
2852 
2853 	return r;
2854 }
2855 
2856 /**
2857  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2858  *
2859  * @adev: amdgpu_device pointer
2860  *
2861  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2862  * this function before a GPU reset.  If the value is retained after a
2863  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2864  */
2865 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2866 {
2867 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2868 }
2869 
2870 /**
2871  * amdgpu_device_check_vram_lost - check if vram is valid
2872  *
2873  * @adev: amdgpu_device pointer
2874  *
2875  * Checks the reset magic value written to the gart pointer in VRAM.
2876  * The driver calls this after a GPU reset to see if the contents of
2877  * VRAM is lost or now.
2878  * returns true if vram is lost, false if not.
2879  */
2880 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2881 {
2882 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2883 			AMDGPU_RESET_MAGIC_NUM))
2884 		return true;
2885 
2886 	if (!amdgpu_in_reset(adev))
2887 		return false;
2888 
2889 	/*
2890 	 * For all ASICs with baco/mode1 reset, the VRAM is
2891 	 * always assumed to be lost.
2892 	 */
2893 	switch (amdgpu_asic_reset_method(adev)) {
2894 	case AMD_RESET_METHOD_BACO:
2895 	case AMD_RESET_METHOD_MODE1:
2896 		return true;
2897 	default:
2898 		return false;
2899 	}
2900 }
2901 
2902 /**
2903  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2904  *
2905  * @adev: amdgpu_device pointer
2906  * @state: clockgating state (gate or ungate)
2907  *
2908  * The list of all the hardware IPs that make up the asic is walked and the
2909  * set_clockgating_state callbacks are run.
2910  * Late initialization pass enabling clockgating for hardware IPs.
2911  * Fini or suspend, pass disabling clockgating for hardware IPs.
2912  * Returns 0 on success, negative error code on failure.
2913  */
2914 
2915 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2916 			       enum amd_clockgating_state state)
2917 {
2918 	int i, j, r;
2919 
2920 	if (amdgpu_emu_mode == 1)
2921 		return 0;
2922 
2923 	for (j = 0; j < adev->num_ip_blocks; j++) {
2924 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2925 		if (!adev->ip_blocks[i].status.late_initialized)
2926 			continue;
2927 		/* skip CG for GFX, SDMA on S0ix */
2928 		if (adev->in_s0ix &&
2929 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2930 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2931 			continue;
2932 		/* skip CG for VCE/UVD, it's handled specially */
2933 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2934 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2935 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2936 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2937 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2938 			/* enable clockgating to save power */
2939 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2940 										     state);
2941 			if (r) {
2942 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2943 					  adev->ip_blocks[i].version->funcs->name, r);
2944 				return r;
2945 			}
2946 		}
2947 	}
2948 
2949 	return 0;
2950 }
2951 
2952 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2953 			       enum amd_powergating_state state)
2954 {
2955 	int i, j, r;
2956 
2957 	if (amdgpu_emu_mode == 1)
2958 		return 0;
2959 
2960 	for (j = 0; j < adev->num_ip_blocks; j++) {
2961 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2962 		if (!adev->ip_blocks[i].status.late_initialized)
2963 			continue;
2964 		/* skip PG for GFX, SDMA on S0ix */
2965 		if (adev->in_s0ix &&
2966 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2967 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2968 			continue;
2969 		/* skip CG for VCE/UVD, it's handled specially */
2970 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2971 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2972 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2973 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2974 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2975 			/* enable powergating to save power */
2976 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2977 											state);
2978 			if (r) {
2979 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2980 					  adev->ip_blocks[i].version->funcs->name, r);
2981 				return r;
2982 			}
2983 		}
2984 	}
2985 	return 0;
2986 }
2987 
2988 static int amdgpu_device_enable_mgpu_fan_boost(void)
2989 {
2990 	struct amdgpu_gpu_instance *gpu_ins;
2991 	struct amdgpu_device *adev;
2992 	int i, ret = 0;
2993 
2994 	mutex_lock(&mgpu_info.mutex);
2995 
2996 	/*
2997 	 * MGPU fan boost feature should be enabled
2998 	 * only when there are two or more dGPUs in
2999 	 * the system
3000 	 */
3001 	if (mgpu_info.num_dgpu < 2)
3002 		goto out;
3003 
3004 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3005 		gpu_ins = &(mgpu_info.gpu_ins[i]);
3006 		adev = gpu_ins->adev;
3007 		if (!(adev->flags & AMD_IS_APU) &&
3008 		    !gpu_ins->mgpu_fan_enabled) {
3009 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3010 			if (ret)
3011 				break;
3012 
3013 			gpu_ins->mgpu_fan_enabled = 1;
3014 		}
3015 	}
3016 
3017 out:
3018 	mutex_unlock(&mgpu_info.mutex);
3019 
3020 	return ret;
3021 }
3022 
3023 /**
3024  * amdgpu_device_ip_late_init - run late init for hardware IPs
3025  *
3026  * @adev: amdgpu_device pointer
3027  *
3028  * Late initialization pass for hardware IPs.  The list of all the hardware
3029  * IPs that make up the asic is walked and the late_init callbacks are run.
3030  * late_init covers any special initialization that an IP requires
3031  * after all of the have been initialized or something that needs to happen
3032  * late in the init process.
3033  * Returns 0 on success, negative error code on failure.
3034  */
3035 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3036 {
3037 	struct amdgpu_gpu_instance *gpu_instance;
3038 	int i = 0, r;
3039 
3040 	for (i = 0; i < adev->num_ip_blocks; i++) {
3041 		if (!adev->ip_blocks[i].status.hw)
3042 			continue;
3043 		if (adev->ip_blocks[i].version->funcs->late_init) {
3044 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3045 			if (r) {
3046 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3047 					  adev->ip_blocks[i].version->funcs->name, r);
3048 				return r;
3049 			}
3050 		}
3051 		adev->ip_blocks[i].status.late_initialized = true;
3052 	}
3053 
3054 	r = amdgpu_ras_late_init(adev);
3055 	if (r) {
3056 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3057 		return r;
3058 	}
3059 
3060 	amdgpu_ras_set_error_query_ready(adev, true);
3061 
3062 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3063 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3064 
3065 	amdgpu_device_fill_reset_magic(adev);
3066 
3067 	r = amdgpu_device_enable_mgpu_fan_boost();
3068 	if (r)
3069 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3070 
3071 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3072 	if (amdgpu_passthrough(adev) &&
3073 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3074 	     adev->asic_type == CHIP_ALDEBARAN))
3075 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3076 
3077 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3078 		mutex_lock(&mgpu_info.mutex);
3079 
3080 		/*
3081 		 * Reset device p-state to low as this was booted with high.
3082 		 *
3083 		 * This should be performed only after all devices from the same
3084 		 * hive get initialized.
3085 		 *
3086 		 * However, it's unknown how many device in the hive in advance.
3087 		 * As this is counted one by one during devices initializations.
3088 		 *
3089 		 * So, we wait for all XGMI interlinked devices initialized.
3090 		 * This may bring some delays as those devices may come from
3091 		 * different hives. But that should be OK.
3092 		 */
3093 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3094 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3095 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3096 				if (gpu_instance->adev->flags & AMD_IS_APU)
3097 					continue;
3098 
3099 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3100 						AMDGPU_XGMI_PSTATE_MIN);
3101 				if (r) {
3102 					DRM_ERROR("pstate setting failed (%d).\n", r);
3103 					break;
3104 				}
3105 			}
3106 		}
3107 
3108 		mutex_unlock(&mgpu_info.mutex);
3109 	}
3110 
3111 	return 0;
3112 }
3113 
3114 /**
3115  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3116  *
3117  * @adev: amdgpu_device pointer
3118  *
3119  * For ASICs need to disable SMC first
3120  */
3121 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3122 {
3123 	int i, r;
3124 
3125 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3126 		return;
3127 
3128 	for (i = 0; i < adev->num_ip_blocks; i++) {
3129 		if (!adev->ip_blocks[i].status.hw)
3130 			continue;
3131 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3132 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3133 			/* XXX handle errors */
3134 			if (r) {
3135 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3136 					  adev->ip_blocks[i].version->funcs->name, r);
3137 			}
3138 			adev->ip_blocks[i].status.hw = false;
3139 			break;
3140 		}
3141 	}
3142 }
3143 
3144 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3145 {
3146 	int i, r;
3147 
3148 	for (i = 0; i < adev->num_ip_blocks; i++) {
3149 		if (!adev->ip_blocks[i].version->funcs->early_fini)
3150 			continue;
3151 
3152 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3153 		if (r) {
3154 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3155 				  adev->ip_blocks[i].version->funcs->name, r);
3156 		}
3157 	}
3158 
3159 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3160 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3161 
3162 	amdgpu_amdkfd_suspend(adev, false);
3163 
3164 	/* Workaroud for ASICs need to disable SMC first */
3165 	amdgpu_device_smu_fini_early(adev);
3166 
3167 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3168 		if (!adev->ip_blocks[i].status.hw)
3169 			continue;
3170 
3171 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3172 		/* XXX handle errors */
3173 		if (r) {
3174 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3175 				  adev->ip_blocks[i].version->funcs->name, r);
3176 		}
3177 
3178 		adev->ip_blocks[i].status.hw = false;
3179 	}
3180 
3181 	if (amdgpu_sriov_vf(adev)) {
3182 		if (amdgpu_virt_release_full_gpu(adev, false))
3183 			DRM_ERROR("failed to release exclusive mode on fini\n");
3184 	}
3185 
3186 	return 0;
3187 }
3188 
3189 /**
3190  * amdgpu_device_ip_fini - run fini for hardware IPs
3191  *
3192  * @adev: amdgpu_device pointer
3193  *
3194  * Main teardown pass for hardware IPs.  The list of all the hardware
3195  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3196  * are run.  hw_fini tears down the hardware associated with each IP
3197  * and sw_fini tears down any software state associated with each IP.
3198  * Returns 0 on success, negative error code on failure.
3199  */
3200 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3201 {
3202 	int i, r;
3203 
3204 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3205 		amdgpu_virt_release_ras_err_handler_data(adev);
3206 
3207 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3208 		amdgpu_xgmi_remove_device(adev);
3209 
3210 	amdgpu_amdkfd_device_fini_sw(adev);
3211 
3212 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3213 		if (!adev->ip_blocks[i].status.sw)
3214 			continue;
3215 
3216 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3217 			amdgpu_ucode_free_bo(adev);
3218 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3219 			amdgpu_device_wb_fini(adev);
3220 			amdgpu_device_mem_scratch_fini(adev);
3221 			amdgpu_ib_pool_fini(adev);
3222 			amdgpu_seq64_fini(adev);
3223 		}
3224 
3225 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3226 		/* XXX handle errors */
3227 		if (r) {
3228 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3229 				  adev->ip_blocks[i].version->funcs->name, r);
3230 		}
3231 		adev->ip_blocks[i].status.sw = false;
3232 		adev->ip_blocks[i].status.valid = false;
3233 	}
3234 
3235 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3236 		if (!adev->ip_blocks[i].status.late_initialized)
3237 			continue;
3238 		if (adev->ip_blocks[i].version->funcs->late_fini)
3239 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3240 		adev->ip_blocks[i].status.late_initialized = false;
3241 	}
3242 
3243 	amdgpu_ras_fini(adev);
3244 
3245 	return 0;
3246 }
3247 
3248 /**
3249  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3250  *
3251  * @work: work_struct.
3252  */
3253 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3254 {
3255 	struct amdgpu_device *adev =
3256 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3257 	int r;
3258 
3259 	r = amdgpu_ib_ring_tests(adev);
3260 	if (r)
3261 		DRM_ERROR("ib ring test failed (%d).\n", r);
3262 }
3263 
3264 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3265 {
3266 	struct amdgpu_device *adev =
3267 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3268 
3269 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3270 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3271 
3272 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3273 		adev->gfx.gfx_off_state = true;
3274 }
3275 
3276 /**
3277  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3278  *
3279  * @adev: amdgpu_device pointer
3280  *
3281  * Main suspend function for hardware IPs.  The list of all the hardware
3282  * IPs that make up the asic is walked, clockgating is disabled and the
3283  * suspend callbacks are run.  suspend puts the hardware and software state
3284  * in each IP into a state suitable for suspend.
3285  * Returns 0 on success, negative error code on failure.
3286  */
3287 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3288 {
3289 	int i, r;
3290 
3291 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3292 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3293 
3294 	/*
3295 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3296 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3297 	 * scenario. Add the missing df cstate disablement here.
3298 	 */
3299 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3300 		dev_warn(adev->dev, "Failed to disallow df cstate");
3301 
3302 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3303 		if (!adev->ip_blocks[i].status.valid)
3304 			continue;
3305 
3306 		/* displays are handled separately */
3307 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3308 			continue;
3309 
3310 		/* XXX handle errors */
3311 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3312 		/* XXX handle errors */
3313 		if (r) {
3314 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3315 				  adev->ip_blocks[i].version->funcs->name, r);
3316 			return r;
3317 		}
3318 
3319 		adev->ip_blocks[i].status.hw = false;
3320 	}
3321 
3322 	return 0;
3323 }
3324 
3325 /**
3326  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3327  *
3328  * @adev: amdgpu_device pointer
3329  *
3330  * Main suspend function for hardware IPs.  The list of all the hardware
3331  * IPs that make up the asic is walked, clockgating is disabled and the
3332  * suspend callbacks are run.  suspend puts the hardware and software state
3333  * in each IP into a state suitable for suspend.
3334  * Returns 0 on success, negative error code on failure.
3335  */
3336 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3337 {
3338 	int i, r;
3339 
3340 	if (adev->in_s0ix)
3341 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3342 
3343 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3344 		if (!adev->ip_blocks[i].status.valid)
3345 			continue;
3346 		/* displays are handled in phase1 */
3347 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3348 			continue;
3349 		/* PSP lost connection when err_event_athub occurs */
3350 		if (amdgpu_ras_intr_triggered() &&
3351 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3352 			adev->ip_blocks[i].status.hw = false;
3353 			continue;
3354 		}
3355 
3356 		/* skip unnecessary suspend if we do not initialize them yet */
3357 		if (adev->gmc.xgmi.pending_reset &&
3358 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3359 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3360 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3361 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3362 			adev->ip_blocks[i].status.hw = false;
3363 			continue;
3364 		}
3365 
3366 		/* skip suspend of gfx/mes and psp for S0ix
3367 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3368 		 * like at runtime. PSP is also part of the always on hardware
3369 		 * so no need to suspend it.
3370 		 */
3371 		if (adev->in_s0ix &&
3372 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3373 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3374 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3375 			continue;
3376 
3377 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3378 		if (adev->in_s0ix &&
3379 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3380 		     IP_VERSION(5, 0, 0)) &&
3381 		    (adev->ip_blocks[i].version->type ==
3382 		     AMD_IP_BLOCK_TYPE_SDMA))
3383 			continue;
3384 
3385 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3386 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3387 		 * from this location and RLC Autoload automatically also gets loaded
3388 		 * from here based on PMFW -> PSP message during re-init sequence.
3389 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3390 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3391 		 */
3392 		if (amdgpu_in_reset(adev) &&
3393 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3394 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3395 			continue;
3396 
3397 		/* XXX handle errors */
3398 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3399 		/* XXX handle errors */
3400 		if (r) {
3401 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3402 				  adev->ip_blocks[i].version->funcs->name, r);
3403 		}
3404 		adev->ip_blocks[i].status.hw = false;
3405 		/* handle putting the SMC in the appropriate state */
3406 		if (!amdgpu_sriov_vf(adev)) {
3407 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3408 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3409 				if (r) {
3410 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3411 							adev->mp1_state, r);
3412 					return r;
3413 				}
3414 			}
3415 		}
3416 	}
3417 
3418 	return 0;
3419 }
3420 
3421 /**
3422  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3423  *
3424  * @adev: amdgpu_device pointer
3425  *
3426  * Main suspend function for hardware IPs.  The list of all the hardware
3427  * IPs that make up the asic is walked, clockgating is disabled and the
3428  * suspend callbacks are run.  suspend puts the hardware and software state
3429  * in each IP into a state suitable for suspend.
3430  * Returns 0 on success, negative error code on failure.
3431  */
3432 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3433 {
3434 	int r;
3435 
3436 	if (amdgpu_sriov_vf(adev)) {
3437 		amdgpu_virt_fini_data_exchange(adev);
3438 		amdgpu_virt_request_full_gpu(adev, false);
3439 	}
3440 
3441 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3442 
3443 	r = amdgpu_device_ip_suspend_phase1(adev);
3444 	if (r)
3445 		return r;
3446 	r = amdgpu_device_ip_suspend_phase2(adev);
3447 
3448 	if (amdgpu_sriov_vf(adev))
3449 		amdgpu_virt_release_full_gpu(adev, false);
3450 
3451 	return r;
3452 }
3453 
3454 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3455 {
3456 	int i, r;
3457 
3458 	static enum amd_ip_block_type ip_order[] = {
3459 		AMD_IP_BLOCK_TYPE_COMMON,
3460 		AMD_IP_BLOCK_TYPE_GMC,
3461 		AMD_IP_BLOCK_TYPE_PSP,
3462 		AMD_IP_BLOCK_TYPE_IH,
3463 	};
3464 
3465 	for (i = 0; i < adev->num_ip_blocks; i++) {
3466 		int j;
3467 		struct amdgpu_ip_block *block;
3468 
3469 		block = &adev->ip_blocks[i];
3470 		block->status.hw = false;
3471 
3472 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3473 
3474 			if (block->version->type != ip_order[j] ||
3475 				!block->status.valid)
3476 				continue;
3477 
3478 			r = block->version->funcs->hw_init(adev);
3479 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3480 			if (r)
3481 				return r;
3482 			block->status.hw = true;
3483 		}
3484 	}
3485 
3486 	return 0;
3487 }
3488 
3489 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3490 {
3491 	int i, r;
3492 
3493 	static enum amd_ip_block_type ip_order[] = {
3494 		AMD_IP_BLOCK_TYPE_SMC,
3495 		AMD_IP_BLOCK_TYPE_DCE,
3496 		AMD_IP_BLOCK_TYPE_GFX,
3497 		AMD_IP_BLOCK_TYPE_SDMA,
3498 		AMD_IP_BLOCK_TYPE_MES,
3499 		AMD_IP_BLOCK_TYPE_UVD,
3500 		AMD_IP_BLOCK_TYPE_VCE,
3501 		AMD_IP_BLOCK_TYPE_VCN,
3502 		AMD_IP_BLOCK_TYPE_JPEG
3503 	};
3504 
3505 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3506 		int j;
3507 		struct amdgpu_ip_block *block;
3508 
3509 		for (j = 0; j < adev->num_ip_blocks; j++) {
3510 			block = &adev->ip_blocks[j];
3511 
3512 			if (block->version->type != ip_order[i] ||
3513 				!block->status.valid ||
3514 				block->status.hw)
3515 				continue;
3516 
3517 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3518 				r = block->version->funcs->resume(adev);
3519 			else
3520 				r = block->version->funcs->hw_init(adev);
3521 
3522 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3523 			if (r)
3524 				return r;
3525 			block->status.hw = true;
3526 		}
3527 	}
3528 
3529 	return 0;
3530 }
3531 
3532 /**
3533  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3534  *
3535  * @adev: amdgpu_device pointer
3536  *
3537  * First resume function for hardware IPs.  The list of all the hardware
3538  * IPs that make up the asic is walked and the resume callbacks are run for
3539  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3540  * after a suspend and updates the software state as necessary.  This
3541  * function is also used for restoring the GPU after a GPU reset.
3542  * Returns 0 on success, negative error code on failure.
3543  */
3544 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3545 {
3546 	int i, r;
3547 
3548 	for (i = 0; i < adev->num_ip_blocks; i++) {
3549 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3550 			continue;
3551 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3552 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3553 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3554 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3555 
3556 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3557 			if (r) {
3558 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3559 					  adev->ip_blocks[i].version->funcs->name, r);
3560 				return r;
3561 			}
3562 			adev->ip_blocks[i].status.hw = true;
3563 		}
3564 	}
3565 
3566 	return 0;
3567 }
3568 
3569 /**
3570  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3571  *
3572  * @adev: amdgpu_device pointer
3573  *
3574  * First resume function for hardware IPs.  The list of all the hardware
3575  * IPs that make up the asic is walked and the resume callbacks are run for
3576  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3577  * functional state after a suspend and updates the software state as
3578  * necessary.  This function is also used for restoring the GPU after a GPU
3579  * reset.
3580  * Returns 0 on success, negative error code on failure.
3581  */
3582 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3583 {
3584 	int i, r;
3585 
3586 	for (i = 0; i < adev->num_ip_blocks; i++) {
3587 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3588 			continue;
3589 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3590 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3591 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3592 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3593 			continue;
3594 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3595 		if (r) {
3596 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3597 				  adev->ip_blocks[i].version->funcs->name, r);
3598 			return r;
3599 		}
3600 		adev->ip_blocks[i].status.hw = true;
3601 	}
3602 
3603 	return 0;
3604 }
3605 
3606 /**
3607  * amdgpu_device_ip_resume - run resume for hardware IPs
3608  *
3609  * @adev: amdgpu_device pointer
3610  *
3611  * Main resume function for hardware IPs.  The hardware IPs
3612  * are split into two resume functions because they are
3613  * also used in recovering from a GPU reset and some additional
3614  * steps need to be take between them.  In this case (S3/S4) they are
3615  * run sequentially.
3616  * Returns 0 on success, negative error code on failure.
3617  */
3618 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3619 {
3620 	int r;
3621 
3622 	r = amdgpu_device_ip_resume_phase1(adev);
3623 	if (r)
3624 		return r;
3625 
3626 	r = amdgpu_device_fw_loading(adev);
3627 	if (r)
3628 		return r;
3629 
3630 	r = amdgpu_device_ip_resume_phase2(adev);
3631 
3632 	if (adev->mman.buffer_funcs_ring->sched.ready)
3633 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3634 
3635 	return r;
3636 }
3637 
3638 /**
3639  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3640  *
3641  * @adev: amdgpu_device pointer
3642  *
3643  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3644  */
3645 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3646 {
3647 	if (amdgpu_sriov_vf(adev)) {
3648 		if (adev->is_atom_fw) {
3649 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3650 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3651 		} else {
3652 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3653 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3654 		}
3655 
3656 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3657 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3658 	}
3659 }
3660 
3661 /**
3662  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3663  *
3664  * @asic_type: AMD asic type
3665  *
3666  * Check if there is DC (new modesetting infrastructre) support for an asic.
3667  * returns true if DC has support, false if not.
3668  */
3669 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3670 {
3671 	switch (asic_type) {
3672 #ifdef CONFIG_DRM_AMDGPU_SI
3673 	case CHIP_HAINAN:
3674 #endif
3675 	case CHIP_TOPAZ:
3676 		/* chips with no display hardware */
3677 		return false;
3678 #if defined(CONFIG_DRM_AMD_DC)
3679 	case CHIP_TAHITI:
3680 	case CHIP_PITCAIRN:
3681 	case CHIP_VERDE:
3682 	case CHIP_OLAND:
3683 		/*
3684 		 * We have systems in the wild with these ASICs that require
3685 		 * LVDS and VGA support which is not supported with DC.
3686 		 *
3687 		 * Fallback to the non-DC driver here by default so as not to
3688 		 * cause regressions.
3689 		 */
3690 #if defined(CONFIG_DRM_AMD_DC_SI)
3691 		return amdgpu_dc > 0;
3692 #else
3693 		return false;
3694 #endif
3695 	case CHIP_BONAIRE:
3696 	case CHIP_KAVERI:
3697 	case CHIP_KABINI:
3698 	case CHIP_MULLINS:
3699 		/*
3700 		 * We have systems in the wild with these ASICs that require
3701 		 * VGA support which is not supported with DC.
3702 		 *
3703 		 * Fallback to the non-DC driver here by default so as not to
3704 		 * cause regressions.
3705 		 */
3706 		return amdgpu_dc > 0;
3707 	default:
3708 		return amdgpu_dc != 0;
3709 #else
3710 	default:
3711 		if (amdgpu_dc > 0)
3712 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3713 		return false;
3714 #endif
3715 	}
3716 }
3717 
3718 /**
3719  * amdgpu_device_has_dc_support - check if dc is supported
3720  *
3721  * @adev: amdgpu_device pointer
3722  *
3723  * Returns true for supported, false for not supported
3724  */
3725 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3726 {
3727 	if (adev->enable_virtual_display ||
3728 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3729 		return false;
3730 
3731 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3732 }
3733 
3734 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3735 {
3736 	struct amdgpu_device *adev =
3737 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3738 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3739 
3740 	/* It's a bug to not have a hive within this function */
3741 	if (WARN_ON(!hive))
3742 		return;
3743 
3744 	/*
3745 	 * Use task barrier to synchronize all xgmi reset works across the
3746 	 * hive. task_barrier_enter and task_barrier_exit will block
3747 	 * until all the threads running the xgmi reset works reach
3748 	 * those points. task_barrier_full will do both blocks.
3749 	 */
3750 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3751 
3752 		task_barrier_enter(&hive->tb);
3753 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3754 
3755 		if (adev->asic_reset_res)
3756 			goto fail;
3757 
3758 		task_barrier_exit(&hive->tb);
3759 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3760 
3761 		if (adev->asic_reset_res)
3762 			goto fail;
3763 
3764 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3765 	} else {
3766 
3767 		task_barrier_full(&hive->tb);
3768 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3769 	}
3770 
3771 fail:
3772 	if (adev->asic_reset_res)
3773 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3774 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3775 	amdgpu_put_xgmi_hive(hive);
3776 }
3777 
3778 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3779 {
3780 	char *input = amdgpu_lockup_timeout;
3781 	char *timeout_setting = NULL;
3782 	int index = 0;
3783 	long timeout;
3784 	int ret = 0;
3785 
3786 	/*
3787 	 * By default timeout for non compute jobs is 10000
3788 	 * and 60000 for compute jobs.
3789 	 * In SR-IOV or passthrough mode, timeout for compute
3790 	 * jobs are 60000 by default.
3791 	 */
3792 	adev->gfx_timeout = msecs_to_jiffies(10000);
3793 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3794 	if (amdgpu_sriov_vf(adev))
3795 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3796 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3797 	else
3798 		adev->compute_timeout =  msecs_to_jiffies(60000);
3799 
3800 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3801 		while ((timeout_setting = strsep(&input, ",")) &&
3802 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3803 			ret = kstrtol(timeout_setting, 0, &timeout);
3804 			if (ret)
3805 				return ret;
3806 
3807 			if (timeout == 0) {
3808 				index++;
3809 				continue;
3810 			} else if (timeout < 0) {
3811 				timeout = MAX_SCHEDULE_TIMEOUT;
3812 				dev_warn(adev->dev, "lockup timeout disabled");
3813 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3814 			} else {
3815 				timeout = msecs_to_jiffies(timeout);
3816 			}
3817 
3818 			switch (index++) {
3819 			case 0:
3820 				adev->gfx_timeout = timeout;
3821 				break;
3822 			case 1:
3823 				adev->compute_timeout = timeout;
3824 				break;
3825 			case 2:
3826 				adev->sdma_timeout = timeout;
3827 				break;
3828 			case 3:
3829 				adev->video_timeout = timeout;
3830 				break;
3831 			default:
3832 				break;
3833 			}
3834 		}
3835 		/*
3836 		 * There is only one value specified and
3837 		 * it should apply to all non-compute jobs.
3838 		 */
3839 		if (index == 1) {
3840 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3841 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3842 				adev->compute_timeout = adev->gfx_timeout;
3843 		}
3844 	}
3845 
3846 	return ret;
3847 }
3848 
3849 /**
3850  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3851  *
3852  * @adev: amdgpu_device pointer
3853  *
3854  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3855  */
3856 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3857 {
3858 	struct iommu_domain *domain;
3859 
3860 	domain = iommu_get_domain_for_dev(adev->dev);
3861 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3862 		adev->ram_is_direct_mapped = true;
3863 }
3864 
3865 static const struct attribute *amdgpu_dev_attributes[] = {
3866 	&dev_attr_pcie_replay_count.attr,
3867 	NULL
3868 };
3869 
3870 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3871 {
3872 	if (amdgpu_mcbp == 1)
3873 		adev->gfx.mcbp = true;
3874 	else if (amdgpu_mcbp == 0)
3875 		adev->gfx.mcbp = false;
3876 
3877 	if (amdgpu_sriov_vf(adev))
3878 		adev->gfx.mcbp = true;
3879 
3880 	if (adev->gfx.mcbp)
3881 		DRM_INFO("MCBP is enabled\n");
3882 }
3883 
3884 /**
3885  * amdgpu_device_init - initialize the driver
3886  *
3887  * @adev: amdgpu_device pointer
3888  * @flags: driver flags
3889  *
3890  * Initializes the driver info and hw (all asics).
3891  * Returns 0 for success or an error on failure.
3892  * Called at driver startup.
3893  */
3894 int amdgpu_device_init(struct amdgpu_device *adev,
3895 		       uint32_t flags)
3896 {
3897 	struct drm_device *ddev = adev_to_drm(adev);
3898 	struct pci_dev *pdev = adev->pdev;
3899 	int r, i;
3900 	bool px = false;
3901 	u32 max_MBps;
3902 	int tmp;
3903 
3904 	adev->shutdown = false;
3905 	adev->flags = flags;
3906 
3907 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3908 		adev->asic_type = amdgpu_force_asic_type;
3909 	else
3910 		adev->asic_type = flags & AMD_ASIC_MASK;
3911 
3912 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3913 	if (amdgpu_emu_mode == 1)
3914 		adev->usec_timeout *= 10;
3915 	adev->gmc.gart_size = 512 * 1024 * 1024;
3916 	adev->accel_working = false;
3917 	adev->num_rings = 0;
3918 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3919 	adev->mman.buffer_funcs = NULL;
3920 	adev->mman.buffer_funcs_ring = NULL;
3921 	adev->vm_manager.vm_pte_funcs = NULL;
3922 	adev->vm_manager.vm_pte_num_scheds = 0;
3923 	adev->gmc.gmc_funcs = NULL;
3924 	adev->harvest_ip_mask = 0x0;
3925 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3926 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3927 
3928 	adev->smc_rreg = &amdgpu_invalid_rreg;
3929 	adev->smc_wreg = &amdgpu_invalid_wreg;
3930 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3931 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3932 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3933 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3934 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3935 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3936 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3937 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3938 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3939 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
3940 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3941 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3942 	adev->didt_rreg = &amdgpu_invalid_rreg;
3943 	adev->didt_wreg = &amdgpu_invalid_wreg;
3944 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3945 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3946 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3947 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3948 
3949 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3950 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3951 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3952 
3953 	/* mutex initialization are all done here so we
3954 	 * can recall function without having locking issues
3955 	 */
3956 	mutex_init(&adev->firmware.mutex);
3957 	mutex_init(&adev->pm.mutex);
3958 	mutex_init(&adev->gfx.gpu_clock_mutex);
3959 	mutex_init(&adev->srbm_mutex);
3960 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3961 	mutex_init(&adev->gfx.gfx_off_mutex);
3962 	mutex_init(&adev->gfx.partition_mutex);
3963 	mutex_init(&adev->grbm_idx_mutex);
3964 	mutex_init(&adev->mn_lock);
3965 	mutex_init(&adev->virt.vf_errors.lock);
3966 	hash_init(adev->mn_hash);
3967 	mutex_init(&adev->psp.mutex);
3968 	mutex_init(&adev->notifier_lock);
3969 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3970 	mutex_init(&adev->benchmark_mutex);
3971 
3972 	amdgpu_device_init_apu_flags(adev);
3973 
3974 	r = amdgpu_device_check_arguments(adev);
3975 	if (r)
3976 		return r;
3977 
3978 	spin_lock_init(&adev->mmio_idx_lock);
3979 	spin_lock_init(&adev->smc_idx_lock);
3980 	spin_lock_init(&adev->pcie_idx_lock);
3981 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3982 	spin_lock_init(&adev->didt_idx_lock);
3983 	spin_lock_init(&adev->gc_cac_idx_lock);
3984 	spin_lock_init(&adev->se_cac_idx_lock);
3985 	spin_lock_init(&adev->audio_endpt_idx_lock);
3986 	spin_lock_init(&adev->mm_stats.lock);
3987 
3988 	INIT_LIST_HEAD(&adev->shadow_list);
3989 	mutex_init(&adev->shadow_list_lock);
3990 
3991 	INIT_LIST_HEAD(&adev->reset_list);
3992 
3993 	INIT_LIST_HEAD(&adev->ras_list);
3994 
3995 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3996 
3997 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3998 			  amdgpu_device_delayed_init_work_handler);
3999 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4000 			  amdgpu_device_delay_enable_gfx_off);
4001 
4002 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4003 
4004 	adev->gfx.gfx_off_req_count = 1;
4005 	adev->gfx.gfx_off_residency = 0;
4006 	adev->gfx.gfx_off_entrycount = 0;
4007 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4008 
4009 	atomic_set(&adev->throttling_logging_enabled, 1);
4010 	/*
4011 	 * If throttling continues, logging will be performed every minute
4012 	 * to avoid log flooding. "-1" is subtracted since the thermal
4013 	 * throttling interrupt comes every second. Thus, the total logging
4014 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
4015 	 * for throttling interrupt) = 60 seconds.
4016 	 */
4017 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4018 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4019 
4020 	/* Registers mapping */
4021 	/* TODO: block userspace mapping of io register */
4022 	if (adev->asic_type >= CHIP_BONAIRE) {
4023 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4024 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4025 	} else {
4026 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4027 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4028 	}
4029 
4030 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4031 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4032 
4033 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4034 	if (!adev->rmmio)
4035 		return -ENOMEM;
4036 
4037 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4038 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4039 
4040 	/*
4041 	 * Reset domain needs to be present early, before XGMI hive discovered
4042 	 * (if any) and intitialized to use reset sem and in_gpu reset flag
4043 	 * early on during init and before calling to RREG32.
4044 	 */
4045 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4046 	if (!adev->reset_domain)
4047 		return -ENOMEM;
4048 
4049 	/* detect hw virtualization here */
4050 	amdgpu_detect_virtualization(adev);
4051 
4052 	amdgpu_device_get_pcie_info(adev);
4053 
4054 	r = amdgpu_device_get_job_timeout_settings(adev);
4055 	if (r) {
4056 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4057 		return r;
4058 	}
4059 
4060 	amdgpu_device_set_mcbp(adev);
4061 
4062 	/* early init functions */
4063 	r = amdgpu_device_ip_early_init(adev);
4064 	if (r)
4065 		return r;
4066 
4067 	/* Get rid of things like offb */
4068 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4069 	if (r)
4070 		return r;
4071 
4072 	/* Enable TMZ based on IP_VERSION */
4073 	amdgpu_gmc_tmz_set(adev);
4074 
4075 	if (amdgpu_sriov_vf(adev) &&
4076 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4077 		/* VF MMIO access (except mailbox range) from CPU
4078 		 * will be blocked during sriov runtime
4079 		 */
4080 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4081 
4082 	amdgpu_gmc_noretry_set(adev);
4083 	/* Need to get xgmi info early to decide the reset behavior*/
4084 	if (adev->gmc.xgmi.supported) {
4085 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4086 		if (r)
4087 			return r;
4088 	}
4089 
4090 	/* enable PCIE atomic ops */
4091 	if (amdgpu_sriov_vf(adev)) {
4092 		if (adev->virt.fw_reserve.p_pf2vf)
4093 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4094 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4095 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4096 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
4097 	 * internal path natively support atomics, set have_atomics_support to true.
4098 	 */
4099 	} else if ((adev->flags & AMD_IS_APU) &&
4100 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4101 		    IP_VERSION(9, 0, 0))) {
4102 		adev->have_atomics_support = true;
4103 	} else {
4104 		adev->have_atomics_support =
4105 			!pci_enable_atomic_ops_to_root(adev->pdev,
4106 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4107 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4108 	}
4109 
4110 	if (!adev->have_atomics_support)
4111 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4112 
4113 	/* doorbell bar mapping and doorbell index init*/
4114 	amdgpu_doorbell_init(adev);
4115 
4116 	if (amdgpu_emu_mode == 1) {
4117 		/* post the asic on emulation mode */
4118 		emu_soc_asic_init(adev);
4119 		goto fence_driver_init;
4120 	}
4121 
4122 	amdgpu_reset_init(adev);
4123 
4124 	/* detect if we are with an SRIOV vbios */
4125 	if (adev->bios)
4126 		amdgpu_device_detect_sriov_bios(adev);
4127 
4128 	/* check if we need to reset the asic
4129 	 *  E.g., driver was not cleanly unloaded previously, etc.
4130 	 */
4131 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4132 		if (adev->gmc.xgmi.num_physical_nodes) {
4133 			dev_info(adev->dev, "Pending hive reset.\n");
4134 			adev->gmc.xgmi.pending_reset = true;
4135 			/* Only need to init necessary block for SMU to handle the reset */
4136 			for (i = 0; i < adev->num_ip_blocks; i++) {
4137 				if (!adev->ip_blocks[i].status.valid)
4138 					continue;
4139 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4140 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4141 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4142 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4143 					DRM_DEBUG("IP %s disabled for hw_init.\n",
4144 						adev->ip_blocks[i].version->funcs->name);
4145 					adev->ip_blocks[i].status.hw = true;
4146 				}
4147 			}
4148 		} else {
4149 			tmp = amdgpu_reset_method;
4150 			/* It should do a default reset when loading or reloading the driver,
4151 			 * regardless of the module parameter reset_method.
4152 			 */
4153 			amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4154 			r = amdgpu_asic_reset(adev);
4155 			amdgpu_reset_method = tmp;
4156 			if (r) {
4157 				dev_err(adev->dev, "asic reset on init failed\n");
4158 				goto failed;
4159 			}
4160 		}
4161 	}
4162 
4163 	/* Post card if necessary */
4164 	if (amdgpu_device_need_post(adev)) {
4165 		if (!adev->bios) {
4166 			dev_err(adev->dev, "no vBIOS found\n");
4167 			r = -EINVAL;
4168 			goto failed;
4169 		}
4170 		DRM_INFO("GPU posting now...\n");
4171 		r = amdgpu_device_asic_init(adev);
4172 		if (r) {
4173 			dev_err(adev->dev, "gpu post error!\n");
4174 			goto failed;
4175 		}
4176 	}
4177 
4178 	if (adev->bios) {
4179 		if (adev->is_atom_fw) {
4180 			/* Initialize clocks */
4181 			r = amdgpu_atomfirmware_get_clock_info(adev);
4182 			if (r) {
4183 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4184 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4185 				goto failed;
4186 			}
4187 		} else {
4188 			/* Initialize clocks */
4189 			r = amdgpu_atombios_get_clock_info(adev);
4190 			if (r) {
4191 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4192 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4193 				goto failed;
4194 			}
4195 			/* init i2c buses */
4196 			if (!amdgpu_device_has_dc_support(adev))
4197 				amdgpu_atombios_i2c_init(adev);
4198 		}
4199 	}
4200 
4201 fence_driver_init:
4202 	/* Fence driver */
4203 	r = amdgpu_fence_driver_sw_init(adev);
4204 	if (r) {
4205 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4206 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4207 		goto failed;
4208 	}
4209 
4210 	/* init the mode config */
4211 	drm_mode_config_init(adev_to_drm(adev));
4212 
4213 	r = amdgpu_device_ip_init(adev);
4214 	if (r) {
4215 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4216 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4217 		goto release_ras_con;
4218 	}
4219 
4220 	amdgpu_fence_driver_hw_init(adev);
4221 
4222 	dev_info(adev->dev,
4223 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4224 			adev->gfx.config.max_shader_engines,
4225 			adev->gfx.config.max_sh_per_se,
4226 			adev->gfx.config.max_cu_per_sh,
4227 			adev->gfx.cu_info.number);
4228 
4229 	adev->accel_working = true;
4230 
4231 	amdgpu_vm_check_compute_bug(adev);
4232 
4233 	/* Initialize the buffer migration limit. */
4234 	if (amdgpu_moverate >= 0)
4235 		max_MBps = amdgpu_moverate;
4236 	else
4237 		max_MBps = 8; /* Allow 8 MB/s. */
4238 	/* Get a log2 for easy divisions. */
4239 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4240 
4241 	/*
4242 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4243 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4244 	 * gpu instance is counted less.
4245 	 */
4246 	amdgpu_register_gpu_instance(adev);
4247 
4248 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4249 	 * explicit gating rather than handling it automatically.
4250 	 */
4251 	if (!adev->gmc.xgmi.pending_reset) {
4252 		r = amdgpu_device_ip_late_init(adev);
4253 		if (r) {
4254 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4255 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4256 			goto release_ras_con;
4257 		}
4258 		/* must succeed. */
4259 		amdgpu_ras_resume(adev);
4260 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4261 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4262 	}
4263 
4264 	if (amdgpu_sriov_vf(adev)) {
4265 		amdgpu_virt_release_full_gpu(adev, true);
4266 		flush_delayed_work(&adev->delayed_init_work);
4267 	}
4268 
4269 	/*
4270 	 * Place those sysfs registering after `late_init`. As some of those
4271 	 * operations performed in `late_init` might affect the sysfs
4272 	 * interfaces creating.
4273 	 */
4274 	r = amdgpu_atombios_sysfs_init(adev);
4275 	if (r)
4276 		drm_err(&adev->ddev,
4277 			"registering atombios sysfs failed (%d).\n", r);
4278 
4279 	r = amdgpu_pm_sysfs_init(adev);
4280 	if (r)
4281 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4282 
4283 	r = amdgpu_ucode_sysfs_init(adev);
4284 	if (r) {
4285 		adev->ucode_sysfs_en = false;
4286 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4287 	} else
4288 		adev->ucode_sysfs_en = true;
4289 
4290 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4291 	if (r)
4292 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4293 
4294 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4295 	if (r)
4296 		dev_err(adev->dev,
4297 			"Could not create amdgpu board attributes\n");
4298 
4299 	amdgpu_fru_sysfs_init(adev);
4300 	amdgpu_reg_state_sysfs_init(adev);
4301 
4302 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4303 		r = amdgpu_pmu_init(adev);
4304 	if (r)
4305 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4306 
4307 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4308 	if (amdgpu_device_cache_pci_state(adev->pdev))
4309 		pci_restore_state(pdev);
4310 
4311 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4312 	/* this will fail for cards that aren't VGA class devices, just
4313 	 * ignore it
4314 	 */
4315 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4316 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4317 
4318 	px = amdgpu_device_supports_px(ddev);
4319 
4320 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4321 				apple_gmux_detect(NULL, NULL)))
4322 		vga_switcheroo_register_client(adev->pdev,
4323 					       &amdgpu_switcheroo_ops, px);
4324 
4325 	if (px)
4326 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4327 
4328 	if (adev->gmc.xgmi.pending_reset)
4329 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4330 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4331 
4332 	amdgpu_device_check_iommu_direct_map(adev);
4333 
4334 	return 0;
4335 
4336 release_ras_con:
4337 	if (amdgpu_sriov_vf(adev))
4338 		amdgpu_virt_release_full_gpu(adev, true);
4339 
4340 	/* failed in exclusive mode due to timeout */
4341 	if (amdgpu_sriov_vf(adev) &&
4342 		!amdgpu_sriov_runtime(adev) &&
4343 		amdgpu_virt_mmio_blocked(adev) &&
4344 		!amdgpu_virt_wait_reset(adev)) {
4345 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4346 		/* Don't send request since VF is inactive. */
4347 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4348 		adev->virt.ops = NULL;
4349 		r = -EAGAIN;
4350 	}
4351 	amdgpu_release_ras_context(adev);
4352 
4353 failed:
4354 	amdgpu_vf_error_trans_all(adev);
4355 
4356 	return r;
4357 }
4358 
4359 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4360 {
4361 
4362 	/* Clear all CPU mappings pointing to this device */
4363 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4364 
4365 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4366 	amdgpu_doorbell_fini(adev);
4367 
4368 	iounmap(adev->rmmio);
4369 	adev->rmmio = NULL;
4370 	if (adev->mman.aper_base_kaddr)
4371 		iounmap(adev->mman.aper_base_kaddr);
4372 	adev->mman.aper_base_kaddr = NULL;
4373 
4374 	/* Memory manager related */
4375 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4376 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4377 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4378 	}
4379 }
4380 
4381 /**
4382  * amdgpu_device_fini_hw - tear down the driver
4383  *
4384  * @adev: amdgpu_device pointer
4385  *
4386  * Tear down the driver info (all asics).
4387  * Called at driver shutdown.
4388  */
4389 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4390 {
4391 	dev_info(adev->dev, "amdgpu: finishing device.\n");
4392 	flush_delayed_work(&adev->delayed_init_work);
4393 	adev->shutdown = true;
4394 
4395 	/* make sure IB test finished before entering exclusive mode
4396 	 * to avoid preemption on IB test
4397 	 */
4398 	if (amdgpu_sriov_vf(adev)) {
4399 		amdgpu_virt_request_full_gpu(adev, false);
4400 		amdgpu_virt_fini_data_exchange(adev);
4401 	}
4402 
4403 	/* disable all interrupts */
4404 	amdgpu_irq_disable_all(adev);
4405 	if (adev->mode_info.mode_config_initialized) {
4406 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4407 			drm_helper_force_disable_all(adev_to_drm(adev));
4408 		else
4409 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4410 	}
4411 	amdgpu_fence_driver_hw_fini(adev);
4412 
4413 	if (adev->mman.initialized)
4414 		drain_workqueue(adev->mman.bdev.wq);
4415 
4416 	if (adev->pm.sysfs_initialized)
4417 		amdgpu_pm_sysfs_fini(adev);
4418 	if (adev->ucode_sysfs_en)
4419 		amdgpu_ucode_sysfs_fini(adev);
4420 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4421 	amdgpu_fru_sysfs_fini(adev);
4422 
4423 	amdgpu_reg_state_sysfs_fini(adev);
4424 
4425 	/* disable ras feature must before hw fini */
4426 	amdgpu_ras_pre_fini(adev);
4427 
4428 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4429 
4430 	amdgpu_device_ip_fini_early(adev);
4431 
4432 	amdgpu_irq_fini_hw(adev);
4433 
4434 	if (adev->mman.initialized)
4435 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4436 
4437 	amdgpu_gart_dummy_page_fini(adev);
4438 
4439 	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4440 		amdgpu_device_unmap_mmio(adev);
4441 
4442 }
4443 
4444 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4445 {
4446 	int idx;
4447 	bool px;
4448 
4449 	amdgpu_fence_driver_sw_fini(adev);
4450 	amdgpu_device_ip_fini(adev);
4451 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4452 	adev->accel_working = false;
4453 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4454 
4455 	amdgpu_reset_fini(adev);
4456 
4457 	/* free i2c buses */
4458 	if (!amdgpu_device_has_dc_support(adev))
4459 		amdgpu_i2c_fini(adev);
4460 
4461 	if (amdgpu_emu_mode != 1)
4462 		amdgpu_atombios_fini(adev);
4463 
4464 	kfree(adev->bios);
4465 	adev->bios = NULL;
4466 
4467 	kfree(adev->fru_info);
4468 	adev->fru_info = NULL;
4469 
4470 	px = amdgpu_device_supports_px(adev_to_drm(adev));
4471 
4472 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4473 				apple_gmux_detect(NULL, NULL)))
4474 		vga_switcheroo_unregister_client(adev->pdev);
4475 
4476 	if (px)
4477 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4478 
4479 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4480 		vga_client_unregister(adev->pdev);
4481 
4482 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4483 
4484 		iounmap(adev->rmmio);
4485 		adev->rmmio = NULL;
4486 		amdgpu_doorbell_fini(adev);
4487 		drm_dev_exit(idx);
4488 	}
4489 
4490 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4491 		amdgpu_pmu_fini(adev);
4492 	if (adev->mman.discovery_bin)
4493 		amdgpu_discovery_fini(adev);
4494 
4495 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4496 	adev->reset_domain = NULL;
4497 
4498 	kfree(adev->pci_state);
4499 
4500 }
4501 
4502 /**
4503  * amdgpu_device_evict_resources - evict device resources
4504  * @adev: amdgpu device object
4505  *
4506  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4507  * of the vram memory type. Mainly used for evicting device resources
4508  * at suspend time.
4509  *
4510  */
4511 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4512 {
4513 	int ret;
4514 
4515 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4516 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4517 		return 0;
4518 
4519 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4520 	if (ret)
4521 		DRM_WARN("evicting device resources failed\n");
4522 	return ret;
4523 }
4524 
4525 /*
4526  * Suspend & resume.
4527  */
4528 /**
4529  * amdgpu_device_prepare - prepare for device suspend
4530  *
4531  * @dev: drm dev pointer
4532  *
4533  * Prepare to put the hw in the suspend state (all asics).
4534  * Returns 0 for success or an error on failure.
4535  * Called at driver suspend.
4536  */
4537 int amdgpu_device_prepare(struct drm_device *dev)
4538 {
4539 	struct amdgpu_device *adev = drm_to_adev(dev);
4540 	int i, r;
4541 
4542 	amdgpu_choose_low_power_state(adev);
4543 
4544 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4545 		return 0;
4546 
4547 	/* Evict the majority of BOs before starting suspend sequence */
4548 	r = amdgpu_device_evict_resources(adev);
4549 	if (r)
4550 		goto unprepare;
4551 
4552 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4553 
4554 	for (i = 0; i < adev->num_ip_blocks; i++) {
4555 		if (!adev->ip_blocks[i].status.valid)
4556 			continue;
4557 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4558 			continue;
4559 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4560 		if (r)
4561 			goto unprepare;
4562 	}
4563 
4564 	return 0;
4565 
4566 unprepare:
4567 	adev->in_s0ix = adev->in_s3 = false;
4568 
4569 	return r;
4570 }
4571 
4572 /**
4573  * amdgpu_device_suspend - initiate device suspend
4574  *
4575  * @dev: drm dev pointer
4576  * @fbcon : notify the fbdev of suspend
4577  *
4578  * Puts the hw in the suspend state (all asics).
4579  * Returns 0 for success or an error on failure.
4580  * Called at driver suspend.
4581  */
4582 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4583 {
4584 	struct amdgpu_device *adev = drm_to_adev(dev);
4585 	int r = 0;
4586 
4587 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4588 		return 0;
4589 
4590 	adev->in_suspend = true;
4591 
4592 	if (amdgpu_sriov_vf(adev)) {
4593 		amdgpu_virt_fini_data_exchange(adev);
4594 		r = amdgpu_virt_request_full_gpu(adev, false);
4595 		if (r)
4596 			return r;
4597 	}
4598 
4599 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4600 		DRM_WARN("smart shift update failed\n");
4601 
4602 	if (fbcon)
4603 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4604 
4605 	cancel_delayed_work_sync(&adev->delayed_init_work);
4606 
4607 	amdgpu_ras_suspend(adev);
4608 
4609 	amdgpu_device_ip_suspend_phase1(adev);
4610 
4611 	if (!adev->in_s0ix)
4612 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4613 
4614 	r = amdgpu_device_evict_resources(adev);
4615 	if (r)
4616 		return r;
4617 
4618 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4619 
4620 	amdgpu_fence_driver_hw_fini(adev);
4621 
4622 	amdgpu_device_ip_suspend_phase2(adev);
4623 
4624 	if (amdgpu_sriov_vf(adev))
4625 		amdgpu_virt_release_full_gpu(adev, false);
4626 
4627 	r = amdgpu_dpm_notify_rlc_state(adev, false);
4628 	if (r)
4629 		return r;
4630 
4631 	return 0;
4632 }
4633 
4634 /**
4635  * amdgpu_device_resume - initiate device resume
4636  *
4637  * @dev: drm dev pointer
4638  * @fbcon : notify the fbdev of resume
4639  *
4640  * Bring the hw back to operating state (all asics).
4641  * Returns 0 for success or an error on failure.
4642  * Called at driver resume.
4643  */
4644 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4645 {
4646 	struct amdgpu_device *adev = drm_to_adev(dev);
4647 	int r = 0;
4648 
4649 	if (amdgpu_sriov_vf(adev)) {
4650 		r = amdgpu_virt_request_full_gpu(adev, true);
4651 		if (r)
4652 			return r;
4653 	}
4654 
4655 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4656 		return 0;
4657 
4658 	if (adev->in_s0ix)
4659 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4660 
4661 	/* post card */
4662 	if (amdgpu_device_need_post(adev)) {
4663 		r = amdgpu_device_asic_init(adev);
4664 		if (r)
4665 			dev_err(adev->dev, "amdgpu asic init failed\n");
4666 	}
4667 
4668 	r = amdgpu_device_ip_resume(adev);
4669 
4670 	if (r) {
4671 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4672 		goto exit;
4673 	}
4674 	amdgpu_fence_driver_hw_init(adev);
4675 
4676 	if (!adev->in_s0ix) {
4677 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4678 		if (r)
4679 			goto exit;
4680 	}
4681 
4682 	r = amdgpu_device_ip_late_init(adev);
4683 	if (r)
4684 		goto exit;
4685 
4686 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4687 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4688 exit:
4689 	if (amdgpu_sriov_vf(adev)) {
4690 		amdgpu_virt_init_data_exchange(adev);
4691 		amdgpu_virt_release_full_gpu(adev, true);
4692 	}
4693 
4694 	if (r)
4695 		return r;
4696 
4697 	/* Make sure IB tests flushed */
4698 	flush_delayed_work(&adev->delayed_init_work);
4699 
4700 	if (fbcon)
4701 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4702 
4703 	amdgpu_ras_resume(adev);
4704 
4705 	if (adev->mode_info.num_crtc) {
4706 		/*
4707 		 * Most of the connector probing functions try to acquire runtime pm
4708 		 * refs to ensure that the GPU is powered on when connector polling is
4709 		 * performed. Since we're calling this from a runtime PM callback,
4710 		 * trying to acquire rpm refs will cause us to deadlock.
4711 		 *
4712 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4713 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4714 		 */
4715 #ifdef CONFIG_PM
4716 		dev->dev->power.disable_depth++;
4717 #endif
4718 		if (!adev->dc_enabled)
4719 			drm_helper_hpd_irq_event(dev);
4720 		else
4721 			drm_kms_helper_hotplug_event(dev);
4722 #ifdef CONFIG_PM
4723 		dev->dev->power.disable_depth--;
4724 #endif
4725 	}
4726 	adev->in_suspend = false;
4727 
4728 	if (adev->enable_mes)
4729 		amdgpu_mes_self_test(adev);
4730 
4731 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4732 		DRM_WARN("smart shift update failed\n");
4733 
4734 	return 0;
4735 }
4736 
4737 /**
4738  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4739  *
4740  * @adev: amdgpu_device pointer
4741  *
4742  * The list of all the hardware IPs that make up the asic is walked and
4743  * the check_soft_reset callbacks are run.  check_soft_reset determines
4744  * if the asic is still hung or not.
4745  * Returns true if any of the IPs are still in a hung state, false if not.
4746  */
4747 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4748 {
4749 	int i;
4750 	bool asic_hang = false;
4751 
4752 	if (amdgpu_sriov_vf(adev))
4753 		return true;
4754 
4755 	if (amdgpu_asic_need_full_reset(adev))
4756 		return true;
4757 
4758 	for (i = 0; i < adev->num_ip_blocks; i++) {
4759 		if (!adev->ip_blocks[i].status.valid)
4760 			continue;
4761 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4762 			adev->ip_blocks[i].status.hang =
4763 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4764 		if (adev->ip_blocks[i].status.hang) {
4765 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4766 			asic_hang = true;
4767 		}
4768 	}
4769 	return asic_hang;
4770 }
4771 
4772 /**
4773  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4774  *
4775  * @adev: amdgpu_device pointer
4776  *
4777  * The list of all the hardware IPs that make up the asic is walked and the
4778  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4779  * handles any IP specific hardware or software state changes that are
4780  * necessary for a soft reset to succeed.
4781  * Returns 0 on success, negative error code on failure.
4782  */
4783 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4784 {
4785 	int i, r = 0;
4786 
4787 	for (i = 0; i < adev->num_ip_blocks; i++) {
4788 		if (!adev->ip_blocks[i].status.valid)
4789 			continue;
4790 		if (adev->ip_blocks[i].status.hang &&
4791 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4792 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4793 			if (r)
4794 				return r;
4795 		}
4796 	}
4797 
4798 	return 0;
4799 }
4800 
4801 /**
4802  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4803  *
4804  * @adev: amdgpu_device pointer
4805  *
4806  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4807  * reset is necessary to recover.
4808  * Returns true if a full asic reset is required, false if not.
4809  */
4810 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4811 {
4812 	int i;
4813 
4814 	if (amdgpu_asic_need_full_reset(adev))
4815 		return true;
4816 
4817 	for (i = 0; i < adev->num_ip_blocks; i++) {
4818 		if (!adev->ip_blocks[i].status.valid)
4819 			continue;
4820 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4821 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4822 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4823 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4824 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4825 			if (adev->ip_blocks[i].status.hang) {
4826 				dev_info(adev->dev, "Some block need full reset!\n");
4827 				return true;
4828 			}
4829 		}
4830 	}
4831 	return false;
4832 }
4833 
4834 /**
4835  * amdgpu_device_ip_soft_reset - do a soft reset
4836  *
4837  * @adev: amdgpu_device pointer
4838  *
4839  * The list of all the hardware IPs that make up the asic is walked and the
4840  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4841  * IP specific hardware or software state changes that are necessary to soft
4842  * reset the IP.
4843  * Returns 0 on success, negative error code on failure.
4844  */
4845 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4846 {
4847 	int i, r = 0;
4848 
4849 	for (i = 0; i < adev->num_ip_blocks; i++) {
4850 		if (!adev->ip_blocks[i].status.valid)
4851 			continue;
4852 		if (adev->ip_blocks[i].status.hang &&
4853 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4854 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4855 			if (r)
4856 				return r;
4857 		}
4858 	}
4859 
4860 	return 0;
4861 }
4862 
4863 /**
4864  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4865  *
4866  * @adev: amdgpu_device pointer
4867  *
4868  * The list of all the hardware IPs that make up the asic is walked and the
4869  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4870  * handles any IP specific hardware or software state changes that are
4871  * necessary after the IP has been soft reset.
4872  * Returns 0 on success, negative error code on failure.
4873  */
4874 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4875 {
4876 	int i, r = 0;
4877 
4878 	for (i = 0; i < adev->num_ip_blocks; i++) {
4879 		if (!adev->ip_blocks[i].status.valid)
4880 			continue;
4881 		if (adev->ip_blocks[i].status.hang &&
4882 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4883 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4884 		if (r)
4885 			return r;
4886 	}
4887 
4888 	return 0;
4889 }
4890 
4891 /**
4892  * amdgpu_device_recover_vram - Recover some VRAM contents
4893  *
4894  * @adev: amdgpu_device pointer
4895  *
4896  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4897  * restore things like GPUVM page tables after a GPU reset where
4898  * the contents of VRAM might be lost.
4899  *
4900  * Returns:
4901  * 0 on success, negative error code on failure.
4902  */
4903 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4904 {
4905 	struct dma_fence *fence = NULL, *next = NULL;
4906 	struct amdgpu_bo *shadow;
4907 	struct amdgpu_bo_vm *vmbo;
4908 	long r = 1, tmo;
4909 
4910 	if (amdgpu_sriov_runtime(adev))
4911 		tmo = msecs_to_jiffies(8000);
4912 	else
4913 		tmo = msecs_to_jiffies(100);
4914 
4915 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4916 	mutex_lock(&adev->shadow_list_lock);
4917 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4918 		/* If vm is compute context or adev is APU, shadow will be NULL */
4919 		if (!vmbo->shadow)
4920 			continue;
4921 		shadow = vmbo->shadow;
4922 
4923 		/* No need to recover an evicted BO */
4924 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4925 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4926 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4927 			continue;
4928 
4929 		r = amdgpu_bo_restore_shadow(shadow, &next);
4930 		if (r)
4931 			break;
4932 
4933 		if (fence) {
4934 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4935 			dma_fence_put(fence);
4936 			fence = next;
4937 			if (tmo == 0) {
4938 				r = -ETIMEDOUT;
4939 				break;
4940 			} else if (tmo < 0) {
4941 				r = tmo;
4942 				break;
4943 			}
4944 		} else {
4945 			fence = next;
4946 		}
4947 	}
4948 	mutex_unlock(&adev->shadow_list_lock);
4949 
4950 	if (fence)
4951 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4952 	dma_fence_put(fence);
4953 
4954 	if (r < 0 || tmo <= 0) {
4955 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4956 		return -EIO;
4957 	}
4958 
4959 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4960 	return 0;
4961 }
4962 
4963 
4964 /**
4965  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4966  *
4967  * @adev: amdgpu_device pointer
4968  * @from_hypervisor: request from hypervisor
4969  *
4970  * do VF FLR and reinitialize Asic
4971  * return 0 means succeeded otherwise failed
4972  */
4973 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4974 				     bool from_hypervisor)
4975 {
4976 	int r;
4977 	struct amdgpu_hive_info *hive = NULL;
4978 	int retry_limit = 0;
4979 
4980 retry:
4981 	amdgpu_amdkfd_pre_reset(adev);
4982 
4983 	amdgpu_device_stop_pending_resets(adev);
4984 
4985 	if (from_hypervisor)
4986 		r = amdgpu_virt_request_full_gpu(adev, true);
4987 	else
4988 		r = amdgpu_virt_reset_gpu(adev);
4989 	if (r)
4990 		return r;
4991 	amdgpu_irq_gpu_reset_resume_helper(adev);
4992 
4993 	/* some sw clean up VF needs to do before recover */
4994 	amdgpu_virt_post_reset(adev);
4995 
4996 	/* Resume IP prior to SMC */
4997 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4998 	if (r)
4999 		goto error;
5000 
5001 	amdgpu_virt_init_data_exchange(adev);
5002 
5003 	r = amdgpu_device_fw_loading(adev);
5004 	if (r)
5005 		return r;
5006 
5007 	/* now we are okay to resume SMC/CP/SDMA */
5008 	r = amdgpu_device_ip_reinit_late_sriov(adev);
5009 	if (r)
5010 		goto error;
5011 
5012 	hive = amdgpu_get_xgmi_hive(adev);
5013 	/* Update PSP FW topology after reset */
5014 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5015 		r = amdgpu_xgmi_update_topology(hive, adev);
5016 
5017 	if (hive)
5018 		amdgpu_put_xgmi_hive(hive);
5019 
5020 	if (!r) {
5021 		r = amdgpu_ib_ring_tests(adev);
5022 
5023 		amdgpu_amdkfd_post_reset(adev);
5024 	}
5025 
5026 error:
5027 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5028 		amdgpu_inc_vram_lost(adev);
5029 		r = amdgpu_device_recover_vram(adev);
5030 	}
5031 	amdgpu_virt_release_full_gpu(adev, true);
5032 
5033 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5034 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5035 			retry_limit++;
5036 			goto retry;
5037 		} else
5038 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5039 	}
5040 
5041 	return r;
5042 }
5043 
5044 /**
5045  * amdgpu_device_has_job_running - check if there is any job in mirror list
5046  *
5047  * @adev: amdgpu_device pointer
5048  *
5049  * check if there is any job in mirror list
5050  */
5051 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5052 {
5053 	int i;
5054 	struct drm_sched_job *job;
5055 
5056 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5057 		struct amdgpu_ring *ring = adev->rings[i];
5058 
5059 		if (!amdgpu_ring_sched_ready(ring))
5060 			continue;
5061 
5062 		spin_lock(&ring->sched.job_list_lock);
5063 		job = list_first_entry_or_null(&ring->sched.pending_list,
5064 					       struct drm_sched_job, list);
5065 		spin_unlock(&ring->sched.job_list_lock);
5066 		if (job)
5067 			return true;
5068 	}
5069 	return false;
5070 }
5071 
5072 /**
5073  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5074  *
5075  * @adev: amdgpu_device pointer
5076  *
5077  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5078  * a hung GPU.
5079  */
5080 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5081 {
5082 
5083 	if (amdgpu_gpu_recovery == 0)
5084 		goto disabled;
5085 
5086 	/* Skip soft reset check in fatal error mode */
5087 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5088 		return true;
5089 
5090 	if (amdgpu_sriov_vf(adev))
5091 		return true;
5092 
5093 	if (amdgpu_gpu_recovery == -1) {
5094 		switch (adev->asic_type) {
5095 #ifdef CONFIG_DRM_AMDGPU_SI
5096 		case CHIP_VERDE:
5097 		case CHIP_TAHITI:
5098 		case CHIP_PITCAIRN:
5099 		case CHIP_OLAND:
5100 		case CHIP_HAINAN:
5101 #endif
5102 #ifdef CONFIG_DRM_AMDGPU_CIK
5103 		case CHIP_KAVERI:
5104 		case CHIP_KABINI:
5105 		case CHIP_MULLINS:
5106 #endif
5107 		case CHIP_CARRIZO:
5108 		case CHIP_STONEY:
5109 		case CHIP_CYAN_SKILLFISH:
5110 			goto disabled;
5111 		default:
5112 			break;
5113 		}
5114 	}
5115 
5116 	return true;
5117 
5118 disabled:
5119 		dev_info(adev->dev, "GPU recovery disabled.\n");
5120 		return false;
5121 }
5122 
5123 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5124 {
5125 	u32 i;
5126 	int ret = 0;
5127 
5128 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5129 
5130 	dev_info(adev->dev, "GPU mode1 reset\n");
5131 
5132 	/* disable BM */
5133 	pci_clear_master(adev->pdev);
5134 
5135 	amdgpu_device_cache_pci_state(adev->pdev);
5136 
5137 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5138 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5139 		ret = amdgpu_dpm_mode1_reset(adev);
5140 	} else {
5141 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5142 		ret = psp_gpu_reset(adev);
5143 	}
5144 
5145 	if (ret)
5146 		goto mode1_reset_failed;
5147 
5148 	amdgpu_device_load_pci_state(adev->pdev);
5149 	ret = amdgpu_psp_wait_for_bootloader(adev);
5150 	if (ret)
5151 		goto mode1_reset_failed;
5152 
5153 	/* wait for asic to come out of reset */
5154 	for (i = 0; i < adev->usec_timeout; i++) {
5155 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5156 
5157 		if (memsize != 0xffffffff)
5158 			break;
5159 		udelay(1);
5160 	}
5161 
5162 	if (i >= adev->usec_timeout) {
5163 		ret = -ETIMEDOUT;
5164 		goto mode1_reset_failed;
5165 	}
5166 
5167 	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5168 
5169 	return 0;
5170 
5171 mode1_reset_failed:
5172 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5173 	return ret;
5174 }
5175 
5176 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5177 				 struct amdgpu_reset_context *reset_context)
5178 {
5179 	int i, r = 0;
5180 	struct amdgpu_job *job = NULL;
5181 	bool need_full_reset =
5182 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5183 
5184 	if (reset_context->reset_req_dev == adev)
5185 		job = reset_context->job;
5186 
5187 	if (amdgpu_sriov_vf(adev)) {
5188 		/* stop the data exchange thread */
5189 		amdgpu_virt_fini_data_exchange(adev);
5190 	}
5191 
5192 	amdgpu_fence_driver_isr_toggle(adev, true);
5193 
5194 	/* block all schedulers and reset given job's ring */
5195 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5196 		struct amdgpu_ring *ring = adev->rings[i];
5197 
5198 		if (!amdgpu_ring_sched_ready(ring))
5199 			continue;
5200 
5201 		/* Clear job fence from fence drv to avoid force_completion
5202 		 * leave NULL and vm flush fence in fence drv
5203 		 */
5204 		amdgpu_fence_driver_clear_job_fences(ring);
5205 
5206 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5207 		amdgpu_fence_driver_force_completion(ring);
5208 	}
5209 
5210 	amdgpu_fence_driver_isr_toggle(adev, false);
5211 
5212 	if (job && job->vm)
5213 		drm_sched_increase_karma(&job->base);
5214 
5215 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5216 	/* If reset handler not implemented, continue; otherwise return */
5217 	if (r == -EOPNOTSUPP)
5218 		r = 0;
5219 	else
5220 		return r;
5221 
5222 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5223 	if (!amdgpu_sriov_vf(adev)) {
5224 
5225 		if (!need_full_reset)
5226 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5227 
5228 		if (!need_full_reset && amdgpu_gpu_recovery &&
5229 		    amdgpu_device_ip_check_soft_reset(adev)) {
5230 			amdgpu_device_ip_pre_soft_reset(adev);
5231 			r = amdgpu_device_ip_soft_reset(adev);
5232 			amdgpu_device_ip_post_soft_reset(adev);
5233 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5234 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5235 				need_full_reset = true;
5236 			}
5237 		}
5238 
5239 		if (need_full_reset)
5240 			r = amdgpu_device_ip_suspend(adev);
5241 		if (need_full_reset)
5242 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5243 		else
5244 			clear_bit(AMDGPU_NEED_FULL_RESET,
5245 				  &reset_context->flags);
5246 	}
5247 
5248 	return r;
5249 }
5250 
5251 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5252 {
5253 	int i;
5254 
5255 	lockdep_assert_held(&adev->reset_domain->sem);
5256 
5257 	for (i = 0; i < adev->reset_info.num_regs; i++) {
5258 		adev->reset_info.reset_dump_reg_value[i] =
5259 			RREG32(adev->reset_info.reset_dump_reg_list[i]);
5260 
5261 		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5262 					     adev->reset_info.reset_dump_reg_value[i]);
5263 	}
5264 
5265 	return 0;
5266 }
5267 
5268 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5269 			 struct amdgpu_reset_context *reset_context)
5270 {
5271 	struct amdgpu_device *tmp_adev = NULL;
5272 	bool need_full_reset, skip_hw_reset, vram_lost = false;
5273 	int r = 0;
5274 
5275 	/* Try reset handler method first */
5276 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5277 				    reset_list);
5278 	amdgpu_reset_reg_dumps(tmp_adev);
5279 
5280 	reset_context->reset_device_list = device_list_handle;
5281 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5282 	/* If reset handler not implemented, continue; otherwise return */
5283 	if (r == -EOPNOTSUPP)
5284 		r = 0;
5285 	else
5286 		return r;
5287 
5288 	/* Reset handler not implemented, use the default method */
5289 	need_full_reset =
5290 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5291 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5292 
5293 	/*
5294 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5295 	 * to allow proper links negotiation in FW (within 1 sec)
5296 	 */
5297 	if (!skip_hw_reset && need_full_reset) {
5298 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5299 			/* For XGMI run all resets in parallel to speed up the process */
5300 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5301 				tmp_adev->gmc.xgmi.pending_reset = false;
5302 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5303 					r = -EALREADY;
5304 			} else
5305 				r = amdgpu_asic_reset(tmp_adev);
5306 
5307 			if (r) {
5308 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
5309 					 r, adev_to_drm(tmp_adev)->unique);
5310 				goto out;
5311 			}
5312 		}
5313 
5314 		/* For XGMI wait for all resets to complete before proceed */
5315 		if (!r) {
5316 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5317 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5318 					flush_work(&tmp_adev->xgmi_reset_work);
5319 					r = tmp_adev->asic_reset_res;
5320 					if (r)
5321 						break;
5322 				}
5323 			}
5324 		}
5325 	}
5326 
5327 	if (!r && amdgpu_ras_intr_triggered()) {
5328 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5329 			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5330 		}
5331 
5332 		amdgpu_ras_intr_cleared();
5333 	}
5334 
5335 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5336 		if (need_full_reset) {
5337 			/* post card */
5338 			amdgpu_ras_set_fed(tmp_adev, false);
5339 			r = amdgpu_device_asic_init(tmp_adev);
5340 			if (r) {
5341 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5342 			} else {
5343 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5344 
5345 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5346 				if (r)
5347 					goto out;
5348 
5349 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5350 
5351 				amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5352 
5353 				if (vram_lost) {
5354 					DRM_INFO("VRAM is lost due to GPU reset!\n");
5355 					amdgpu_inc_vram_lost(tmp_adev);
5356 				}
5357 
5358 				r = amdgpu_device_fw_loading(tmp_adev);
5359 				if (r)
5360 					return r;
5361 
5362 				r = amdgpu_xcp_restore_partition_mode(
5363 					tmp_adev->xcp_mgr);
5364 				if (r)
5365 					goto out;
5366 
5367 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5368 				if (r)
5369 					goto out;
5370 
5371 				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5372 					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5373 
5374 				if (vram_lost)
5375 					amdgpu_device_fill_reset_magic(tmp_adev);
5376 
5377 				/*
5378 				 * Add this ASIC as tracked as reset was already
5379 				 * complete successfully.
5380 				 */
5381 				amdgpu_register_gpu_instance(tmp_adev);
5382 
5383 				if (!reset_context->hive &&
5384 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5385 					amdgpu_xgmi_add_device(tmp_adev);
5386 
5387 				r = amdgpu_device_ip_late_init(tmp_adev);
5388 				if (r)
5389 					goto out;
5390 
5391 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5392 
5393 				/*
5394 				 * The GPU enters bad state once faulty pages
5395 				 * by ECC has reached the threshold, and ras
5396 				 * recovery is scheduled next. So add one check
5397 				 * here to break recovery if it indeed exceeds
5398 				 * bad page threshold, and remind user to
5399 				 * retire this GPU or setting one bigger
5400 				 * bad_page_threshold value to fix this once
5401 				 * probing driver again.
5402 				 */
5403 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5404 					/* must succeed. */
5405 					amdgpu_ras_resume(tmp_adev);
5406 				} else {
5407 					r = -EINVAL;
5408 					goto out;
5409 				}
5410 
5411 				/* Update PSP FW topology after reset */
5412 				if (reset_context->hive &&
5413 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5414 					r = amdgpu_xgmi_update_topology(
5415 						reset_context->hive, tmp_adev);
5416 			}
5417 		}
5418 
5419 out:
5420 		if (!r) {
5421 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5422 			r = amdgpu_ib_ring_tests(tmp_adev);
5423 			if (r) {
5424 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5425 				need_full_reset = true;
5426 				r = -EAGAIN;
5427 				goto end;
5428 			}
5429 		}
5430 
5431 		if (!r)
5432 			r = amdgpu_device_recover_vram(tmp_adev);
5433 		else
5434 			tmp_adev->asic_reset_res = r;
5435 	}
5436 
5437 end:
5438 	if (need_full_reset)
5439 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5440 	else
5441 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5442 	return r;
5443 }
5444 
5445 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5446 {
5447 
5448 	switch (amdgpu_asic_reset_method(adev)) {
5449 	case AMD_RESET_METHOD_MODE1:
5450 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5451 		break;
5452 	case AMD_RESET_METHOD_MODE2:
5453 		adev->mp1_state = PP_MP1_STATE_RESET;
5454 		break;
5455 	default:
5456 		adev->mp1_state = PP_MP1_STATE_NONE;
5457 		break;
5458 	}
5459 }
5460 
5461 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5462 {
5463 	amdgpu_vf_error_trans_all(adev);
5464 	adev->mp1_state = PP_MP1_STATE_NONE;
5465 }
5466 
5467 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5468 {
5469 	struct pci_dev *p = NULL;
5470 
5471 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5472 			adev->pdev->bus->number, 1);
5473 	if (p) {
5474 		pm_runtime_enable(&(p->dev));
5475 		pm_runtime_resume(&(p->dev));
5476 	}
5477 
5478 	pci_dev_put(p);
5479 }
5480 
5481 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5482 {
5483 	enum amd_reset_method reset_method;
5484 	struct pci_dev *p = NULL;
5485 	u64 expires;
5486 
5487 	/*
5488 	 * For now, only BACO and mode1 reset are confirmed
5489 	 * to suffer the audio issue without proper suspended.
5490 	 */
5491 	reset_method = amdgpu_asic_reset_method(adev);
5492 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5493 	     (reset_method != AMD_RESET_METHOD_MODE1))
5494 		return -EINVAL;
5495 
5496 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5497 			adev->pdev->bus->number, 1);
5498 	if (!p)
5499 		return -ENODEV;
5500 
5501 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5502 	if (!expires)
5503 		/*
5504 		 * If we cannot get the audio device autosuspend delay,
5505 		 * a fixed 4S interval will be used. Considering 3S is
5506 		 * the audio controller default autosuspend delay setting.
5507 		 * 4S used here is guaranteed to cover that.
5508 		 */
5509 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5510 
5511 	while (!pm_runtime_status_suspended(&(p->dev))) {
5512 		if (!pm_runtime_suspend(&(p->dev)))
5513 			break;
5514 
5515 		if (expires < ktime_get_mono_fast_ns()) {
5516 			dev_warn(adev->dev, "failed to suspend display audio\n");
5517 			pci_dev_put(p);
5518 			/* TODO: abort the succeeding gpu reset? */
5519 			return -ETIMEDOUT;
5520 		}
5521 	}
5522 
5523 	pm_runtime_disable(&(p->dev));
5524 
5525 	pci_dev_put(p);
5526 	return 0;
5527 }
5528 
5529 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5530 {
5531 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5532 
5533 #if defined(CONFIG_DEBUG_FS)
5534 	if (!amdgpu_sriov_vf(adev))
5535 		cancel_work(&adev->reset_work);
5536 #endif
5537 
5538 	if (adev->kfd.dev)
5539 		cancel_work(&adev->kfd.reset_work);
5540 
5541 	if (amdgpu_sriov_vf(adev))
5542 		cancel_work(&adev->virt.flr_work);
5543 
5544 	if (con && adev->ras_enabled)
5545 		cancel_work(&con->recovery_work);
5546 
5547 }
5548 
5549 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5550 {
5551 	struct amdgpu_device *tmp_adev;
5552 	int ret = 0;
5553 	u32 status;
5554 
5555 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5556 		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5557 		if (PCI_POSSIBLE_ERROR(status)) {
5558 			dev_err(tmp_adev->dev, "device lost from bus!");
5559 			ret = -ENODEV;
5560 		}
5561 	}
5562 
5563 	return ret;
5564 }
5565 
5566 /**
5567  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5568  *
5569  * @adev: amdgpu_device pointer
5570  * @job: which job trigger hang
5571  * @reset_context: amdgpu reset context pointer
5572  *
5573  * Attempt to reset the GPU if it has hung (all asics).
5574  * Attempt to do soft-reset or full-reset and reinitialize Asic
5575  * Returns 0 for success or an error on failure.
5576  */
5577 
5578 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5579 			      struct amdgpu_job *job,
5580 			      struct amdgpu_reset_context *reset_context)
5581 {
5582 	struct list_head device_list, *device_list_handle =  NULL;
5583 	bool job_signaled = false;
5584 	struct amdgpu_hive_info *hive = NULL;
5585 	struct amdgpu_device *tmp_adev = NULL;
5586 	int i, r = 0;
5587 	bool need_emergency_restart = false;
5588 	bool audio_suspended = false;
5589 
5590 	/*
5591 	 * Special case: RAS triggered and full reset isn't supported
5592 	 */
5593 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5594 
5595 	/*
5596 	 * Flush RAM to disk so that after reboot
5597 	 * the user can read log and see why the system rebooted.
5598 	 */
5599 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5600 		amdgpu_ras_get_context(adev)->reboot) {
5601 		DRM_WARN("Emergency reboot.");
5602 
5603 		ksys_sync_helper();
5604 		emergency_restart();
5605 	}
5606 
5607 	dev_info(adev->dev, "GPU %s begin!\n",
5608 		need_emergency_restart ? "jobs stop":"reset");
5609 
5610 	if (!amdgpu_sriov_vf(adev))
5611 		hive = amdgpu_get_xgmi_hive(adev);
5612 	if (hive)
5613 		mutex_lock(&hive->hive_lock);
5614 
5615 	reset_context->job = job;
5616 	reset_context->hive = hive;
5617 	/*
5618 	 * Build list of devices to reset.
5619 	 * In case we are in XGMI hive mode, resort the device list
5620 	 * to put adev in the 1st position.
5621 	 */
5622 	INIT_LIST_HEAD(&device_list);
5623 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5624 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5625 			list_add_tail(&tmp_adev->reset_list, &device_list);
5626 			if (adev->shutdown)
5627 				tmp_adev->shutdown = true;
5628 		}
5629 		if (!list_is_first(&adev->reset_list, &device_list))
5630 			list_rotate_to_front(&adev->reset_list, &device_list);
5631 		device_list_handle = &device_list;
5632 	} else {
5633 		list_add_tail(&adev->reset_list, &device_list);
5634 		device_list_handle = &device_list;
5635 	}
5636 
5637 	if (!amdgpu_sriov_vf(adev)) {
5638 		r = amdgpu_device_health_check(device_list_handle);
5639 		if (r)
5640 			goto end_reset;
5641 	}
5642 
5643 	/* We need to lock reset domain only once both for XGMI and single device */
5644 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5645 				    reset_list);
5646 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5647 
5648 	/* block all schedulers and reset given job's ring */
5649 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5650 
5651 		amdgpu_device_set_mp1_state(tmp_adev);
5652 
5653 		/*
5654 		 * Try to put the audio codec into suspend state
5655 		 * before gpu reset started.
5656 		 *
5657 		 * Due to the power domain of the graphics device
5658 		 * is shared with AZ power domain. Without this,
5659 		 * we may change the audio hardware from behind
5660 		 * the audio driver's back. That will trigger
5661 		 * some audio codec errors.
5662 		 */
5663 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5664 			audio_suspended = true;
5665 
5666 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5667 
5668 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5669 
5670 		if (!amdgpu_sriov_vf(tmp_adev))
5671 			amdgpu_amdkfd_pre_reset(tmp_adev);
5672 
5673 		/*
5674 		 * Mark these ASICs to be reseted as untracked first
5675 		 * And add them back after reset completed
5676 		 */
5677 		amdgpu_unregister_gpu_instance(tmp_adev);
5678 
5679 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5680 
5681 		/* disable ras on ALL IPs */
5682 		if (!need_emergency_restart &&
5683 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5684 			amdgpu_ras_suspend(tmp_adev);
5685 
5686 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5687 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5688 
5689 			if (!amdgpu_ring_sched_ready(ring))
5690 				continue;
5691 
5692 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5693 
5694 			if (need_emergency_restart)
5695 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5696 		}
5697 		atomic_inc(&tmp_adev->gpu_reset_counter);
5698 	}
5699 
5700 	if (need_emergency_restart)
5701 		goto skip_sched_resume;
5702 
5703 	/*
5704 	 * Must check guilty signal here since after this point all old
5705 	 * HW fences are force signaled.
5706 	 *
5707 	 * job->base holds a reference to parent fence
5708 	 */
5709 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5710 		job_signaled = true;
5711 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5712 		goto skip_hw_reset;
5713 	}
5714 
5715 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5716 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5717 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5718 		/*TODO Should we stop ?*/
5719 		if (r) {
5720 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5721 				  r, adev_to_drm(tmp_adev)->unique);
5722 			tmp_adev->asic_reset_res = r;
5723 		}
5724 
5725 		if (!amdgpu_sriov_vf(tmp_adev))
5726 			/*
5727 			* Drop all pending non scheduler resets. Scheduler resets
5728 			* were already dropped during drm_sched_stop
5729 			*/
5730 			amdgpu_device_stop_pending_resets(tmp_adev);
5731 	}
5732 
5733 	/* Actual ASIC resets if needed.*/
5734 	/* Host driver will handle XGMI hive reset for SRIOV */
5735 	if (amdgpu_sriov_vf(adev)) {
5736 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
5737 		if (r)
5738 			adev->asic_reset_res = r;
5739 
5740 		/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5741 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5742 			    IP_VERSION(9, 4, 2) ||
5743 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5744 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5745 			amdgpu_ras_resume(adev);
5746 	} else {
5747 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5748 		if (r && r == -EAGAIN)
5749 			goto retry;
5750 	}
5751 
5752 skip_hw_reset:
5753 
5754 	/* Post ASIC reset for all devs .*/
5755 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5756 
5757 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5758 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5759 
5760 			if (!amdgpu_ring_sched_ready(ring))
5761 				continue;
5762 
5763 			drm_sched_start(&ring->sched, true);
5764 		}
5765 
5766 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5767 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5768 
5769 		if (tmp_adev->asic_reset_res)
5770 			r = tmp_adev->asic_reset_res;
5771 
5772 		tmp_adev->asic_reset_res = 0;
5773 
5774 		if (r) {
5775 			/* bad news, how to tell it to userspace ? */
5776 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5777 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5778 		} else {
5779 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5780 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5781 				DRM_WARN("smart shift update failed\n");
5782 		}
5783 	}
5784 
5785 skip_sched_resume:
5786 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5787 		/* unlock kfd: SRIOV would do it separately */
5788 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5789 			amdgpu_amdkfd_post_reset(tmp_adev);
5790 
5791 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5792 		 * need to bring up kfd here if it's not be initialized before
5793 		 */
5794 		if (!adev->kfd.init_complete)
5795 			amdgpu_amdkfd_device_init(adev);
5796 
5797 		if (audio_suspended)
5798 			amdgpu_device_resume_display_audio(tmp_adev);
5799 
5800 		amdgpu_device_unset_mp1_state(tmp_adev);
5801 
5802 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5803 	}
5804 
5805 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5806 					    reset_list);
5807 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5808 
5809 end_reset:
5810 	if (hive) {
5811 		mutex_unlock(&hive->hive_lock);
5812 		amdgpu_put_xgmi_hive(hive);
5813 	}
5814 
5815 	if (r)
5816 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5817 
5818 	atomic_set(&adev->reset_domain->reset_res, r);
5819 	return r;
5820 }
5821 
5822 /**
5823  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5824  *
5825  * @adev: amdgpu_device pointer
5826  * @speed: pointer to the speed of the link
5827  * @width: pointer to the width of the link
5828  *
5829  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5830  * first physical partner to an AMD dGPU.
5831  * This will exclude any virtual switches and links.
5832  */
5833 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5834 					    enum pci_bus_speed *speed,
5835 					    enum pcie_link_width *width)
5836 {
5837 	struct pci_dev *parent = adev->pdev;
5838 
5839 	if (!speed || !width)
5840 		return;
5841 
5842 	*speed = PCI_SPEED_UNKNOWN;
5843 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5844 
5845 	while ((parent = pci_upstream_bridge(parent))) {
5846 		/* skip upstream/downstream switches internal to dGPU*/
5847 		if (parent->vendor == PCI_VENDOR_ID_ATI)
5848 			continue;
5849 		*speed = pcie_get_speed_cap(parent);
5850 		*width = pcie_get_width_cap(parent);
5851 		break;
5852 	}
5853 }
5854 
5855 /**
5856  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5857  *
5858  * @adev: amdgpu_device pointer
5859  *
5860  * Fetchs and stores in the driver the PCIE capabilities (gen speed
5861  * and lanes) of the slot the device is in. Handles APUs and
5862  * virtualized environments where PCIE config space may not be available.
5863  */
5864 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5865 {
5866 	struct pci_dev *pdev;
5867 	enum pci_bus_speed speed_cap, platform_speed_cap;
5868 	enum pcie_link_width platform_link_width;
5869 
5870 	if (amdgpu_pcie_gen_cap)
5871 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5872 
5873 	if (amdgpu_pcie_lane_cap)
5874 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5875 
5876 	/* covers APUs as well */
5877 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5878 		if (adev->pm.pcie_gen_mask == 0)
5879 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5880 		if (adev->pm.pcie_mlw_mask == 0)
5881 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5882 		return;
5883 	}
5884 
5885 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5886 		return;
5887 
5888 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5889 					&platform_link_width);
5890 
5891 	if (adev->pm.pcie_gen_mask == 0) {
5892 		/* asic caps */
5893 		pdev = adev->pdev;
5894 		speed_cap = pcie_get_speed_cap(pdev);
5895 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5896 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5897 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5898 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5899 		} else {
5900 			if (speed_cap == PCIE_SPEED_32_0GT)
5901 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5902 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5903 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5904 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5905 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5906 			else if (speed_cap == PCIE_SPEED_16_0GT)
5907 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5908 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5909 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5910 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5911 			else if (speed_cap == PCIE_SPEED_8_0GT)
5912 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5913 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5914 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5915 			else if (speed_cap == PCIE_SPEED_5_0GT)
5916 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5917 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5918 			else
5919 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5920 		}
5921 		/* platform caps */
5922 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5923 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5924 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5925 		} else {
5926 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5927 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5928 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5929 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5930 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5931 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5932 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5933 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5934 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5935 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5936 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5937 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5938 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5939 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5940 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5941 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5942 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5943 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5944 			else
5945 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5946 
5947 		}
5948 	}
5949 	if (adev->pm.pcie_mlw_mask == 0) {
5950 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5951 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5952 		} else {
5953 			switch (platform_link_width) {
5954 			case PCIE_LNK_X32:
5955 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5956 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5957 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5958 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5959 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5960 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5961 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5962 				break;
5963 			case PCIE_LNK_X16:
5964 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5965 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5966 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5967 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5968 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5969 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5970 				break;
5971 			case PCIE_LNK_X12:
5972 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5973 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5974 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5975 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5976 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5977 				break;
5978 			case PCIE_LNK_X8:
5979 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5980 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5981 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5982 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5983 				break;
5984 			case PCIE_LNK_X4:
5985 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5986 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5987 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5988 				break;
5989 			case PCIE_LNK_X2:
5990 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5991 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5992 				break;
5993 			case PCIE_LNK_X1:
5994 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5995 				break;
5996 			default:
5997 				break;
5998 			}
5999 		}
6000 	}
6001 }
6002 
6003 /**
6004  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6005  *
6006  * @adev: amdgpu_device pointer
6007  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6008  *
6009  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6010  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6011  * @peer_adev.
6012  */
6013 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6014 				      struct amdgpu_device *peer_adev)
6015 {
6016 #ifdef CONFIG_HSA_AMD_P2P
6017 	uint64_t address_mask = peer_adev->dev->dma_mask ?
6018 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6019 	resource_size_t aper_limit =
6020 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
6021 	bool p2p_access =
6022 		!adev->gmc.xgmi.connected_to_cpu &&
6023 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6024 
6025 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6026 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6027 		!(adev->gmc.aper_base & address_mask ||
6028 		  aper_limit & address_mask));
6029 #else
6030 	return false;
6031 #endif
6032 }
6033 
6034 int amdgpu_device_baco_enter(struct drm_device *dev)
6035 {
6036 	struct amdgpu_device *adev = drm_to_adev(dev);
6037 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6038 
6039 	if (!amdgpu_device_supports_baco(dev))
6040 		return -ENOTSUPP;
6041 
6042 	if (ras && adev->ras_enabled &&
6043 	    adev->nbio.funcs->enable_doorbell_interrupt)
6044 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6045 
6046 	return amdgpu_dpm_baco_enter(adev);
6047 }
6048 
6049 int amdgpu_device_baco_exit(struct drm_device *dev)
6050 {
6051 	struct amdgpu_device *adev = drm_to_adev(dev);
6052 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6053 	int ret = 0;
6054 
6055 	if (!amdgpu_device_supports_baco(dev))
6056 		return -ENOTSUPP;
6057 
6058 	ret = amdgpu_dpm_baco_exit(adev);
6059 	if (ret)
6060 		return ret;
6061 
6062 	if (ras && adev->ras_enabled &&
6063 	    adev->nbio.funcs->enable_doorbell_interrupt)
6064 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6065 
6066 	if (amdgpu_passthrough(adev) &&
6067 	    adev->nbio.funcs->clear_doorbell_interrupt)
6068 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6069 
6070 	return 0;
6071 }
6072 
6073 /**
6074  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6075  * @pdev: PCI device struct
6076  * @state: PCI channel state
6077  *
6078  * Description: Called when a PCI error is detected.
6079  *
6080  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6081  */
6082 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6083 {
6084 	struct drm_device *dev = pci_get_drvdata(pdev);
6085 	struct amdgpu_device *adev = drm_to_adev(dev);
6086 	int i;
6087 
6088 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6089 
6090 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6091 		DRM_WARN("No support for XGMI hive yet...");
6092 		return PCI_ERS_RESULT_DISCONNECT;
6093 	}
6094 
6095 	adev->pci_channel_state = state;
6096 
6097 	switch (state) {
6098 	case pci_channel_io_normal:
6099 		return PCI_ERS_RESULT_CAN_RECOVER;
6100 	/* Fatal error, prepare for slot reset */
6101 	case pci_channel_io_frozen:
6102 		/*
6103 		 * Locking adev->reset_domain->sem will prevent any external access
6104 		 * to GPU during PCI error recovery
6105 		 */
6106 		amdgpu_device_lock_reset_domain(adev->reset_domain);
6107 		amdgpu_device_set_mp1_state(adev);
6108 
6109 		/*
6110 		 * Block any work scheduling as we do for regular GPU reset
6111 		 * for the duration of the recovery
6112 		 */
6113 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6114 			struct amdgpu_ring *ring = adev->rings[i];
6115 
6116 			if (!amdgpu_ring_sched_ready(ring))
6117 				continue;
6118 
6119 			drm_sched_stop(&ring->sched, NULL);
6120 		}
6121 		atomic_inc(&adev->gpu_reset_counter);
6122 		return PCI_ERS_RESULT_NEED_RESET;
6123 	case pci_channel_io_perm_failure:
6124 		/* Permanent error, prepare for device removal */
6125 		return PCI_ERS_RESULT_DISCONNECT;
6126 	}
6127 
6128 	return PCI_ERS_RESULT_NEED_RESET;
6129 }
6130 
6131 /**
6132  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6133  * @pdev: pointer to PCI device
6134  */
6135 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6136 {
6137 
6138 	DRM_INFO("PCI error: mmio enabled callback!!\n");
6139 
6140 	/* TODO - dump whatever for debugging purposes */
6141 
6142 	/* This called only if amdgpu_pci_error_detected returns
6143 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6144 	 * works, no need to reset slot.
6145 	 */
6146 
6147 	return PCI_ERS_RESULT_RECOVERED;
6148 }
6149 
6150 /**
6151  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6152  * @pdev: PCI device struct
6153  *
6154  * Description: This routine is called by the pci error recovery
6155  * code after the PCI slot has been reset, just before we
6156  * should resume normal operations.
6157  */
6158 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6159 {
6160 	struct drm_device *dev = pci_get_drvdata(pdev);
6161 	struct amdgpu_device *adev = drm_to_adev(dev);
6162 	int r, i;
6163 	struct amdgpu_reset_context reset_context;
6164 	u32 memsize;
6165 	struct list_head device_list;
6166 	struct amdgpu_hive_info *hive;
6167 	int hive_ras_recovery = 0;
6168 	struct amdgpu_ras *ras;
6169 
6170 	/* PCI error slot reset should be skipped During RAS recovery */
6171 	hive = amdgpu_get_xgmi_hive(adev);
6172 	if (hive) {
6173 		hive_ras_recovery = atomic_read(&hive->ras_recovery);
6174 		amdgpu_put_xgmi_hive(hive);
6175 	}
6176 	ras = amdgpu_ras_get_context(adev);
6177 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
6178 		 ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
6179 		return PCI_ERS_RESULT_RECOVERED;
6180 
6181 	DRM_INFO("PCI error: slot reset callback!!\n");
6182 
6183 	memset(&reset_context, 0, sizeof(reset_context));
6184 
6185 	INIT_LIST_HEAD(&device_list);
6186 	list_add_tail(&adev->reset_list, &device_list);
6187 
6188 	/* wait for asic to come out of reset */
6189 	msleep(500);
6190 
6191 	/* Restore PCI confspace */
6192 	amdgpu_device_load_pci_state(pdev);
6193 
6194 	/* confirm  ASIC came out of reset */
6195 	for (i = 0; i < adev->usec_timeout; i++) {
6196 		memsize = amdgpu_asic_get_config_memsize(adev);
6197 
6198 		if (memsize != 0xffffffff)
6199 			break;
6200 		udelay(1);
6201 	}
6202 	if (memsize == 0xffffffff) {
6203 		r = -ETIME;
6204 		goto out;
6205 	}
6206 
6207 	reset_context.method = AMD_RESET_METHOD_NONE;
6208 	reset_context.reset_req_dev = adev;
6209 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6210 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6211 
6212 	adev->no_hw_access = true;
6213 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6214 	adev->no_hw_access = false;
6215 	if (r)
6216 		goto out;
6217 
6218 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6219 
6220 out:
6221 	if (!r) {
6222 		if (amdgpu_device_cache_pci_state(adev->pdev))
6223 			pci_restore_state(adev->pdev);
6224 
6225 		DRM_INFO("PCIe error recovery succeeded\n");
6226 	} else {
6227 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
6228 		amdgpu_device_unset_mp1_state(adev);
6229 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6230 	}
6231 
6232 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6233 }
6234 
6235 /**
6236  * amdgpu_pci_resume() - resume normal ops after PCI reset
6237  * @pdev: pointer to PCI device
6238  *
6239  * Called when the error recovery driver tells us that its
6240  * OK to resume normal operation.
6241  */
6242 void amdgpu_pci_resume(struct pci_dev *pdev)
6243 {
6244 	struct drm_device *dev = pci_get_drvdata(pdev);
6245 	struct amdgpu_device *adev = drm_to_adev(dev);
6246 	int i;
6247 
6248 
6249 	DRM_INFO("PCI error: resume callback!!\n");
6250 
6251 	/* Only continue execution for the case of pci_channel_io_frozen */
6252 	if (adev->pci_channel_state != pci_channel_io_frozen)
6253 		return;
6254 
6255 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6256 		struct amdgpu_ring *ring = adev->rings[i];
6257 
6258 		if (!amdgpu_ring_sched_ready(ring))
6259 			continue;
6260 
6261 		drm_sched_start(&ring->sched, true);
6262 	}
6263 
6264 	amdgpu_device_unset_mp1_state(adev);
6265 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6266 }
6267 
6268 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6269 {
6270 	struct drm_device *dev = pci_get_drvdata(pdev);
6271 	struct amdgpu_device *adev = drm_to_adev(dev);
6272 	int r;
6273 
6274 	r = pci_save_state(pdev);
6275 	if (!r) {
6276 		kfree(adev->pci_state);
6277 
6278 		adev->pci_state = pci_store_saved_state(pdev);
6279 
6280 		if (!adev->pci_state) {
6281 			DRM_ERROR("Failed to store PCI saved state");
6282 			return false;
6283 		}
6284 	} else {
6285 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6286 		return false;
6287 	}
6288 
6289 	return true;
6290 }
6291 
6292 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6293 {
6294 	struct drm_device *dev = pci_get_drvdata(pdev);
6295 	struct amdgpu_device *adev = drm_to_adev(dev);
6296 	int r;
6297 
6298 	if (!adev->pci_state)
6299 		return false;
6300 
6301 	r = pci_load_saved_state(pdev, adev->pci_state);
6302 
6303 	if (!r) {
6304 		pci_restore_state(pdev);
6305 	} else {
6306 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6307 		return false;
6308 	}
6309 
6310 	return true;
6311 }
6312 
6313 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6314 		struct amdgpu_ring *ring)
6315 {
6316 #ifdef CONFIG_X86_64
6317 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6318 		return;
6319 #endif
6320 	if (adev->gmc.xgmi.connected_to_cpu)
6321 		return;
6322 
6323 	if (ring && ring->funcs->emit_hdp_flush)
6324 		amdgpu_ring_emit_hdp_flush(ring);
6325 	else
6326 		amdgpu_asic_flush_hdp(adev, ring);
6327 }
6328 
6329 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6330 		struct amdgpu_ring *ring)
6331 {
6332 #ifdef CONFIG_X86_64
6333 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6334 		return;
6335 #endif
6336 	if (adev->gmc.xgmi.connected_to_cpu)
6337 		return;
6338 
6339 	amdgpu_asic_invalidate_hdp(adev, ring);
6340 }
6341 
6342 int amdgpu_in_reset(struct amdgpu_device *adev)
6343 {
6344 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6345 }
6346 
6347 /**
6348  * amdgpu_device_halt() - bring hardware to some kind of halt state
6349  *
6350  * @adev: amdgpu_device pointer
6351  *
6352  * Bring hardware to some kind of halt state so that no one can touch it
6353  * any more. It will help to maintain error context when error occurred.
6354  * Compare to a simple hang, the system will keep stable at least for SSH
6355  * access. Then it should be trivial to inspect the hardware state and
6356  * see what's going on. Implemented as following:
6357  *
6358  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6359  *    clears all CPU mappings to device, disallows remappings through page faults
6360  * 2. amdgpu_irq_disable_all() disables all interrupts
6361  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6362  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6363  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6364  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6365  *    flush any in flight DMA operations
6366  */
6367 void amdgpu_device_halt(struct amdgpu_device *adev)
6368 {
6369 	struct pci_dev *pdev = adev->pdev;
6370 	struct drm_device *ddev = adev_to_drm(adev);
6371 
6372 	amdgpu_xcp_dev_unplug(adev);
6373 	drm_dev_unplug(ddev);
6374 
6375 	amdgpu_irq_disable_all(adev);
6376 
6377 	amdgpu_fence_driver_hw_fini(adev);
6378 
6379 	adev->no_hw_access = true;
6380 
6381 	amdgpu_device_unmap_mmio(adev);
6382 
6383 	pci_disable_device(pdev);
6384 	pci_wait_for_pending_transaction(pdev);
6385 }
6386 
6387 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6388 				u32 reg)
6389 {
6390 	unsigned long flags, address, data;
6391 	u32 r;
6392 
6393 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6394 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6395 
6396 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6397 	WREG32(address, reg * 4);
6398 	(void)RREG32(address);
6399 	r = RREG32(data);
6400 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6401 	return r;
6402 }
6403 
6404 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6405 				u32 reg, u32 v)
6406 {
6407 	unsigned long flags, address, data;
6408 
6409 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6410 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6411 
6412 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6413 	WREG32(address, reg * 4);
6414 	(void)RREG32(address);
6415 	WREG32(data, v);
6416 	(void)RREG32(data);
6417 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6418 }
6419 
6420 /**
6421  * amdgpu_device_switch_gang - switch to a new gang
6422  * @adev: amdgpu_device pointer
6423  * @gang: the gang to switch to
6424  *
6425  * Try to switch to a new gang.
6426  * Returns: NULL if we switched to the new gang or a reference to the current
6427  * gang leader.
6428  */
6429 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6430 					    struct dma_fence *gang)
6431 {
6432 	struct dma_fence *old = NULL;
6433 
6434 	do {
6435 		dma_fence_put(old);
6436 		rcu_read_lock();
6437 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
6438 		rcu_read_unlock();
6439 
6440 		if (old == gang)
6441 			break;
6442 
6443 		if (!dma_fence_is_signaled(old))
6444 			return old;
6445 
6446 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6447 			 old, gang) != old);
6448 
6449 	dma_fence_put(old);
6450 	return NULL;
6451 }
6452 
6453 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6454 {
6455 	switch (adev->asic_type) {
6456 #ifdef CONFIG_DRM_AMDGPU_SI
6457 	case CHIP_HAINAN:
6458 #endif
6459 	case CHIP_TOPAZ:
6460 		/* chips with no display hardware */
6461 		return false;
6462 #ifdef CONFIG_DRM_AMDGPU_SI
6463 	case CHIP_TAHITI:
6464 	case CHIP_PITCAIRN:
6465 	case CHIP_VERDE:
6466 	case CHIP_OLAND:
6467 #endif
6468 #ifdef CONFIG_DRM_AMDGPU_CIK
6469 	case CHIP_BONAIRE:
6470 	case CHIP_HAWAII:
6471 	case CHIP_KAVERI:
6472 	case CHIP_KABINI:
6473 	case CHIP_MULLINS:
6474 #endif
6475 	case CHIP_TONGA:
6476 	case CHIP_FIJI:
6477 	case CHIP_POLARIS10:
6478 	case CHIP_POLARIS11:
6479 	case CHIP_POLARIS12:
6480 	case CHIP_VEGAM:
6481 	case CHIP_CARRIZO:
6482 	case CHIP_STONEY:
6483 		/* chips with display hardware */
6484 		return true;
6485 	default:
6486 		/* IP discovery */
6487 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6488 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6489 			return false;
6490 		return true;
6491 	}
6492 }
6493 
6494 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6495 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6496 		uint32_t expected_value, uint32_t mask)
6497 {
6498 	uint32_t ret = 0;
6499 	uint32_t old_ = 0;
6500 	uint32_t tmp_ = RREG32(reg_addr);
6501 	uint32_t loop = adev->usec_timeout;
6502 
6503 	while ((tmp_ & (mask)) != (expected_value)) {
6504 		if (old_ != tmp_) {
6505 			loop = adev->usec_timeout;
6506 			old_ = tmp_;
6507 		} else
6508 			udelay(1);
6509 		tmp_ = RREG32(reg_addr);
6510 		loop--;
6511 		if (!loop) {
6512 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6513 				  inst, reg_name, (uint32_t)expected_value,
6514 				  (uint32_t)(tmp_ & (mask)));
6515 			ret = -ETIMEDOUT;
6516 			break;
6517 		}
6518 	}
6519 	return ret;
6520 }
6521