xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision 7cc9196675234d4de0e1e19b9da1a8b86ecfeedd)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
37 
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 
78 #include <linux/suspend.h>
79 #include <drm/task_barrier.h>
80 #include <linux/pm_runtime.h>
81 
82 #include <drm/drm_drv.h>
83 
84 #if IS_ENABLED(CONFIG_X86)
85 #include <asm/intel-family.h>
86 #endif
87 
88 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
89 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
95 
96 #define AMDGPU_RESUME_MS		2000
97 #define AMDGPU_MAX_RETRY_LIMIT		2
98 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
99 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
100 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
101 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
102 
103 static const struct drm_driver amdgpu_kms_driver;
104 
105 const char *amdgpu_asic_name[] = {
106 	"TAHITI",
107 	"PITCAIRN",
108 	"VERDE",
109 	"OLAND",
110 	"HAINAN",
111 	"BONAIRE",
112 	"KAVERI",
113 	"KABINI",
114 	"HAWAII",
115 	"MULLINS",
116 	"TOPAZ",
117 	"TONGA",
118 	"FIJI",
119 	"CARRIZO",
120 	"STONEY",
121 	"POLARIS10",
122 	"POLARIS11",
123 	"POLARIS12",
124 	"VEGAM",
125 	"VEGA10",
126 	"VEGA12",
127 	"VEGA20",
128 	"RAVEN",
129 	"ARCTURUS",
130 	"RENOIR",
131 	"ALDEBARAN",
132 	"NAVI10",
133 	"CYAN_SKILLFISH",
134 	"NAVI14",
135 	"NAVI12",
136 	"SIENNA_CICHLID",
137 	"NAVY_FLOUNDER",
138 	"VANGOGH",
139 	"DIMGREY_CAVEFISH",
140 	"BEIGE_GOBY",
141 	"YELLOW_CARP",
142 	"IP DISCOVERY",
143 	"LAST",
144 };
145 
146 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
147 
148 /**
149  * DOC: pcie_replay_count
150  *
151  * The amdgpu driver provides a sysfs API for reporting the total number
152  * of PCIe replays (NAKs)
153  * The file pcie_replay_count is used for this and returns the total
154  * number of replays as a sum of the NAKs generated and NAKs received
155  */
156 
157 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
158 		struct device_attribute *attr, char *buf)
159 {
160 	struct drm_device *ddev = dev_get_drvdata(dev);
161 	struct amdgpu_device *adev = drm_to_adev(ddev);
162 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
163 
164 	return sysfs_emit(buf, "%llu\n", cnt);
165 }
166 
167 static DEVICE_ATTR(pcie_replay_count, 0444,
168 		amdgpu_device_get_pcie_replay_count, NULL);
169 
170 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
171 					  struct bin_attribute *attr, char *buf,
172 					  loff_t ppos, size_t count)
173 {
174 	struct device *dev = kobj_to_dev(kobj);
175 	struct drm_device *ddev = dev_get_drvdata(dev);
176 	struct amdgpu_device *adev = drm_to_adev(ddev);
177 	ssize_t bytes_read;
178 
179 	switch (ppos) {
180 	case AMDGPU_SYS_REG_STATE_XGMI:
181 		bytes_read = amdgpu_asic_get_reg_state(
182 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
183 		break;
184 	case AMDGPU_SYS_REG_STATE_WAFL:
185 		bytes_read = amdgpu_asic_get_reg_state(
186 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
187 		break;
188 	case AMDGPU_SYS_REG_STATE_PCIE:
189 		bytes_read = amdgpu_asic_get_reg_state(
190 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
191 		break;
192 	case AMDGPU_SYS_REG_STATE_USR:
193 		bytes_read = amdgpu_asic_get_reg_state(
194 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
195 		break;
196 	case AMDGPU_SYS_REG_STATE_USR_1:
197 		bytes_read = amdgpu_asic_get_reg_state(
198 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
199 		break;
200 	default:
201 		return -EINVAL;
202 	}
203 
204 	return bytes_read;
205 }
206 
207 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
208 	 AMDGPU_SYS_REG_STATE_END);
209 
210 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
211 {
212 	int ret;
213 
214 	if (!amdgpu_asic_get_reg_state_supported(adev))
215 		return 0;
216 
217 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
218 
219 	return ret;
220 }
221 
222 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
223 {
224 	if (!amdgpu_asic_get_reg_state_supported(adev))
225 		return;
226 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
227 }
228 
229 /**
230  * DOC: board_info
231  *
232  * The amdgpu driver provides a sysfs API for giving board related information.
233  * It provides the form factor information in the format
234  *
235  *   type : form factor
236  *
237  * Possible form factor values
238  *
239  * - "cem"		- PCIE CEM card
240  * - "oam"		- Open Compute Accelerator Module
241  * - "unknown"	- Not known
242  *
243  */
244 
245 static ssize_t amdgpu_device_get_board_info(struct device *dev,
246 					    struct device_attribute *attr,
247 					    char *buf)
248 {
249 	struct drm_device *ddev = dev_get_drvdata(dev);
250 	struct amdgpu_device *adev = drm_to_adev(ddev);
251 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
252 	const char *pkg;
253 
254 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
255 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
256 
257 	switch (pkg_type) {
258 	case AMDGPU_PKG_TYPE_CEM:
259 		pkg = "cem";
260 		break;
261 	case AMDGPU_PKG_TYPE_OAM:
262 		pkg = "oam";
263 		break;
264 	default:
265 		pkg = "unknown";
266 		break;
267 	}
268 
269 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
270 }
271 
272 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
273 
274 static struct attribute *amdgpu_board_attrs[] = {
275 	&dev_attr_board_info.attr,
276 	NULL,
277 };
278 
279 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
280 					     struct attribute *attr, int n)
281 {
282 	struct device *dev = kobj_to_dev(kobj);
283 	struct drm_device *ddev = dev_get_drvdata(dev);
284 	struct amdgpu_device *adev = drm_to_adev(ddev);
285 
286 	if (adev->flags & AMD_IS_APU)
287 		return 0;
288 
289 	return attr->mode;
290 }
291 
292 static const struct attribute_group amdgpu_board_attrs_group = {
293 	.attrs = amdgpu_board_attrs,
294 	.is_visible = amdgpu_board_attrs_is_visible
295 };
296 
297 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
298 
299 
300 /**
301  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
302  *
303  * @dev: drm_device pointer
304  *
305  * Returns true if the device is a dGPU with ATPX power control,
306  * otherwise return false.
307  */
308 bool amdgpu_device_supports_px(struct drm_device *dev)
309 {
310 	struct amdgpu_device *adev = drm_to_adev(dev);
311 
312 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
313 		return true;
314 	return false;
315 }
316 
317 /**
318  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
319  *
320  * @dev: drm_device pointer
321  *
322  * Returns true if the device is a dGPU with ACPI power control,
323  * otherwise return false.
324  */
325 bool amdgpu_device_supports_boco(struct drm_device *dev)
326 {
327 	struct amdgpu_device *adev = drm_to_adev(dev);
328 
329 	if (adev->has_pr3 ||
330 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
331 		return true;
332 	return false;
333 }
334 
335 /**
336  * amdgpu_device_supports_baco - Does the device support BACO
337  *
338  * @dev: drm_device pointer
339  *
340  * Returns true if the device supporte BACO,
341  * otherwise return false.
342  */
343 bool amdgpu_device_supports_baco(struct drm_device *dev)
344 {
345 	struct amdgpu_device *adev = drm_to_adev(dev);
346 
347 	return amdgpu_asic_supports_baco(adev);
348 }
349 
350 /**
351  * amdgpu_device_supports_smart_shift - Is the device dGPU with
352  * smart shift support
353  *
354  * @dev: drm_device pointer
355  *
356  * Returns true if the device is a dGPU with Smart Shift support,
357  * otherwise returns false.
358  */
359 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
360 {
361 	return (amdgpu_device_supports_boco(dev) &&
362 		amdgpu_acpi_is_power_shift_control_supported());
363 }
364 
365 /*
366  * VRAM access helper functions
367  */
368 
369 /**
370  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
371  *
372  * @adev: amdgpu_device pointer
373  * @pos: offset of the buffer in vram
374  * @buf: virtual address of the buffer in system memory
375  * @size: read/write size, sizeof(@buf) must > @size
376  * @write: true - write to vram, otherwise - read from vram
377  */
378 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
379 			     void *buf, size_t size, bool write)
380 {
381 	unsigned long flags;
382 	uint32_t hi = ~0, tmp = 0;
383 	uint32_t *data = buf;
384 	uint64_t last;
385 	int idx;
386 
387 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
388 		return;
389 
390 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
391 
392 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
393 	for (last = pos + size; pos < last; pos += 4) {
394 		tmp = pos >> 31;
395 
396 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
397 		if (tmp != hi) {
398 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
399 			hi = tmp;
400 		}
401 		if (write)
402 			WREG32_NO_KIQ(mmMM_DATA, *data++);
403 		else
404 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
405 	}
406 
407 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
408 	drm_dev_exit(idx);
409 }
410 
411 /**
412  * amdgpu_device_aper_access - access vram by vram aperature
413  *
414  * @adev: amdgpu_device pointer
415  * @pos: offset of the buffer in vram
416  * @buf: virtual address of the buffer in system memory
417  * @size: read/write size, sizeof(@buf) must > @size
418  * @write: true - write to vram, otherwise - read from vram
419  *
420  * The return value means how many bytes have been transferred.
421  */
422 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
423 				 void *buf, size_t size, bool write)
424 {
425 #ifdef CONFIG_64BIT
426 	void __iomem *addr;
427 	size_t count = 0;
428 	uint64_t last;
429 
430 	if (!adev->mman.aper_base_kaddr)
431 		return 0;
432 
433 	last = min(pos + size, adev->gmc.visible_vram_size);
434 	if (last > pos) {
435 		addr = adev->mman.aper_base_kaddr + pos;
436 		count = last - pos;
437 
438 		if (write) {
439 			memcpy_toio(addr, buf, count);
440 			/* Make sure HDP write cache flush happens without any reordering
441 			 * after the system memory contents are sent over PCIe device
442 			 */
443 			mb();
444 			amdgpu_device_flush_hdp(adev, NULL);
445 		} else {
446 			amdgpu_device_invalidate_hdp(adev, NULL);
447 			/* Make sure HDP read cache is invalidated before issuing a read
448 			 * to the PCIe device
449 			 */
450 			mb();
451 			memcpy_fromio(buf, addr, count);
452 		}
453 
454 	}
455 
456 	return count;
457 #else
458 	return 0;
459 #endif
460 }
461 
462 /**
463  * amdgpu_device_vram_access - read/write a buffer in vram
464  *
465  * @adev: amdgpu_device pointer
466  * @pos: offset of the buffer in vram
467  * @buf: virtual address of the buffer in system memory
468  * @size: read/write size, sizeof(@buf) must > @size
469  * @write: true - write to vram, otherwise - read from vram
470  */
471 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
472 			       void *buf, size_t size, bool write)
473 {
474 	size_t count;
475 
476 	/* try to using vram apreature to access vram first */
477 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
478 	size -= count;
479 	if (size) {
480 		/* using MM to access rest vram */
481 		pos += count;
482 		buf += count;
483 		amdgpu_device_mm_access(adev, pos, buf, size, write);
484 	}
485 }
486 
487 /*
488  * register access helper functions.
489  */
490 
491 /* Check if hw access should be skipped because of hotplug or device error */
492 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
493 {
494 	if (adev->no_hw_access)
495 		return true;
496 
497 #ifdef CONFIG_LOCKDEP
498 	/*
499 	 * This is a bit complicated to understand, so worth a comment. What we assert
500 	 * here is that the GPU reset is not running on another thread in parallel.
501 	 *
502 	 * For this we trylock the read side of the reset semaphore, if that succeeds
503 	 * we know that the reset is not running in paralell.
504 	 *
505 	 * If the trylock fails we assert that we are either already holding the read
506 	 * side of the lock or are the reset thread itself and hold the write side of
507 	 * the lock.
508 	 */
509 	if (in_task()) {
510 		if (down_read_trylock(&adev->reset_domain->sem))
511 			up_read(&adev->reset_domain->sem);
512 		else
513 			lockdep_assert_held(&adev->reset_domain->sem);
514 	}
515 #endif
516 	return false;
517 }
518 
519 /**
520  * amdgpu_device_rreg - read a memory mapped IO or indirect register
521  *
522  * @adev: amdgpu_device pointer
523  * @reg: dword aligned register offset
524  * @acc_flags: access flags which require special behavior
525  *
526  * Returns the 32 bit value from the offset specified.
527  */
528 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
529 			    uint32_t reg, uint32_t acc_flags)
530 {
531 	uint32_t ret;
532 
533 	if (amdgpu_device_skip_hw_access(adev))
534 		return 0;
535 
536 	if ((reg * 4) < adev->rmmio_size) {
537 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
538 		    amdgpu_sriov_runtime(adev) &&
539 		    down_read_trylock(&adev->reset_domain->sem)) {
540 			ret = amdgpu_kiq_rreg(adev, reg, 0);
541 			up_read(&adev->reset_domain->sem);
542 		} else {
543 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
544 		}
545 	} else {
546 		ret = adev->pcie_rreg(adev, reg * 4);
547 	}
548 
549 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
550 
551 	return ret;
552 }
553 
554 /*
555  * MMIO register read with bytes helper functions
556  * @offset:bytes offset from MMIO start
557  */
558 
559 /**
560  * amdgpu_mm_rreg8 - read a memory mapped IO register
561  *
562  * @adev: amdgpu_device pointer
563  * @offset: byte aligned register offset
564  *
565  * Returns the 8 bit value from the offset specified.
566  */
567 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
568 {
569 	if (amdgpu_device_skip_hw_access(adev))
570 		return 0;
571 
572 	if (offset < adev->rmmio_size)
573 		return (readb(adev->rmmio + offset));
574 	BUG();
575 }
576 
577 
578 /**
579  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
580  *
581  * @adev: amdgpu_device pointer
582  * @reg: dword aligned register offset
583  * @acc_flags: access flags which require special behavior
584  * @xcc_id: xcc accelerated compute core id
585  *
586  * Returns the 32 bit value from the offset specified.
587  */
588 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
589 				uint32_t reg, uint32_t acc_flags,
590 				uint32_t xcc_id)
591 {
592 	uint32_t ret, rlcg_flag;
593 
594 	if (amdgpu_device_skip_hw_access(adev))
595 		return 0;
596 
597 	if ((reg * 4) < adev->rmmio_size) {
598 		if (amdgpu_sriov_vf(adev) &&
599 		    !amdgpu_sriov_runtime(adev) &&
600 		    adev->gfx.rlc.rlcg_reg_access_supported &&
601 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
602 							 GC_HWIP, false,
603 							 &rlcg_flag)) {
604 			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
605 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
606 		    amdgpu_sriov_runtime(adev) &&
607 		    down_read_trylock(&adev->reset_domain->sem)) {
608 			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
609 			up_read(&adev->reset_domain->sem);
610 		} else {
611 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
612 		}
613 	} else {
614 		ret = adev->pcie_rreg(adev, reg * 4);
615 	}
616 
617 	return ret;
618 }
619 
620 /*
621  * MMIO register write with bytes helper functions
622  * @offset:bytes offset from MMIO start
623  * @value: the value want to be written to the register
624  */
625 
626 /**
627  * amdgpu_mm_wreg8 - read a memory mapped IO register
628  *
629  * @adev: amdgpu_device pointer
630  * @offset: byte aligned register offset
631  * @value: 8 bit value to write
632  *
633  * Writes the value specified to the offset specified.
634  */
635 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
636 {
637 	if (amdgpu_device_skip_hw_access(adev))
638 		return;
639 
640 	if (offset < adev->rmmio_size)
641 		writeb(value, adev->rmmio + offset);
642 	else
643 		BUG();
644 }
645 
646 /**
647  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
648  *
649  * @adev: amdgpu_device pointer
650  * @reg: dword aligned register offset
651  * @v: 32 bit value to write to the register
652  * @acc_flags: access flags which require special behavior
653  *
654  * Writes the value specified to the offset specified.
655  */
656 void amdgpu_device_wreg(struct amdgpu_device *adev,
657 			uint32_t reg, uint32_t v,
658 			uint32_t acc_flags)
659 {
660 	if (amdgpu_device_skip_hw_access(adev))
661 		return;
662 
663 	if ((reg * 4) < adev->rmmio_size) {
664 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
665 		    amdgpu_sriov_runtime(adev) &&
666 		    down_read_trylock(&adev->reset_domain->sem)) {
667 			amdgpu_kiq_wreg(adev, reg, v, 0);
668 			up_read(&adev->reset_domain->sem);
669 		} else {
670 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
671 		}
672 	} else {
673 		adev->pcie_wreg(adev, reg * 4, v);
674 	}
675 
676 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
677 }
678 
679 /**
680  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
681  *
682  * @adev: amdgpu_device pointer
683  * @reg: mmio/rlc register
684  * @v: value to write
685  * @xcc_id: xcc accelerated compute core id
686  *
687  * this function is invoked only for the debugfs register access
688  */
689 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
690 			     uint32_t reg, uint32_t v,
691 			     uint32_t xcc_id)
692 {
693 	if (amdgpu_device_skip_hw_access(adev))
694 		return;
695 
696 	if (amdgpu_sriov_fullaccess(adev) &&
697 	    adev->gfx.rlc.funcs &&
698 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
699 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
700 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
701 	} else if ((reg * 4) >= adev->rmmio_size) {
702 		adev->pcie_wreg(adev, reg * 4, v);
703 	} else {
704 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
705 	}
706 }
707 
708 /**
709  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
710  *
711  * @adev: amdgpu_device pointer
712  * @reg: dword aligned register offset
713  * @v: 32 bit value to write to the register
714  * @acc_flags: access flags which require special behavior
715  * @xcc_id: xcc accelerated compute core id
716  *
717  * Writes the value specified to the offset specified.
718  */
719 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
720 			uint32_t reg, uint32_t v,
721 			uint32_t acc_flags, uint32_t xcc_id)
722 {
723 	uint32_t rlcg_flag;
724 
725 	if (amdgpu_device_skip_hw_access(adev))
726 		return;
727 
728 	if ((reg * 4) < adev->rmmio_size) {
729 		if (amdgpu_sriov_vf(adev) &&
730 		    !amdgpu_sriov_runtime(adev) &&
731 		    adev->gfx.rlc.rlcg_reg_access_supported &&
732 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
733 							 GC_HWIP, true,
734 							 &rlcg_flag)) {
735 			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
736 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
737 		    amdgpu_sriov_runtime(adev) &&
738 		    down_read_trylock(&adev->reset_domain->sem)) {
739 			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
740 			up_read(&adev->reset_domain->sem);
741 		} else {
742 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
743 		}
744 	} else {
745 		adev->pcie_wreg(adev, reg * 4, v);
746 	}
747 }
748 
749 /**
750  * amdgpu_device_indirect_rreg - read an indirect register
751  *
752  * @adev: amdgpu_device pointer
753  * @reg_addr: indirect register address to read from
754  *
755  * Returns the value of indirect register @reg_addr
756  */
757 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
758 				u32 reg_addr)
759 {
760 	unsigned long flags, pcie_index, pcie_data;
761 	void __iomem *pcie_index_offset;
762 	void __iomem *pcie_data_offset;
763 	u32 r;
764 
765 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
766 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
767 
768 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
769 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
770 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
771 
772 	writel(reg_addr, pcie_index_offset);
773 	readl(pcie_index_offset);
774 	r = readl(pcie_data_offset);
775 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
776 
777 	return r;
778 }
779 
780 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
781 				    u64 reg_addr)
782 {
783 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
784 	u32 r;
785 	void __iomem *pcie_index_offset;
786 	void __iomem *pcie_index_hi_offset;
787 	void __iomem *pcie_data_offset;
788 
789 	if (unlikely(!adev->nbio.funcs)) {
790 		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
791 		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
792 	} else {
793 		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
794 		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
795 	}
796 
797 	if (reg_addr >> 32) {
798 		if (unlikely(!adev->nbio.funcs))
799 			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
800 		else
801 			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
802 	} else {
803 		pcie_index_hi = 0;
804 	}
805 
806 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
807 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
808 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
809 	if (pcie_index_hi != 0)
810 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
811 				pcie_index_hi * 4;
812 
813 	writel(reg_addr, pcie_index_offset);
814 	readl(pcie_index_offset);
815 	if (pcie_index_hi != 0) {
816 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
817 		readl(pcie_index_hi_offset);
818 	}
819 	r = readl(pcie_data_offset);
820 
821 	/* clear the high bits */
822 	if (pcie_index_hi != 0) {
823 		writel(0, pcie_index_hi_offset);
824 		readl(pcie_index_hi_offset);
825 	}
826 
827 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
828 
829 	return r;
830 }
831 
832 /**
833  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
834  *
835  * @adev: amdgpu_device pointer
836  * @reg_addr: indirect register address to read from
837  *
838  * Returns the value of indirect register @reg_addr
839  */
840 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
841 				  u32 reg_addr)
842 {
843 	unsigned long flags, pcie_index, pcie_data;
844 	void __iomem *pcie_index_offset;
845 	void __iomem *pcie_data_offset;
846 	u64 r;
847 
848 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
849 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
850 
851 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
852 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
853 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
854 
855 	/* read low 32 bits */
856 	writel(reg_addr, pcie_index_offset);
857 	readl(pcie_index_offset);
858 	r = readl(pcie_data_offset);
859 	/* read high 32 bits */
860 	writel(reg_addr + 4, pcie_index_offset);
861 	readl(pcie_index_offset);
862 	r |= ((u64)readl(pcie_data_offset) << 32);
863 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
864 
865 	return r;
866 }
867 
868 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
869 				  u64 reg_addr)
870 {
871 	unsigned long flags, pcie_index, pcie_data;
872 	unsigned long pcie_index_hi = 0;
873 	void __iomem *pcie_index_offset;
874 	void __iomem *pcie_index_hi_offset;
875 	void __iomem *pcie_data_offset;
876 	u64 r;
877 
878 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
879 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
880 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
881 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
882 
883 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
884 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
885 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
886 	if (pcie_index_hi != 0)
887 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
888 			pcie_index_hi * 4;
889 
890 	/* read low 32 bits */
891 	writel(reg_addr, pcie_index_offset);
892 	readl(pcie_index_offset);
893 	if (pcie_index_hi != 0) {
894 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
895 		readl(pcie_index_hi_offset);
896 	}
897 	r = readl(pcie_data_offset);
898 	/* read high 32 bits */
899 	writel(reg_addr + 4, pcie_index_offset);
900 	readl(pcie_index_offset);
901 	if (pcie_index_hi != 0) {
902 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
903 		readl(pcie_index_hi_offset);
904 	}
905 	r |= ((u64)readl(pcie_data_offset) << 32);
906 
907 	/* clear the high bits */
908 	if (pcie_index_hi != 0) {
909 		writel(0, pcie_index_hi_offset);
910 		readl(pcie_index_hi_offset);
911 	}
912 
913 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
914 
915 	return r;
916 }
917 
918 /**
919  * amdgpu_device_indirect_wreg - write an indirect register address
920  *
921  * @adev: amdgpu_device pointer
922  * @reg_addr: indirect register offset
923  * @reg_data: indirect register data
924  *
925  */
926 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
927 				 u32 reg_addr, u32 reg_data)
928 {
929 	unsigned long flags, pcie_index, pcie_data;
930 	void __iomem *pcie_index_offset;
931 	void __iomem *pcie_data_offset;
932 
933 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
934 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
935 
936 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
937 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
938 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
939 
940 	writel(reg_addr, pcie_index_offset);
941 	readl(pcie_index_offset);
942 	writel(reg_data, pcie_data_offset);
943 	readl(pcie_data_offset);
944 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
945 }
946 
947 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
948 				     u64 reg_addr, u32 reg_data)
949 {
950 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
951 	void __iomem *pcie_index_offset;
952 	void __iomem *pcie_index_hi_offset;
953 	void __iomem *pcie_data_offset;
954 
955 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
956 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
957 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
958 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
959 	else
960 		pcie_index_hi = 0;
961 
962 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
963 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
964 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
965 	if (pcie_index_hi != 0)
966 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
967 				pcie_index_hi * 4;
968 
969 	writel(reg_addr, pcie_index_offset);
970 	readl(pcie_index_offset);
971 	if (pcie_index_hi != 0) {
972 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
973 		readl(pcie_index_hi_offset);
974 	}
975 	writel(reg_data, pcie_data_offset);
976 	readl(pcie_data_offset);
977 
978 	/* clear the high bits */
979 	if (pcie_index_hi != 0) {
980 		writel(0, pcie_index_hi_offset);
981 		readl(pcie_index_hi_offset);
982 	}
983 
984 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
985 }
986 
987 /**
988  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
989  *
990  * @adev: amdgpu_device pointer
991  * @reg_addr: indirect register offset
992  * @reg_data: indirect register data
993  *
994  */
995 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
996 				   u32 reg_addr, u64 reg_data)
997 {
998 	unsigned long flags, pcie_index, pcie_data;
999 	void __iomem *pcie_index_offset;
1000 	void __iomem *pcie_data_offset;
1001 
1002 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1003 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1004 
1005 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1006 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1007 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1008 
1009 	/* write low 32 bits */
1010 	writel(reg_addr, pcie_index_offset);
1011 	readl(pcie_index_offset);
1012 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1013 	readl(pcie_data_offset);
1014 	/* write high 32 bits */
1015 	writel(reg_addr + 4, pcie_index_offset);
1016 	readl(pcie_index_offset);
1017 	writel((u32)(reg_data >> 32), pcie_data_offset);
1018 	readl(pcie_data_offset);
1019 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1020 }
1021 
1022 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1023 				   u64 reg_addr, u64 reg_data)
1024 {
1025 	unsigned long flags, pcie_index, pcie_data;
1026 	unsigned long pcie_index_hi = 0;
1027 	void __iomem *pcie_index_offset;
1028 	void __iomem *pcie_index_hi_offset;
1029 	void __iomem *pcie_data_offset;
1030 
1031 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1032 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1033 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1034 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1035 
1036 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1037 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1038 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1039 	if (pcie_index_hi != 0)
1040 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1041 				pcie_index_hi * 4;
1042 
1043 	/* write low 32 bits */
1044 	writel(reg_addr, pcie_index_offset);
1045 	readl(pcie_index_offset);
1046 	if (pcie_index_hi != 0) {
1047 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1048 		readl(pcie_index_hi_offset);
1049 	}
1050 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1051 	readl(pcie_data_offset);
1052 	/* write high 32 bits */
1053 	writel(reg_addr + 4, pcie_index_offset);
1054 	readl(pcie_index_offset);
1055 	if (pcie_index_hi != 0) {
1056 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1057 		readl(pcie_index_hi_offset);
1058 	}
1059 	writel((u32)(reg_data >> 32), pcie_data_offset);
1060 	readl(pcie_data_offset);
1061 
1062 	/* clear the high bits */
1063 	if (pcie_index_hi != 0) {
1064 		writel(0, pcie_index_hi_offset);
1065 		readl(pcie_index_hi_offset);
1066 	}
1067 
1068 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1069 }
1070 
1071 /**
1072  * amdgpu_device_get_rev_id - query device rev_id
1073  *
1074  * @adev: amdgpu_device pointer
1075  *
1076  * Return device rev_id
1077  */
1078 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1079 {
1080 	return adev->nbio.funcs->get_rev_id(adev);
1081 }
1082 
1083 /**
1084  * amdgpu_invalid_rreg - dummy reg read function
1085  *
1086  * @adev: amdgpu_device pointer
1087  * @reg: offset of register
1088  *
1089  * Dummy register read function.  Used for register blocks
1090  * that certain asics don't have (all asics).
1091  * Returns the value in the register.
1092  */
1093 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1094 {
1095 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1096 	BUG();
1097 	return 0;
1098 }
1099 
1100 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1101 {
1102 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1103 	BUG();
1104 	return 0;
1105 }
1106 
1107 /**
1108  * amdgpu_invalid_wreg - dummy reg write function
1109  *
1110  * @adev: amdgpu_device pointer
1111  * @reg: offset of register
1112  * @v: value to write to the register
1113  *
1114  * Dummy register read function.  Used for register blocks
1115  * that certain asics don't have (all asics).
1116  */
1117 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1118 {
1119 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1120 		  reg, v);
1121 	BUG();
1122 }
1123 
1124 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1125 {
1126 	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1127 		  reg, v);
1128 	BUG();
1129 }
1130 
1131 /**
1132  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1133  *
1134  * @adev: amdgpu_device pointer
1135  * @reg: offset of register
1136  *
1137  * Dummy register read function.  Used for register blocks
1138  * that certain asics don't have (all asics).
1139  * Returns the value in the register.
1140  */
1141 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1142 {
1143 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1144 	BUG();
1145 	return 0;
1146 }
1147 
1148 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1149 {
1150 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1151 	BUG();
1152 	return 0;
1153 }
1154 
1155 /**
1156  * amdgpu_invalid_wreg64 - dummy reg write function
1157  *
1158  * @adev: amdgpu_device pointer
1159  * @reg: offset of register
1160  * @v: value to write to the register
1161  *
1162  * Dummy register read function.  Used for register blocks
1163  * that certain asics don't have (all asics).
1164  */
1165 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1166 {
1167 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1168 		  reg, v);
1169 	BUG();
1170 }
1171 
1172 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1173 {
1174 	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1175 		  reg, v);
1176 	BUG();
1177 }
1178 
1179 /**
1180  * amdgpu_block_invalid_rreg - dummy reg read function
1181  *
1182  * @adev: amdgpu_device pointer
1183  * @block: offset of instance
1184  * @reg: offset of register
1185  *
1186  * Dummy register read function.  Used for register blocks
1187  * that certain asics don't have (all asics).
1188  * Returns the value in the register.
1189  */
1190 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1191 					  uint32_t block, uint32_t reg)
1192 {
1193 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1194 		  reg, block);
1195 	BUG();
1196 	return 0;
1197 }
1198 
1199 /**
1200  * amdgpu_block_invalid_wreg - dummy reg write function
1201  *
1202  * @adev: amdgpu_device pointer
1203  * @block: offset of instance
1204  * @reg: offset of register
1205  * @v: value to write to the register
1206  *
1207  * Dummy register read function.  Used for register blocks
1208  * that certain asics don't have (all asics).
1209  */
1210 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1211 				      uint32_t block,
1212 				      uint32_t reg, uint32_t v)
1213 {
1214 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1215 		  reg, block, v);
1216 	BUG();
1217 }
1218 
1219 /**
1220  * amdgpu_device_asic_init - Wrapper for atom asic_init
1221  *
1222  * @adev: amdgpu_device pointer
1223  *
1224  * Does any asic specific work and then calls atom asic init.
1225  */
1226 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1227 {
1228 	int ret;
1229 
1230 	amdgpu_asic_pre_asic_init(adev);
1231 
1232 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1233 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1234 		amdgpu_psp_wait_for_bootloader(adev);
1235 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1236 		return ret;
1237 	} else {
1238 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1239 	}
1240 
1241 	return 0;
1242 }
1243 
1244 /**
1245  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1246  *
1247  * @adev: amdgpu_device pointer
1248  *
1249  * Allocates a scratch page of VRAM for use by various things in the
1250  * driver.
1251  */
1252 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1253 {
1254 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1255 				       AMDGPU_GEM_DOMAIN_VRAM |
1256 				       AMDGPU_GEM_DOMAIN_GTT,
1257 				       &adev->mem_scratch.robj,
1258 				       &adev->mem_scratch.gpu_addr,
1259 				       (void **)&adev->mem_scratch.ptr);
1260 }
1261 
1262 /**
1263  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1264  *
1265  * @adev: amdgpu_device pointer
1266  *
1267  * Frees the VRAM scratch page.
1268  */
1269 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1270 {
1271 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1272 }
1273 
1274 /**
1275  * amdgpu_device_program_register_sequence - program an array of registers.
1276  *
1277  * @adev: amdgpu_device pointer
1278  * @registers: pointer to the register array
1279  * @array_size: size of the register array
1280  *
1281  * Programs an array or registers with and or masks.
1282  * This is a helper for setting golden registers.
1283  */
1284 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1285 					     const u32 *registers,
1286 					     const u32 array_size)
1287 {
1288 	u32 tmp, reg, and_mask, or_mask;
1289 	int i;
1290 
1291 	if (array_size % 3)
1292 		return;
1293 
1294 	for (i = 0; i < array_size; i += 3) {
1295 		reg = registers[i + 0];
1296 		and_mask = registers[i + 1];
1297 		or_mask = registers[i + 2];
1298 
1299 		if (and_mask == 0xffffffff) {
1300 			tmp = or_mask;
1301 		} else {
1302 			tmp = RREG32(reg);
1303 			tmp &= ~and_mask;
1304 			if (adev->family >= AMDGPU_FAMILY_AI)
1305 				tmp |= (or_mask & and_mask);
1306 			else
1307 				tmp |= or_mask;
1308 		}
1309 		WREG32(reg, tmp);
1310 	}
1311 }
1312 
1313 /**
1314  * amdgpu_device_pci_config_reset - reset the GPU
1315  *
1316  * @adev: amdgpu_device pointer
1317  *
1318  * Resets the GPU using the pci config reset sequence.
1319  * Only applicable to asics prior to vega10.
1320  */
1321 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1322 {
1323 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1324 }
1325 
1326 /**
1327  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1328  *
1329  * @adev: amdgpu_device pointer
1330  *
1331  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1332  */
1333 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1334 {
1335 	return pci_reset_function(adev->pdev);
1336 }
1337 
1338 /*
1339  * amdgpu_device_wb_*()
1340  * Writeback is the method by which the GPU updates special pages in memory
1341  * with the status of certain GPU events (fences, ring pointers,etc.).
1342  */
1343 
1344 /**
1345  * amdgpu_device_wb_fini - Disable Writeback and free memory
1346  *
1347  * @adev: amdgpu_device pointer
1348  *
1349  * Disables Writeback and frees the Writeback memory (all asics).
1350  * Used at driver shutdown.
1351  */
1352 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1353 {
1354 	if (adev->wb.wb_obj) {
1355 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1356 				      &adev->wb.gpu_addr,
1357 				      (void **)&adev->wb.wb);
1358 		adev->wb.wb_obj = NULL;
1359 	}
1360 }
1361 
1362 /**
1363  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1364  *
1365  * @adev: amdgpu_device pointer
1366  *
1367  * Initializes writeback and allocates writeback memory (all asics).
1368  * Used at driver startup.
1369  * Returns 0 on success or an -error on failure.
1370  */
1371 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1372 {
1373 	int r;
1374 
1375 	if (adev->wb.wb_obj == NULL) {
1376 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1377 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1378 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1379 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1380 					    (void **)&adev->wb.wb);
1381 		if (r) {
1382 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1383 			return r;
1384 		}
1385 
1386 		adev->wb.num_wb = AMDGPU_MAX_WB;
1387 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1388 
1389 		/* clear wb memory */
1390 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1391 	}
1392 
1393 	return 0;
1394 }
1395 
1396 /**
1397  * amdgpu_device_wb_get - Allocate a wb entry
1398  *
1399  * @adev: amdgpu_device pointer
1400  * @wb: wb index
1401  *
1402  * Allocate a wb slot for use by the driver (all asics).
1403  * Returns 0 on success or -EINVAL on failure.
1404  */
1405 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1406 {
1407 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1408 
1409 	if (offset < adev->wb.num_wb) {
1410 		__set_bit(offset, adev->wb.used);
1411 		*wb = offset << 3; /* convert to dw offset */
1412 		return 0;
1413 	} else {
1414 		return -EINVAL;
1415 	}
1416 }
1417 
1418 /**
1419  * amdgpu_device_wb_free - Free a wb entry
1420  *
1421  * @adev: amdgpu_device pointer
1422  * @wb: wb index
1423  *
1424  * Free a wb slot allocated for use by the driver (all asics)
1425  */
1426 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1427 {
1428 	wb >>= 3;
1429 	if (wb < adev->wb.num_wb)
1430 		__clear_bit(wb, adev->wb.used);
1431 }
1432 
1433 /**
1434  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1435  *
1436  * @adev: amdgpu_device pointer
1437  *
1438  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1439  * to fail, but if any of the BARs is not accessible after the size we abort
1440  * driver loading by returning -ENODEV.
1441  */
1442 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1443 {
1444 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1445 	struct pci_bus *root;
1446 	struct resource *res;
1447 	unsigned int i;
1448 	u16 cmd;
1449 	int r;
1450 
1451 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1452 		return 0;
1453 
1454 	/* Bypass for VF */
1455 	if (amdgpu_sriov_vf(adev))
1456 		return 0;
1457 
1458 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1459 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1460 		DRM_WARN("System can't access extended configuration space,please check!!\n");
1461 
1462 	/* skip if the bios has already enabled large BAR */
1463 	if (adev->gmc.real_vram_size &&
1464 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1465 		return 0;
1466 
1467 	/* Check if the root BUS has 64bit memory resources */
1468 	root = adev->pdev->bus;
1469 	while (root->parent)
1470 		root = root->parent;
1471 
1472 	pci_bus_for_each_resource(root, res, i) {
1473 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1474 		    res->start > 0x100000000ull)
1475 			break;
1476 	}
1477 
1478 	/* Trying to resize is pointless without a root hub window above 4GB */
1479 	if (!res)
1480 		return 0;
1481 
1482 	/* Limit the BAR size to what is available */
1483 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1484 			rbar_size);
1485 
1486 	/* Disable memory decoding while we change the BAR addresses and size */
1487 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1488 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1489 			      cmd & ~PCI_COMMAND_MEMORY);
1490 
1491 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1492 	amdgpu_doorbell_fini(adev);
1493 	if (adev->asic_type >= CHIP_BONAIRE)
1494 		pci_release_resource(adev->pdev, 2);
1495 
1496 	pci_release_resource(adev->pdev, 0);
1497 
1498 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1499 	if (r == -ENOSPC)
1500 		DRM_INFO("Not enough PCI address space for a large BAR.");
1501 	else if (r && r != -ENOTSUPP)
1502 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1503 
1504 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1505 
1506 	/* When the doorbell or fb BAR isn't available we have no chance of
1507 	 * using the device.
1508 	 */
1509 	r = amdgpu_doorbell_init(adev);
1510 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1511 		return -ENODEV;
1512 
1513 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1514 
1515 	return 0;
1516 }
1517 
1518 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1519 {
1520 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1521 		return false;
1522 
1523 	return true;
1524 }
1525 
1526 /*
1527  * GPU helpers function.
1528  */
1529 /**
1530  * amdgpu_device_need_post - check if the hw need post or not
1531  *
1532  * @adev: amdgpu_device pointer
1533  *
1534  * Check if the asic has been initialized (all asics) at driver startup
1535  * or post is needed if  hw reset is performed.
1536  * Returns true if need or false if not.
1537  */
1538 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1539 {
1540 	uint32_t reg;
1541 
1542 	if (amdgpu_sriov_vf(adev))
1543 		return false;
1544 
1545 	if (!amdgpu_device_read_bios(adev))
1546 		return false;
1547 
1548 	if (amdgpu_passthrough(adev)) {
1549 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1550 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1551 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1552 		 * vpost executed for smc version below 22.15
1553 		 */
1554 		if (adev->asic_type == CHIP_FIJI) {
1555 			int err;
1556 			uint32_t fw_ver;
1557 
1558 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1559 			/* force vPost if error occured */
1560 			if (err)
1561 				return true;
1562 
1563 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1564 			release_firmware(adev->pm.fw);
1565 			if (fw_ver < 0x00160e00)
1566 				return true;
1567 		}
1568 	}
1569 
1570 	/* Don't post if we need to reset whole hive on init */
1571 	if (adev->gmc.xgmi.pending_reset)
1572 		return false;
1573 
1574 	if (adev->has_hw_reset) {
1575 		adev->has_hw_reset = false;
1576 		return true;
1577 	}
1578 
1579 	/* bios scratch used on CIK+ */
1580 	if (adev->asic_type >= CHIP_BONAIRE)
1581 		return amdgpu_atombios_scratch_need_asic_init(adev);
1582 
1583 	/* check MEM_SIZE for older asics */
1584 	reg = amdgpu_asic_get_config_memsize(adev);
1585 
1586 	if ((reg != 0) && (reg != 0xffffffff))
1587 		return false;
1588 
1589 	return true;
1590 }
1591 
1592 /*
1593  * Check whether seamless boot is supported.
1594  *
1595  * So far we only support seamless boot on DCE 3.0 or later.
1596  * If users report that it works on older ASICS as well, we may
1597  * loosen this.
1598  */
1599 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1600 {
1601 	switch (amdgpu_seamless) {
1602 	case -1:
1603 		break;
1604 	case 1:
1605 		return true;
1606 	case 0:
1607 		return false;
1608 	default:
1609 		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1610 			  amdgpu_seamless);
1611 		return false;
1612 	}
1613 
1614 	if (!(adev->flags & AMD_IS_APU))
1615 		return false;
1616 
1617 	if (adev->mman.keep_stolen_vga_memory)
1618 		return false;
1619 
1620 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1621 }
1622 
1623 /*
1624  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1625  * don't support dynamic speed switching. Until we have confirmation from Intel
1626  * that a specific host supports it, it's safer that we keep it disabled for all.
1627  *
1628  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1629  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1630  */
1631 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1632 {
1633 #if IS_ENABLED(CONFIG_X86)
1634 	struct cpuinfo_x86 *c = &cpu_data(0);
1635 
1636 	/* eGPU change speeds based on USB4 fabric conditions */
1637 	if (dev_is_removable(adev->dev))
1638 		return true;
1639 
1640 	if (c->x86_vendor == X86_VENDOR_INTEL)
1641 		return false;
1642 #endif
1643 	return true;
1644 }
1645 
1646 /**
1647  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1648  *
1649  * @adev: amdgpu_device pointer
1650  *
1651  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1652  * be set for this device.
1653  *
1654  * Returns true if it should be used or false if not.
1655  */
1656 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1657 {
1658 	switch (amdgpu_aspm) {
1659 	case -1:
1660 		break;
1661 	case 0:
1662 		return false;
1663 	case 1:
1664 		return true;
1665 	default:
1666 		return false;
1667 	}
1668 	if (adev->flags & AMD_IS_APU)
1669 		return false;
1670 	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1671 		return false;
1672 	return pcie_aspm_enabled(adev->pdev);
1673 }
1674 
1675 /* if we get transitioned to only one device, take VGA back */
1676 /**
1677  * amdgpu_device_vga_set_decode - enable/disable vga decode
1678  *
1679  * @pdev: PCI device pointer
1680  * @state: enable/disable vga decode
1681  *
1682  * Enable/disable vga decode (all asics).
1683  * Returns VGA resource flags.
1684  */
1685 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1686 		bool state)
1687 {
1688 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1689 
1690 	amdgpu_asic_set_vga_state(adev, state);
1691 	if (state)
1692 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1693 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1694 	else
1695 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1696 }
1697 
1698 /**
1699  * amdgpu_device_check_block_size - validate the vm block size
1700  *
1701  * @adev: amdgpu_device pointer
1702  *
1703  * Validates the vm block size specified via module parameter.
1704  * The vm block size defines number of bits in page table versus page directory,
1705  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1706  * page table and the remaining bits are in the page directory.
1707  */
1708 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1709 {
1710 	/* defines number of bits in page table versus page directory,
1711 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1712 	 * page table and the remaining bits are in the page directory
1713 	 */
1714 	if (amdgpu_vm_block_size == -1)
1715 		return;
1716 
1717 	if (amdgpu_vm_block_size < 9) {
1718 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1719 			 amdgpu_vm_block_size);
1720 		amdgpu_vm_block_size = -1;
1721 	}
1722 }
1723 
1724 /**
1725  * amdgpu_device_check_vm_size - validate the vm size
1726  *
1727  * @adev: amdgpu_device pointer
1728  *
1729  * Validates the vm size in GB specified via module parameter.
1730  * The VM size is the size of the GPU virtual memory space in GB.
1731  */
1732 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1733 {
1734 	/* no need to check the default value */
1735 	if (amdgpu_vm_size == -1)
1736 		return;
1737 
1738 	if (amdgpu_vm_size < 1) {
1739 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1740 			 amdgpu_vm_size);
1741 		amdgpu_vm_size = -1;
1742 	}
1743 }
1744 
1745 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1746 {
1747 	struct sysinfo si;
1748 	bool is_os_64 = (sizeof(void *) == 8);
1749 	uint64_t total_memory;
1750 	uint64_t dram_size_seven_GB = 0x1B8000000;
1751 	uint64_t dram_size_three_GB = 0xB8000000;
1752 
1753 	if (amdgpu_smu_memory_pool_size == 0)
1754 		return;
1755 
1756 	if (!is_os_64) {
1757 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1758 		goto def_value;
1759 	}
1760 	si_meminfo(&si);
1761 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1762 
1763 	if ((amdgpu_smu_memory_pool_size == 1) ||
1764 		(amdgpu_smu_memory_pool_size == 2)) {
1765 		if (total_memory < dram_size_three_GB)
1766 			goto def_value1;
1767 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1768 		(amdgpu_smu_memory_pool_size == 8)) {
1769 		if (total_memory < dram_size_seven_GB)
1770 			goto def_value1;
1771 	} else {
1772 		DRM_WARN("Smu memory pool size not supported\n");
1773 		goto def_value;
1774 	}
1775 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1776 
1777 	return;
1778 
1779 def_value1:
1780 	DRM_WARN("No enough system memory\n");
1781 def_value:
1782 	adev->pm.smu_prv_buffer_size = 0;
1783 }
1784 
1785 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1786 {
1787 	if (!(adev->flags & AMD_IS_APU) ||
1788 	    adev->asic_type < CHIP_RAVEN)
1789 		return 0;
1790 
1791 	switch (adev->asic_type) {
1792 	case CHIP_RAVEN:
1793 		if (adev->pdev->device == 0x15dd)
1794 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1795 		if (adev->pdev->device == 0x15d8)
1796 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1797 		break;
1798 	case CHIP_RENOIR:
1799 		if ((adev->pdev->device == 0x1636) ||
1800 		    (adev->pdev->device == 0x164c))
1801 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1802 		else
1803 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1804 		break;
1805 	case CHIP_VANGOGH:
1806 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1807 		break;
1808 	case CHIP_YELLOW_CARP:
1809 		break;
1810 	case CHIP_CYAN_SKILLFISH:
1811 		if ((adev->pdev->device == 0x13FE) ||
1812 		    (adev->pdev->device == 0x143F))
1813 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1814 		break;
1815 	default:
1816 		break;
1817 	}
1818 
1819 	return 0;
1820 }
1821 
1822 /**
1823  * amdgpu_device_check_arguments - validate module params
1824  *
1825  * @adev: amdgpu_device pointer
1826  *
1827  * Validates certain module parameters and updates
1828  * the associated values used by the driver (all asics).
1829  */
1830 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1831 {
1832 	if (amdgpu_sched_jobs < 4) {
1833 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1834 			 amdgpu_sched_jobs);
1835 		amdgpu_sched_jobs = 4;
1836 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1837 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1838 			 amdgpu_sched_jobs);
1839 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1840 	}
1841 
1842 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1843 		/* gart size must be greater or equal to 32M */
1844 		dev_warn(adev->dev, "gart size (%d) too small\n",
1845 			 amdgpu_gart_size);
1846 		amdgpu_gart_size = -1;
1847 	}
1848 
1849 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1850 		/* gtt size must be greater or equal to 32M */
1851 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1852 				 amdgpu_gtt_size);
1853 		amdgpu_gtt_size = -1;
1854 	}
1855 
1856 	/* valid range is between 4 and 9 inclusive */
1857 	if (amdgpu_vm_fragment_size != -1 &&
1858 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1859 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1860 		amdgpu_vm_fragment_size = -1;
1861 	}
1862 
1863 	if (amdgpu_sched_hw_submission < 2) {
1864 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1865 			 amdgpu_sched_hw_submission);
1866 		amdgpu_sched_hw_submission = 2;
1867 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1868 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1869 			 amdgpu_sched_hw_submission);
1870 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1871 	}
1872 
1873 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1874 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1875 		amdgpu_reset_method = -1;
1876 	}
1877 
1878 	amdgpu_device_check_smu_prv_buffer_size(adev);
1879 
1880 	amdgpu_device_check_vm_size(adev);
1881 
1882 	amdgpu_device_check_block_size(adev);
1883 
1884 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1885 
1886 	return 0;
1887 }
1888 
1889 /**
1890  * amdgpu_switcheroo_set_state - set switcheroo state
1891  *
1892  * @pdev: pci dev pointer
1893  * @state: vga_switcheroo state
1894  *
1895  * Callback for the switcheroo driver.  Suspends or resumes
1896  * the asics before or after it is powered up using ACPI methods.
1897  */
1898 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1899 					enum vga_switcheroo_state state)
1900 {
1901 	struct drm_device *dev = pci_get_drvdata(pdev);
1902 	int r;
1903 
1904 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1905 		return;
1906 
1907 	if (state == VGA_SWITCHEROO_ON) {
1908 		pr_info("switched on\n");
1909 		/* don't suspend or resume card normally */
1910 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1911 
1912 		pci_set_power_state(pdev, PCI_D0);
1913 		amdgpu_device_load_pci_state(pdev);
1914 		r = pci_enable_device(pdev);
1915 		if (r)
1916 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1917 		amdgpu_device_resume(dev, true);
1918 
1919 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1920 	} else {
1921 		pr_info("switched off\n");
1922 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1923 		amdgpu_device_prepare(dev);
1924 		amdgpu_device_suspend(dev, true);
1925 		amdgpu_device_cache_pci_state(pdev);
1926 		/* Shut down the device */
1927 		pci_disable_device(pdev);
1928 		pci_set_power_state(pdev, PCI_D3cold);
1929 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1930 	}
1931 }
1932 
1933 /**
1934  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1935  *
1936  * @pdev: pci dev pointer
1937  *
1938  * Callback for the switcheroo driver.  Check of the switcheroo
1939  * state can be changed.
1940  * Returns true if the state can be changed, false if not.
1941  */
1942 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1943 {
1944 	struct drm_device *dev = pci_get_drvdata(pdev);
1945 
1946        /*
1947 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1948 	* locking inversion with the driver load path. And the access here is
1949 	* completely racy anyway. So don't bother with locking for now.
1950 	*/
1951 	return atomic_read(&dev->open_count) == 0;
1952 }
1953 
1954 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1955 	.set_gpu_state = amdgpu_switcheroo_set_state,
1956 	.reprobe = NULL,
1957 	.can_switch = amdgpu_switcheroo_can_switch,
1958 };
1959 
1960 /**
1961  * amdgpu_device_ip_set_clockgating_state - set the CG state
1962  *
1963  * @dev: amdgpu_device pointer
1964  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1965  * @state: clockgating state (gate or ungate)
1966  *
1967  * Sets the requested clockgating state for all instances of
1968  * the hardware IP specified.
1969  * Returns the error code from the last instance.
1970  */
1971 int amdgpu_device_ip_set_clockgating_state(void *dev,
1972 					   enum amd_ip_block_type block_type,
1973 					   enum amd_clockgating_state state)
1974 {
1975 	struct amdgpu_device *adev = dev;
1976 	int i, r = 0;
1977 
1978 	for (i = 0; i < adev->num_ip_blocks; i++) {
1979 		if (!adev->ip_blocks[i].status.valid)
1980 			continue;
1981 		if (adev->ip_blocks[i].version->type != block_type)
1982 			continue;
1983 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1984 			continue;
1985 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1986 			(void *)adev, state);
1987 		if (r)
1988 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1989 				  adev->ip_blocks[i].version->funcs->name, r);
1990 	}
1991 	return r;
1992 }
1993 
1994 /**
1995  * amdgpu_device_ip_set_powergating_state - set the PG state
1996  *
1997  * @dev: amdgpu_device pointer
1998  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1999  * @state: powergating state (gate or ungate)
2000  *
2001  * Sets the requested powergating state for all instances of
2002  * the hardware IP specified.
2003  * Returns the error code from the last instance.
2004  */
2005 int amdgpu_device_ip_set_powergating_state(void *dev,
2006 					   enum amd_ip_block_type block_type,
2007 					   enum amd_powergating_state state)
2008 {
2009 	struct amdgpu_device *adev = dev;
2010 	int i, r = 0;
2011 
2012 	for (i = 0; i < adev->num_ip_blocks; i++) {
2013 		if (!adev->ip_blocks[i].status.valid)
2014 			continue;
2015 		if (adev->ip_blocks[i].version->type != block_type)
2016 			continue;
2017 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2018 			continue;
2019 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2020 			(void *)adev, state);
2021 		if (r)
2022 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2023 				  adev->ip_blocks[i].version->funcs->name, r);
2024 	}
2025 	return r;
2026 }
2027 
2028 /**
2029  * amdgpu_device_ip_get_clockgating_state - get the CG state
2030  *
2031  * @adev: amdgpu_device pointer
2032  * @flags: clockgating feature flags
2033  *
2034  * Walks the list of IPs on the device and updates the clockgating
2035  * flags for each IP.
2036  * Updates @flags with the feature flags for each hardware IP where
2037  * clockgating is enabled.
2038  */
2039 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2040 					    u64 *flags)
2041 {
2042 	int i;
2043 
2044 	for (i = 0; i < adev->num_ip_blocks; i++) {
2045 		if (!adev->ip_blocks[i].status.valid)
2046 			continue;
2047 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2048 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2049 	}
2050 }
2051 
2052 /**
2053  * amdgpu_device_ip_wait_for_idle - wait for idle
2054  *
2055  * @adev: amdgpu_device pointer
2056  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2057  *
2058  * Waits for the request hardware IP to be idle.
2059  * Returns 0 for success or a negative error code on failure.
2060  */
2061 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2062 				   enum amd_ip_block_type block_type)
2063 {
2064 	int i, r;
2065 
2066 	for (i = 0; i < adev->num_ip_blocks; i++) {
2067 		if (!adev->ip_blocks[i].status.valid)
2068 			continue;
2069 		if (adev->ip_blocks[i].version->type == block_type) {
2070 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2071 			if (r)
2072 				return r;
2073 			break;
2074 		}
2075 	}
2076 	return 0;
2077 
2078 }
2079 
2080 /**
2081  * amdgpu_device_ip_is_idle - is the hardware IP idle
2082  *
2083  * @adev: amdgpu_device pointer
2084  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2085  *
2086  * Check if the hardware IP is idle or not.
2087  * Returns true if it the IP is idle, false if not.
2088  */
2089 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2090 			      enum amd_ip_block_type block_type)
2091 {
2092 	int i;
2093 
2094 	for (i = 0; i < adev->num_ip_blocks; i++) {
2095 		if (!adev->ip_blocks[i].status.valid)
2096 			continue;
2097 		if (adev->ip_blocks[i].version->type == block_type)
2098 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2099 	}
2100 	return true;
2101 
2102 }
2103 
2104 /**
2105  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2106  *
2107  * @adev: amdgpu_device pointer
2108  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2109  *
2110  * Returns a pointer to the hardware IP block structure
2111  * if it exists for the asic, otherwise NULL.
2112  */
2113 struct amdgpu_ip_block *
2114 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2115 			      enum amd_ip_block_type type)
2116 {
2117 	int i;
2118 
2119 	for (i = 0; i < adev->num_ip_blocks; i++)
2120 		if (adev->ip_blocks[i].version->type == type)
2121 			return &adev->ip_blocks[i];
2122 
2123 	return NULL;
2124 }
2125 
2126 /**
2127  * amdgpu_device_ip_block_version_cmp
2128  *
2129  * @adev: amdgpu_device pointer
2130  * @type: enum amd_ip_block_type
2131  * @major: major version
2132  * @minor: minor version
2133  *
2134  * return 0 if equal or greater
2135  * return 1 if smaller or the ip_block doesn't exist
2136  */
2137 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2138 				       enum amd_ip_block_type type,
2139 				       u32 major, u32 minor)
2140 {
2141 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2142 
2143 	if (ip_block && ((ip_block->version->major > major) ||
2144 			((ip_block->version->major == major) &&
2145 			(ip_block->version->minor >= minor))))
2146 		return 0;
2147 
2148 	return 1;
2149 }
2150 
2151 /**
2152  * amdgpu_device_ip_block_add
2153  *
2154  * @adev: amdgpu_device pointer
2155  * @ip_block_version: pointer to the IP to add
2156  *
2157  * Adds the IP block driver information to the collection of IPs
2158  * on the asic.
2159  */
2160 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2161 			       const struct amdgpu_ip_block_version *ip_block_version)
2162 {
2163 	if (!ip_block_version)
2164 		return -EINVAL;
2165 
2166 	switch (ip_block_version->type) {
2167 	case AMD_IP_BLOCK_TYPE_VCN:
2168 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2169 			return 0;
2170 		break;
2171 	case AMD_IP_BLOCK_TYPE_JPEG:
2172 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2173 			return 0;
2174 		break;
2175 	default:
2176 		break;
2177 	}
2178 
2179 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2180 		  ip_block_version->funcs->name);
2181 
2182 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2183 
2184 	return 0;
2185 }
2186 
2187 /**
2188  * amdgpu_device_enable_virtual_display - enable virtual display feature
2189  *
2190  * @adev: amdgpu_device pointer
2191  *
2192  * Enabled the virtual display feature if the user has enabled it via
2193  * the module parameter virtual_display.  This feature provides a virtual
2194  * display hardware on headless boards or in virtualized environments.
2195  * This function parses and validates the configuration string specified by
2196  * the user and configues the virtual display configuration (number of
2197  * virtual connectors, crtcs, etc.) specified.
2198  */
2199 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2200 {
2201 	adev->enable_virtual_display = false;
2202 
2203 	if (amdgpu_virtual_display) {
2204 		const char *pci_address_name = pci_name(adev->pdev);
2205 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2206 
2207 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2208 		pciaddstr_tmp = pciaddstr;
2209 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2210 			pciaddname = strsep(&pciaddname_tmp, ",");
2211 			if (!strcmp("all", pciaddname)
2212 			    || !strcmp(pci_address_name, pciaddname)) {
2213 				long num_crtc;
2214 				int res = -1;
2215 
2216 				adev->enable_virtual_display = true;
2217 
2218 				if (pciaddname_tmp)
2219 					res = kstrtol(pciaddname_tmp, 10,
2220 						      &num_crtc);
2221 
2222 				if (!res) {
2223 					if (num_crtc < 1)
2224 						num_crtc = 1;
2225 					if (num_crtc > 6)
2226 						num_crtc = 6;
2227 					adev->mode_info.num_crtc = num_crtc;
2228 				} else {
2229 					adev->mode_info.num_crtc = 1;
2230 				}
2231 				break;
2232 			}
2233 		}
2234 
2235 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2236 			 amdgpu_virtual_display, pci_address_name,
2237 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2238 
2239 		kfree(pciaddstr);
2240 	}
2241 }
2242 
2243 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2244 {
2245 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2246 		adev->mode_info.num_crtc = 1;
2247 		adev->enable_virtual_display = true;
2248 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2249 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2250 	}
2251 }
2252 
2253 /**
2254  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2255  *
2256  * @adev: amdgpu_device pointer
2257  *
2258  * Parses the asic configuration parameters specified in the gpu info
2259  * firmware and makes them availale to the driver for use in configuring
2260  * the asic.
2261  * Returns 0 on success, -EINVAL on failure.
2262  */
2263 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2264 {
2265 	const char *chip_name;
2266 	char fw_name[40];
2267 	int err;
2268 	const struct gpu_info_firmware_header_v1_0 *hdr;
2269 
2270 	adev->firmware.gpu_info_fw = NULL;
2271 
2272 	if (adev->mman.discovery_bin)
2273 		return 0;
2274 
2275 	switch (adev->asic_type) {
2276 	default:
2277 		return 0;
2278 	case CHIP_VEGA10:
2279 		chip_name = "vega10";
2280 		break;
2281 	case CHIP_VEGA12:
2282 		chip_name = "vega12";
2283 		break;
2284 	case CHIP_RAVEN:
2285 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2286 			chip_name = "raven2";
2287 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2288 			chip_name = "picasso";
2289 		else
2290 			chip_name = "raven";
2291 		break;
2292 	case CHIP_ARCTURUS:
2293 		chip_name = "arcturus";
2294 		break;
2295 	case CHIP_NAVI12:
2296 		chip_name = "navi12";
2297 		break;
2298 	}
2299 
2300 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2301 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2302 	if (err) {
2303 		dev_err(adev->dev,
2304 			"Failed to get gpu_info firmware \"%s\"\n",
2305 			fw_name);
2306 		goto out;
2307 	}
2308 
2309 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2310 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2311 
2312 	switch (hdr->version_major) {
2313 	case 1:
2314 	{
2315 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2316 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2317 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2318 
2319 		/*
2320 		 * Should be droped when DAL no longer needs it.
2321 		 */
2322 		if (adev->asic_type == CHIP_NAVI12)
2323 			goto parse_soc_bounding_box;
2324 
2325 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2326 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2327 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2328 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2329 		adev->gfx.config.max_texture_channel_caches =
2330 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2331 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2332 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2333 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2334 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2335 		adev->gfx.config.double_offchip_lds_buf =
2336 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2337 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2338 		adev->gfx.cu_info.max_waves_per_simd =
2339 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2340 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2341 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2342 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2343 		if (hdr->version_minor >= 1) {
2344 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2345 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2346 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2347 			adev->gfx.config.num_sc_per_sh =
2348 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2349 			adev->gfx.config.num_packer_per_sc =
2350 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2351 		}
2352 
2353 parse_soc_bounding_box:
2354 		/*
2355 		 * soc bounding box info is not integrated in disocovery table,
2356 		 * we always need to parse it from gpu info firmware if needed.
2357 		 */
2358 		if (hdr->version_minor == 2) {
2359 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2360 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2361 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2362 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2363 		}
2364 		break;
2365 	}
2366 	default:
2367 		dev_err(adev->dev,
2368 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2369 		err = -EINVAL;
2370 		goto out;
2371 	}
2372 out:
2373 	return err;
2374 }
2375 
2376 /**
2377  * amdgpu_device_ip_early_init - run early init for hardware IPs
2378  *
2379  * @adev: amdgpu_device pointer
2380  *
2381  * Early initialization pass for hardware IPs.  The hardware IPs that make
2382  * up each asic are discovered each IP's early_init callback is run.  This
2383  * is the first stage in initializing the asic.
2384  * Returns 0 on success, negative error code on failure.
2385  */
2386 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2387 {
2388 	struct pci_dev *parent;
2389 	int i, r;
2390 	bool total;
2391 
2392 	amdgpu_device_enable_virtual_display(adev);
2393 
2394 	if (amdgpu_sriov_vf(adev)) {
2395 		r = amdgpu_virt_request_full_gpu(adev, true);
2396 		if (r)
2397 			return r;
2398 	}
2399 
2400 	switch (adev->asic_type) {
2401 #ifdef CONFIG_DRM_AMDGPU_SI
2402 	case CHIP_VERDE:
2403 	case CHIP_TAHITI:
2404 	case CHIP_PITCAIRN:
2405 	case CHIP_OLAND:
2406 	case CHIP_HAINAN:
2407 		adev->family = AMDGPU_FAMILY_SI;
2408 		r = si_set_ip_blocks(adev);
2409 		if (r)
2410 			return r;
2411 		break;
2412 #endif
2413 #ifdef CONFIG_DRM_AMDGPU_CIK
2414 	case CHIP_BONAIRE:
2415 	case CHIP_HAWAII:
2416 	case CHIP_KAVERI:
2417 	case CHIP_KABINI:
2418 	case CHIP_MULLINS:
2419 		if (adev->flags & AMD_IS_APU)
2420 			adev->family = AMDGPU_FAMILY_KV;
2421 		else
2422 			adev->family = AMDGPU_FAMILY_CI;
2423 
2424 		r = cik_set_ip_blocks(adev);
2425 		if (r)
2426 			return r;
2427 		break;
2428 #endif
2429 	case CHIP_TOPAZ:
2430 	case CHIP_TONGA:
2431 	case CHIP_FIJI:
2432 	case CHIP_POLARIS10:
2433 	case CHIP_POLARIS11:
2434 	case CHIP_POLARIS12:
2435 	case CHIP_VEGAM:
2436 	case CHIP_CARRIZO:
2437 	case CHIP_STONEY:
2438 		if (adev->flags & AMD_IS_APU)
2439 			adev->family = AMDGPU_FAMILY_CZ;
2440 		else
2441 			adev->family = AMDGPU_FAMILY_VI;
2442 
2443 		r = vi_set_ip_blocks(adev);
2444 		if (r)
2445 			return r;
2446 		break;
2447 	default:
2448 		r = amdgpu_discovery_set_ip_blocks(adev);
2449 		if (r)
2450 			return r;
2451 		break;
2452 	}
2453 
2454 	if (amdgpu_has_atpx() &&
2455 	    (amdgpu_is_atpx_hybrid() ||
2456 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2457 	    ((adev->flags & AMD_IS_APU) == 0) &&
2458 	    !dev_is_removable(&adev->pdev->dev))
2459 		adev->flags |= AMD_IS_PX;
2460 
2461 	if (!(adev->flags & AMD_IS_APU)) {
2462 		parent = pcie_find_root_port(adev->pdev);
2463 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2464 	}
2465 
2466 
2467 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2468 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2469 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2470 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2471 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2472 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2473 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2474 
2475 	total = true;
2476 	for (i = 0; i < adev->num_ip_blocks; i++) {
2477 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2478 			DRM_WARN("disabled ip block: %d <%s>\n",
2479 				  i, adev->ip_blocks[i].version->funcs->name);
2480 			adev->ip_blocks[i].status.valid = false;
2481 		} else {
2482 			if (adev->ip_blocks[i].version->funcs->early_init) {
2483 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2484 				if (r == -ENOENT) {
2485 					adev->ip_blocks[i].status.valid = false;
2486 				} else if (r) {
2487 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2488 						  adev->ip_blocks[i].version->funcs->name, r);
2489 					total = false;
2490 				} else {
2491 					adev->ip_blocks[i].status.valid = true;
2492 				}
2493 			} else {
2494 				adev->ip_blocks[i].status.valid = true;
2495 			}
2496 		}
2497 		/* get the vbios after the asic_funcs are set up */
2498 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2499 			r = amdgpu_device_parse_gpu_info_fw(adev);
2500 			if (r)
2501 				return r;
2502 
2503 			/* Read BIOS */
2504 			if (amdgpu_device_read_bios(adev)) {
2505 				if (!amdgpu_get_bios(adev))
2506 					return -EINVAL;
2507 
2508 				r = amdgpu_atombios_init(adev);
2509 				if (r) {
2510 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2511 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2512 					return r;
2513 				}
2514 			}
2515 
2516 			/*get pf2vf msg info at it's earliest time*/
2517 			if (amdgpu_sriov_vf(adev))
2518 				amdgpu_virt_init_data_exchange(adev);
2519 
2520 		}
2521 	}
2522 	if (!total)
2523 		return -ENODEV;
2524 
2525 	amdgpu_amdkfd_device_probe(adev);
2526 	adev->cg_flags &= amdgpu_cg_mask;
2527 	adev->pg_flags &= amdgpu_pg_mask;
2528 
2529 	return 0;
2530 }
2531 
2532 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2533 {
2534 	int i, r;
2535 
2536 	for (i = 0; i < adev->num_ip_blocks; i++) {
2537 		if (!adev->ip_blocks[i].status.sw)
2538 			continue;
2539 		if (adev->ip_blocks[i].status.hw)
2540 			continue;
2541 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2542 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2543 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2544 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2545 			if (r) {
2546 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2547 					  adev->ip_blocks[i].version->funcs->name, r);
2548 				return r;
2549 			}
2550 			adev->ip_blocks[i].status.hw = true;
2551 		}
2552 	}
2553 
2554 	return 0;
2555 }
2556 
2557 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2558 {
2559 	int i, r;
2560 
2561 	for (i = 0; i < adev->num_ip_blocks; i++) {
2562 		if (!adev->ip_blocks[i].status.sw)
2563 			continue;
2564 		if (adev->ip_blocks[i].status.hw)
2565 			continue;
2566 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2567 		if (r) {
2568 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2569 				  adev->ip_blocks[i].version->funcs->name, r);
2570 			return r;
2571 		}
2572 		adev->ip_blocks[i].status.hw = true;
2573 	}
2574 
2575 	return 0;
2576 }
2577 
2578 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2579 {
2580 	int r = 0;
2581 	int i;
2582 	uint32_t smu_version;
2583 
2584 	if (adev->asic_type >= CHIP_VEGA10) {
2585 		for (i = 0; i < adev->num_ip_blocks; i++) {
2586 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2587 				continue;
2588 
2589 			if (!adev->ip_blocks[i].status.sw)
2590 				continue;
2591 
2592 			/* no need to do the fw loading again if already done*/
2593 			if (adev->ip_blocks[i].status.hw == true)
2594 				break;
2595 
2596 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2597 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2598 				if (r) {
2599 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2600 							  adev->ip_blocks[i].version->funcs->name, r);
2601 					return r;
2602 				}
2603 			} else {
2604 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2605 				if (r) {
2606 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2607 							  adev->ip_blocks[i].version->funcs->name, r);
2608 					return r;
2609 				}
2610 			}
2611 
2612 			adev->ip_blocks[i].status.hw = true;
2613 			break;
2614 		}
2615 	}
2616 
2617 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2618 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2619 
2620 	return r;
2621 }
2622 
2623 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2624 {
2625 	long timeout;
2626 	int r, i;
2627 
2628 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2629 		struct amdgpu_ring *ring = adev->rings[i];
2630 
2631 		/* No need to setup the GPU scheduler for rings that don't need it */
2632 		if (!ring || ring->no_scheduler)
2633 			continue;
2634 
2635 		switch (ring->funcs->type) {
2636 		case AMDGPU_RING_TYPE_GFX:
2637 			timeout = adev->gfx_timeout;
2638 			break;
2639 		case AMDGPU_RING_TYPE_COMPUTE:
2640 			timeout = adev->compute_timeout;
2641 			break;
2642 		case AMDGPU_RING_TYPE_SDMA:
2643 			timeout = adev->sdma_timeout;
2644 			break;
2645 		default:
2646 			timeout = adev->video_timeout;
2647 			break;
2648 		}
2649 
2650 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2651 				   DRM_SCHED_PRIORITY_COUNT,
2652 				   ring->num_hw_submission, 0,
2653 				   timeout, adev->reset_domain->wq,
2654 				   ring->sched_score, ring->name,
2655 				   adev->dev);
2656 		if (r) {
2657 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2658 				  ring->name);
2659 			return r;
2660 		}
2661 		r = amdgpu_uvd_entity_init(adev, ring);
2662 		if (r) {
2663 			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2664 				  ring->name);
2665 			return r;
2666 		}
2667 		r = amdgpu_vce_entity_init(adev, ring);
2668 		if (r) {
2669 			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2670 				  ring->name);
2671 			return r;
2672 		}
2673 	}
2674 
2675 	amdgpu_xcp_update_partition_sched_list(adev);
2676 
2677 	return 0;
2678 }
2679 
2680 
2681 /**
2682  * amdgpu_device_ip_init - run init for hardware IPs
2683  *
2684  * @adev: amdgpu_device pointer
2685  *
2686  * Main initialization pass for hardware IPs.  The list of all the hardware
2687  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2688  * are run.  sw_init initializes the software state associated with each IP
2689  * and hw_init initializes the hardware associated with each IP.
2690  * Returns 0 on success, negative error code on failure.
2691  */
2692 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2693 {
2694 	int i, r;
2695 
2696 	r = amdgpu_ras_init(adev);
2697 	if (r)
2698 		return r;
2699 
2700 	for (i = 0; i < adev->num_ip_blocks; i++) {
2701 		if (!adev->ip_blocks[i].status.valid)
2702 			continue;
2703 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2704 		if (r) {
2705 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2706 				  adev->ip_blocks[i].version->funcs->name, r);
2707 			goto init_failed;
2708 		}
2709 		adev->ip_blocks[i].status.sw = true;
2710 
2711 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2712 			/* need to do common hw init early so everything is set up for gmc */
2713 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2714 			if (r) {
2715 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2716 				goto init_failed;
2717 			}
2718 			adev->ip_blocks[i].status.hw = true;
2719 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2720 			/* need to do gmc hw init early so we can allocate gpu mem */
2721 			/* Try to reserve bad pages early */
2722 			if (amdgpu_sriov_vf(adev))
2723 				amdgpu_virt_exchange_data(adev);
2724 
2725 			r = amdgpu_device_mem_scratch_init(adev);
2726 			if (r) {
2727 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2728 				goto init_failed;
2729 			}
2730 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2731 			if (r) {
2732 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2733 				goto init_failed;
2734 			}
2735 			r = amdgpu_device_wb_init(adev);
2736 			if (r) {
2737 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2738 				goto init_failed;
2739 			}
2740 			adev->ip_blocks[i].status.hw = true;
2741 
2742 			/* right after GMC hw init, we create CSA */
2743 			if (adev->gfx.mcbp) {
2744 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2745 							       AMDGPU_GEM_DOMAIN_VRAM |
2746 							       AMDGPU_GEM_DOMAIN_GTT,
2747 							       AMDGPU_CSA_SIZE);
2748 				if (r) {
2749 					DRM_ERROR("allocate CSA failed %d\n", r);
2750 					goto init_failed;
2751 				}
2752 			}
2753 
2754 			r = amdgpu_seq64_init(adev);
2755 			if (r) {
2756 				DRM_ERROR("allocate seq64 failed %d\n", r);
2757 				goto init_failed;
2758 			}
2759 		}
2760 	}
2761 
2762 	if (amdgpu_sriov_vf(adev))
2763 		amdgpu_virt_init_data_exchange(adev);
2764 
2765 	r = amdgpu_ib_pool_init(adev);
2766 	if (r) {
2767 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2768 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2769 		goto init_failed;
2770 	}
2771 
2772 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2773 	if (r)
2774 		goto init_failed;
2775 
2776 	r = amdgpu_device_ip_hw_init_phase1(adev);
2777 	if (r)
2778 		goto init_failed;
2779 
2780 	r = amdgpu_device_fw_loading(adev);
2781 	if (r)
2782 		goto init_failed;
2783 
2784 	r = amdgpu_device_ip_hw_init_phase2(adev);
2785 	if (r)
2786 		goto init_failed;
2787 
2788 	/*
2789 	 * retired pages will be loaded from eeprom and reserved here,
2790 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2791 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2792 	 * for I2C communication which only true at this point.
2793 	 *
2794 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2795 	 * failure from bad gpu situation and stop amdgpu init process
2796 	 * accordingly. For other failed cases, it will still release all
2797 	 * the resource and print error message, rather than returning one
2798 	 * negative value to upper level.
2799 	 *
2800 	 * Note: theoretically, this should be called before all vram allocations
2801 	 * to protect retired page from abusing
2802 	 */
2803 	r = amdgpu_ras_recovery_init(adev);
2804 	if (r)
2805 		goto init_failed;
2806 
2807 	/**
2808 	 * In case of XGMI grab extra reference for reset domain for this device
2809 	 */
2810 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2811 		if (amdgpu_xgmi_add_device(adev) == 0) {
2812 			if (!amdgpu_sriov_vf(adev)) {
2813 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2814 
2815 				if (WARN_ON(!hive)) {
2816 					r = -ENOENT;
2817 					goto init_failed;
2818 				}
2819 
2820 				if (!hive->reset_domain ||
2821 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2822 					r = -ENOENT;
2823 					amdgpu_put_xgmi_hive(hive);
2824 					goto init_failed;
2825 				}
2826 
2827 				/* Drop the early temporary reset domain we created for device */
2828 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2829 				adev->reset_domain = hive->reset_domain;
2830 				amdgpu_put_xgmi_hive(hive);
2831 			}
2832 		}
2833 	}
2834 
2835 	r = amdgpu_device_init_schedulers(adev);
2836 	if (r)
2837 		goto init_failed;
2838 
2839 	if (adev->mman.buffer_funcs_ring->sched.ready)
2840 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2841 
2842 	/* Don't init kfd if whole hive need to be reset during init */
2843 	if (!adev->gmc.xgmi.pending_reset) {
2844 		kgd2kfd_init_zone_device(adev);
2845 		amdgpu_amdkfd_device_init(adev);
2846 	}
2847 
2848 	amdgpu_fru_get_product_info(adev);
2849 
2850 init_failed:
2851 
2852 	return r;
2853 }
2854 
2855 /**
2856  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2857  *
2858  * @adev: amdgpu_device pointer
2859  *
2860  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2861  * this function before a GPU reset.  If the value is retained after a
2862  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2863  */
2864 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2865 {
2866 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2867 }
2868 
2869 /**
2870  * amdgpu_device_check_vram_lost - check if vram is valid
2871  *
2872  * @adev: amdgpu_device pointer
2873  *
2874  * Checks the reset magic value written to the gart pointer in VRAM.
2875  * The driver calls this after a GPU reset to see if the contents of
2876  * VRAM is lost or now.
2877  * returns true if vram is lost, false if not.
2878  */
2879 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2880 {
2881 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2882 			AMDGPU_RESET_MAGIC_NUM))
2883 		return true;
2884 
2885 	if (!amdgpu_in_reset(adev))
2886 		return false;
2887 
2888 	/*
2889 	 * For all ASICs with baco/mode1 reset, the VRAM is
2890 	 * always assumed to be lost.
2891 	 */
2892 	switch (amdgpu_asic_reset_method(adev)) {
2893 	case AMD_RESET_METHOD_BACO:
2894 	case AMD_RESET_METHOD_MODE1:
2895 		return true;
2896 	default:
2897 		return false;
2898 	}
2899 }
2900 
2901 /**
2902  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2903  *
2904  * @adev: amdgpu_device pointer
2905  * @state: clockgating state (gate or ungate)
2906  *
2907  * The list of all the hardware IPs that make up the asic is walked and the
2908  * set_clockgating_state callbacks are run.
2909  * Late initialization pass enabling clockgating for hardware IPs.
2910  * Fini or suspend, pass disabling clockgating for hardware IPs.
2911  * Returns 0 on success, negative error code on failure.
2912  */
2913 
2914 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2915 			       enum amd_clockgating_state state)
2916 {
2917 	int i, j, r;
2918 
2919 	if (amdgpu_emu_mode == 1)
2920 		return 0;
2921 
2922 	for (j = 0; j < adev->num_ip_blocks; j++) {
2923 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2924 		if (!adev->ip_blocks[i].status.late_initialized)
2925 			continue;
2926 		/* skip CG for GFX, SDMA on S0ix */
2927 		if (adev->in_s0ix &&
2928 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2929 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2930 			continue;
2931 		/* skip CG for VCE/UVD, it's handled specially */
2932 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2933 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2934 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2935 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2936 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2937 			/* enable clockgating to save power */
2938 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2939 										     state);
2940 			if (r) {
2941 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2942 					  adev->ip_blocks[i].version->funcs->name, r);
2943 				return r;
2944 			}
2945 		}
2946 	}
2947 
2948 	return 0;
2949 }
2950 
2951 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2952 			       enum amd_powergating_state state)
2953 {
2954 	int i, j, r;
2955 
2956 	if (amdgpu_emu_mode == 1)
2957 		return 0;
2958 
2959 	for (j = 0; j < adev->num_ip_blocks; j++) {
2960 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2961 		if (!adev->ip_blocks[i].status.late_initialized)
2962 			continue;
2963 		/* skip PG for GFX, SDMA on S0ix */
2964 		if (adev->in_s0ix &&
2965 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2966 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2967 			continue;
2968 		/* skip CG for VCE/UVD, it's handled specially */
2969 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2970 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2971 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2972 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2973 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2974 			/* enable powergating to save power */
2975 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2976 											state);
2977 			if (r) {
2978 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2979 					  adev->ip_blocks[i].version->funcs->name, r);
2980 				return r;
2981 			}
2982 		}
2983 	}
2984 	return 0;
2985 }
2986 
2987 static int amdgpu_device_enable_mgpu_fan_boost(void)
2988 {
2989 	struct amdgpu_gpu_instance *gpu_ins;
2990 	struct amdgpu_device *adev;
2991 	int i, ret = 0;
2992 
2993 	mutex_lock(&mgpu_info.mutex);
2994 
2995 	/*
2996 	 * MGPU fan boost feature should be enabled
2997 	 * only when there are two or more dGPUs in
2998 	 * the system
2999 	 */
3000 	if (mgpu_info.num_dgpu < 2)
3001 		goto out;
3002 
3003 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3004 		gpu_ins = &(mgpu_info.gpu_ins[i]);
3005 		adev = gpu_ins->adev;
3006 		if (!(adev->flags & AMD_IS_APU) &&
3007 		    !gpu_ins->mgpu_fan_enabled) {
3008 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3009 			if (ret)
3010 				break;
3011 
3012 			gpu_ins->mgpu_fan_enabled = 1;
3013 		}
3014 	}
3015 
3016 out:
3017 	mutex_unlock(&mgpu_info.mutex);
3018 
3019 	return ret;
3020 }
3021 
3022 /**
3023  * amdgpu_device_ip_late_init - run late init for hardware IPs
3024  *
3025  * @adev: amdgpu_device pointer
3026  *
3027  * Late initialization pass for hardware IPs.  The list of all the hardware
3028  * IPs that make up the asic is walked and the late_init callbacks are run.
3029  * late_init covers any special initialization that an IP requires
3030  * after all of the have been initialized or something that needs to happen
3031  * late in the init process.
3032  * Returns 0 on success, negative error code on failure.
3033  */
3034 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3035 {
3036 	struct amdgpu_gpu_instance *gpu_instance;
3037 	int i = 0, r;
3038 
3039 	for (i = 0; i < adev->num_ip_blocks; i++) {
3040 		if (!adev->ip_blocks[i].status.hw)
3041 			continue;
3042 		if (adev->ip_blocks[i].version->funcs->late_init) {
3043 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3044 			if (r) {
3045 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3046 					  adev->ip_blocks[i].version->funcs->name, r);
3047 				return r;
3048 			}
3049 		}
3050 		adev->ip_blocks[i].status.late_initialized = true;
3051 	}
3052 
3053 	r = amdgpu_ras_late_init(adev);
3054 	if (r) {
3055 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3056 		return r;
3057 	}
3058 
3059 	amdgpu_ras_set_error_query_ready(adev, true);
3060 
3061 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3062 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3063 
3064 	amdgpu_device_fill_reset_magic(adev);
3065 
3066 	r = amdgpu_device_enable_mgpu_fan_boost();
3067 	if (r)
3068 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3069 
3070 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3071 	if (amdgpu_passthrough(adev) &&
3072 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3073 	     adev->asic_type == CHIP_ALDEBARAN))
3074 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3075 
3076 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3077 		mutex_lock(&mgpu_info.mutex);
3078 
3079 		/*
3080 		 * Reset device p-state to low as this was booted with high.
3081 		 *
3082 		 * This should be performed only after all devices from the same
3083 		 * hive get initialized.
3084 		 *
3085 		 * However, it's unknown how many device in the hive in advance.
3086 		 * As this is counted one by one during devices initializations.
3087 		 *
3088 		 * So, we wait for all XGMI interlinked devices initialized.
3089 		 * This may bring some delays as those devices may come from
3090 		 * different hives. But that should be OK.
3091 		 */
3092 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3093 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3094 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3095 				if (gpu_instance->adev->flags & AMD_IS_APU)
3096 					continue;
3097 
3098 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3099 						AMDGPU_XGMI_PSTATE_MIN);
3100 				if (r) {
3101 					DRM_ERROR("pstate setting failed (%d).\n", r);
3102 					break;
3103 				}
3104 			}
3105 		}
3106 
3107 		mutex_unlock(&mgpu_info.mutex);
3108 	}
3109 
3110 	return 0;
3111 }
3112 
3113 /**
3114  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3115  *
3116  * @adev: amdgpu_device pointer
3117  *
3118  * For ASICs need to disable SMC first
3119  */
3120 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3121 {
3122 	int i, r;
3123 
3124 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3125 		return;
3126 
3127 	for (i = 0; i < adev->num_ip_blocks; i++) {
3128 		if (!adev->ip_blocks[i].status.hw)
3129 			continue;
3130 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3131 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3132 			/* XXX handle errors */
3133 			if (r) {
3134 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3135 					  adev->ip_blocks[i].version->funcs->name, r);
3136 			}
3137 			adev->ip_blocks[i].status.hw = false;
3138 			break;
3139 		}
3140 	}
3141 }
3142 
3143 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3144 {
3145 	int i, r;
3146 
3147 	for (i = 0; i < adev->num_ip_blocks; i++) {
3148 		if (!adev->ip_blocks[i].version->funcs->early_fini)
3149 			continue;
3150 
3151 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3152 		if (r) {
3153 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3154 				  adev->ip_blocks[i].version->funcs->name, r);
3155 		}
3156 	}
3157 
3158 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3159 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3160 
3161 	amdgpu_amdkfd_suspend(adev, false);
3162 
3163 	/* Workaroud for ASICs need to disable SMC first */
3164 	amdgpu_device_smu_fini_early(adev);
3165 
3166 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3167 		if (!adev->ip_blocks[i].status.hw)
3168 			continue;
3169 
3170 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3171 		/* XXX handle errors */
3172 		if (r) {
3173 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3174 				  adev->ip_blocks[i].version->funcs->name, r);
3175 		}
3176 
3177 		adev->ip_blocks[i].status.hw = false;
3178 	}
3179 
3180 	if (amdgpu_sriov_vf(adev)) {
3181 		if (amdgpu_virt_release_full_gpu(adev, false))
3182 			DRM_ERROR("failed to release exclusive mode on fini\n");
3183 	}
3184 
3185 	return 0;
3186 }
3187 
3188 /**
3189  * amdgpu_device_ip_fini - run fini for hardware IPs
3190  *
3191  * @adev: amdgpu_device pointer
3192  *
3193  * Main teardown pass for hardware IPs.  The list of all the hardware
3194  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3195  * are run.  hw_fini tears down the hardware associated with each IP
3196  * and sw_fini tears down any software state associated with each IP.
3197  * Returns 0 on success, negative error code on failure.
3198  */
3199 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3200 {
3201 	int i, r;
3202 
3203 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3204 		amdgpu_virt_release_ras_err_handler_data(adev);
3205 
3206 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3207 		amdgpu_xgmi_remove_device(adev);
3208 
3209 	amdgpu_amdkfd_device_fini_sw(adev);
3210 
3211 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3212 		if (!adev->ip_blocks[i].status.sw)
3213 			continue;
3214 
3215 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3216 			amdgpu_ucode_free_bo(adev);
3217 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3218 			amdgpu_device_wb_fini(adev);
3219 			amdgpu_device_mem_scratch_fini(adev);
3220 			amdgpu_ib_pool_fini(adev);
3221 			amdgpu_seq64_fini(adev);
3222 		}
3223 
3224 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3225 		/* XXX handle errors */
3226 		if (r) {
3227 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3228 				  adev->ip_blocks[i].version->funcs->name, r);
3229 		}
3230 		adev->ip_blocks[i].status.sw = false;
3231 		adev->ip_blocks[i].status.valid = false;
3232 	}
3233 
3234 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3235 		if (!adev->ip_blocks[i].status.late_initialized)
3236 			continue;
3237 		if (adev->ip_blocks[i].version->funcs->late_fini)
3238 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3239 		adev->ip_blocks[i].status.late_initialized = false;
3240 	}
3241 
3242 	amdgpu_ras_fini(adev);
3243 
3244 	return 0;
3245 }
3246 
3247 /**
3248  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3249  *
3250  * @work: work_struct.
3251  */
3252 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3253 {
3254 	struct amdgpu_device *adev =
3255 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3256 	int r;
3257 
3258 	r = amdgpu_ib_ring_tests(adev);
3259 	if (r)
3260 		DRM_ERROR("ib ring test failed (%d).\n", r);
3261 }
3262 
3263 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3264 {
3265 	struct amdgpu_device *adev =
3266 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3267 
3268 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3269 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3270 
3271 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3272 		adev->gfx.gfx_off_state = true;
3273 }
3274 
3275 /**
3276  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3277  *
3278  * @adev: amdgpu_device pointer
3279  *
3280  * Main suspend function for hardware IPs.  The list of all the hardware
3281  * IPs that make up the asic is walked, clockgating is disabled and the
3282  * suspend callbacks are run.  suspend puts the hardware and software state
3283  * in each IP into a state suitable for suspend.
3284  * Returns 0 on success, negative error code on failure.
3285  */
3286 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3287 {
3288 	int i, r;
3289 
3290 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3291 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3292 
3293 	/*
3294 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3295 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3296 	 * scenario. Add the missing df cstate disablement here.
3297 	 */
3298 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3299 		dev_warn(adev->dev, "Failed to disallow df cstate");
3300 
3301 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3302 		if (!adev->ip_blocks[i].status.valid)
3303 			continue;
3304 
3305 		/* displays are handled separately */
3306 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3307 			continue;
3308 
3309 		/* XXX handle errors */
3310 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3311 		/* XXX handle errors */
3312 		if (r) {
3313 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3314 				  adev->ip_blocks[i].version->funcs->name, r);
3315 			return r;
3316 		}
3317 
3318 		adev->ip_blocks[i].status.hw = false;
3319 	}
3320 
3321 	return 0;
3322 }
3323 
3324 /**
3325  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3326  *
3327  * @adev: amdgpu_device pointer
3328  *
3329  * Main suspend function for hardware IPs.  The list of all the hardware
3330  * IPs that make up the asic is walked, clockgating is disabled and the
3331  * suspend callbacks are run.  suspend puts the hardware and software state
3332  * in each IP into a state suitable for suspend.
3333  * Returns 0 on success, negative error code on failure.
3334  */
3335 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3336 {
3337 	int i, r;
3338 
3339 	if (adev->in_s0ix)
3340 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3341 
3342 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3343 		if (!adev->ip_blocks[i].status.valid)
3344 			continue;
3345 		/* displays are handled in phase1 */
3346 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3347 			continue;
3348 		/* PSP lost connection when err_event_athub occurs */
3349 		if (amdgpu_ras_intr_triggered() &&
3350 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3351 			adev->ip_blocks[i].status.hw = false;
3352 			continue;
3353 		}
3354 
3355 		/* skip unnecessary suspend if we do not initialize them yet */
3356 		if (adev->gmc.xgmi.pending_reset &&
3357 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3358 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3359 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3360 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3361 			adev->ip_blocks[i].status.hw = false;
3362 			continue;
3363 		}
3364 
3365 		/* skip suspend of gfx/mes and psp for S0ix
3366 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3367 		 * like at runtime. PSP is also part of the always on hardware
3368 		 * so no need to suspend it.
3369 		 */
3370 		if (adev->in_s0ix &&
3371 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3372 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3373 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3374 			continue;
3375 
3376 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3377 		if (adev->in_s0ix &&
3378 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3379 		     IP_VERSION(5, 0, 0)) &&
3380 		    (adev->ip_blocks[i].version->type ==
3381 		     AMD_IP_BLOCK_TYPE_SDMA))
3382 			continue;
3383 
3384 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3385 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3386 		 * from this location and RLC Autoload automatically also gets loaded
3387 		 * from here based on PMFW -> PSP message during re-init sequence.
3388 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3389 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3390 		 */
3391 		if (amdgpu_in_reset(adev) &&
3392 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3393 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3394 			continue;
3395 
3396 		/* XXX handle errors */
3397 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3398 		/* XXX handle errors */
3399 		if (r) {
3400 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3401 				  adev->ip_blocks[i].version->funcs->name, r);
3402 		}
3403 		adev->ip_blocks[i].status.hw = false;
3404 		/* handle putting the SMC in the appropriate state */
3405 		if (!amdgpu_sriov_vf(adev)) {
3406 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3407 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3408 				if (r) {
3409 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3410 							adev->mp1_state, r);
3411 					return r;
3412 				}
3413 			}
3414 		}
3415 	}
3416 
3417 	return 0;
3418 }
3419 
3420 /**
3421  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3422  *
3423  * @adev: amdgpu_device pointer
3424  *
3425  * Main suspend function for hardware IPs.  The list of all the hardware
3426  * IPs that make up the asic is walked, clockgating is disabled and the
3427  * suspend callbacks are run.  suspend puts the hardware and software state
3428  * in each IP into a state suitable for suspend.
3429  * Returns 0 on success, negative error code on failure.
3430  */
3431 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3432 {
3433 	int r;
3434 
3435 	if (amdgpu_sriov_vf(adev)) {
3436 		amdgpu_virt_fini_data_exchange(adev);
3437 		amdgpu_virt_request_full_gpu(adev, false);
3438 	}
3439 
3440 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3441 
3442 	r = amdgpu_device_ip_suspend_phase1(adev);
3443 	if (r)
3444 		return r;
3445 	r = amdgpu_device_ip_suspend_phase2(adev);
3446 
3447 	if (amdgpu_sriov_vf(adev))
3448 		amdgpu_virt_release_full_gpu(adev, false);
3449 
3450 	return r;
3451 }
3452 
3453 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3454 {
3455 	int i, r;
3456 
3457 	static enum amd_ip_block_type ip_order[] = {
3458 		AMD_IP_BLOCK_TYPE_COMMON,
3459 		AMD_IP_BLOCK_TYPE_GMC,
3460 		AMD_IP_BLOCK_TYPE_PSP,
3461 		AMD_IP_BLOCK_TYPE_IH,
3462 	};
3463 
3464 	for (i = 0; i < adev->num_ip_blocks; i++) {
3465 		int j;
3466 		struct amdgpu_ip_block *block;
3467 
3468 		block = &adev->ip_blocks[i];
3469 		block->status.hw = false;
3470 
3471 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3472 
3473 			if (block->version->type != ip_order[j] ||
3474 				!block->status.valid)
3475 				continue;
3476 
3477 			r = block->version->funcs->hw_init(adev);
3478 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3479 			if (r)
3480 				return r;
3481 			block->status.hw = true;
3482 		}
3483 	}
3484 
3485 	return 0;
3486 }
3487 
3488 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3489 {
3490 	int i, r;
3491 
3492 	static enum amd_ip_block_type ip_order[] = {
3493 		AMD_IP_BLOCK_TYPE_SMC,
3494 		AMD_IP_BLOCK_TYPE_DCE,
3495 		AMD_IP_BLOCK_TYPE_GFX,
3496 		AMD_IP_BLOCK_TYPE_SDMA,
3497 		AMD_IP_BLOCK_TYPE_MES,
3498 		AMD_IP_BLOCK_TYPE_UVD,
3499 		AMD_IP_BLOCK_TYPE_VCE,
3500 		AMD_IP_BLOCK_TYPE_VCN,
3501 		AMD_IP_BLOCK_TYPE_JPEG
3502 	};
3503 
3504 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3505 		int j;
3506 		struct amdgpu_ip_block *block;
3507 
3508 		for (j = 0; j < adev->num_ip_blocks; j++) {
3509 			block = &adev->ip_blocks[j];
3510 
3511 			if (block->version->type != ip_order[i] ||
3512 				!block->status.valid ||
3513 				block->status.hw)
3514 				continue;
3515 
3516 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3517 				r = block->version->funcs->resume(adev);
3518 			else
3519 				r = block->version->funcs->hw_init(adev);
3520 
3521 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3522 			if (r)
3523 				return r;
3524 			block->status.hw = true;
3525 		}
3526 	}
3527 
3528 	return 0;
3529 }
3530 
3531 /**
3532  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3533  *
3534  * @adev: amdgpu_device pointer
3535  *
3536  * First resume function for hardware IPs.  The list of all the hardware
3537  * IPs that make up the asic is walked and the resume callbacks are run for
3538  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3539  * after a suspend and updates the software state as necessary.  This
3540  * function is also used for restoring the GPU after a GPU reset.
3541  * Returns 0 on success, negative error code on failure.
3542  */
3543 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3544 {
3545 	int i, r;
3546 
3547 	for (i = 0; i < adev->num_ip_blocks; i++) {
3548 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3549 			continue;
3550 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3551 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3552 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3553 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3554 
3555 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3556 			if (r) {
3557 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3558 					  adev->ip_blocks[i].version->funcs->name, r);
3559 				return r;
3560 			}
3561 			adev->ip_blocks[i].status.hw = true;
3562 		}
3563 	}
3564 
3565 	return 0;
3566 }
3567 
3568 /**
3569  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3570  *
3571  * @adev: amdgpu_device pointer
3572  *
3573  * First resume function for hardware IPs.  The list of all the hardware
3574  * IPs that make up the asic is walked and the resume callbacks are run for
3575  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3576  * functional state after a suspend and updates the software state as
3577  * necessary.  This function is also used for restoring the GPU after a GPU
3578  * reset.
3579  * Returns 0 on success, negative error code on failure.
3580  */
3581 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3582 {
3583 	int i, r;
3584 
3585 	for (i = 0; i < adev->num_ip_blocks; i++) {
3586 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3587 			continue;
3588 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3589 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3590 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3591 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3592 			continue;
3593 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3594 		if (r) {
3595 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3596 				  adev->ip_blocks[i].version->funcs->name, r);
3597 			return r;
3598 		}
3599 		adev->ip_blocks[i].status.hw = true;
3600 	}
3601 
3602 	return 0;
3603 }
3604 
3605 /**
3606  * amdgpu_device_ip_resume - run resume for hardware IPs
3607  *
3608  * @adev: amdgpu_device pointer
3609  *
3610  * Main resume function for hardware IPs.  The hardware IPs
3611  * are split into two resume functions because they are
3612  * also used in recovering from a GPU reset and some additional
3613  * steps need to be take between them.  In this case (S3/S4) they are
3614  * run sequentially.
3615  * Returns 0 on success, negative error code on failure.
3616  */
3617 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3618 {
3619 	int r;
3620 
3621 	r = amdgpu_device_ip_resume_phase1(adev);
3622 	if (r)
3623 		return r;
3624 
3625 	r = amdgpu_device_fw_loading(adev);
3626 	if (r)
3627 		return r;
3628 
3629 	r = amdgpu_device_ip_resume_phase2(adev);
3630 
3631 	if (adev->mman.buffer_funcs_ring->sched.ready)
3632 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3633 
3634 	return r;
3635 }
3636 
3637 /**
3638  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3639  *
3640  * @adev: amdgpu_device pointer
3641  *
3642  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3643  */
3644 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3645 {
3646 	if (amdgpu_sriov_vf(adev)) {
3647 		if (adev->is_atom_fw) {
3648 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3649 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3650 		} else {
3651 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3652 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3653 		}
3654 
3655 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3656 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3657 	}
3658 }
3659 
3660 /**
3661  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3662  *
3663  * @asic_type: AMD asic type
3664  *
3665  * Check if there is DC (new modesetting infrastructre) support for an asic.
3666  * returns true if DC has support, false if not.
3667  */
3668 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3669 {
3670 	switch (asic_type) {
3671 #ifdef CONFIG_DRM_AMDGPU_SI
3672 	case CHIP_HAINAN:
3673 #endif
3674 	case CHIP_TOPAZ:
3675 		/* chips with no display hardware */
3676 		return false;
3677 #if defined(CONFIG_DRM_AMD_DC)
3678 	case CHIP_TAHITI:
3679 	case CHIP_PITCAIRN:
3680 	case CHIP_VERDE:
3681 	case CHIP_OLAND:
3682 		/*
3683 		 * We have systems in the wild with these ASICs that require
3684 		 * LVDS and VGA support which is not supported with DC.
3685 		 *
3686 		 * Fallback to the non-DC driver here by default so as not to
3687 		 * cause regressions.
3688 		 */
3689 #if defined(CONFIG_DRM_AMD_DC_SI)
3690 		return amdgpu_dc > 0;
3691 #else
3692 		return false;
3693 #endif
3694 	case CHIP_BONAIRE:
3695 	case CHIP_KAVERI:
3696 	case CHIP_KABINI:
3697 	case CHIP_MULLINS:
3698 		/*
3699 		 * We have systems in the wild with these ASICs that require
3700 		 * VGA support which is not supported with DC.
3701 		 *
3702 		 * Fallback to the non-DC driver here by default so as not to
3703 		 * cause regressions.
3704 		 */
3705 		return amdgpu_dc > 0;
3706 	default:
3707 		return amdgpu_dc != 0;
3708 #else
3709 	default:
3710 		if (amdgpu_dc > 0)
3711 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3712 		return false;
3713 #endif
3714 	}
3715 }
3716 
3717 /**
3718  * amdgpu_device_has_dc_support - check if dc is supported
3719  *
3720  * @adev: amdgpu_device pointer
3721  *
3722  * Returns true for supported, false for not supported
3723  */
3724 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3725 {
3726 	if (adev->enable_virtual_display ||
3727 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3728 		return false;
3729 
3730 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3731 }
3732 
3733 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3734 {
3735 	struct amdgpu_device *adev =
3736 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3737 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3738 
3739 	/* It's a bug to not have a hive within this function */
3740 	if (WARN_ON(!hive))
3741 		return;
3742 
3743 	/*
3744 	 * Use task barrier to synchronize all xgmi reset works across the
3745 	 * hive. task_barrier_enter and task_barrier_exit will block
3746 	 * until all the threads running the xgmi reset works reach
3747 	 * those points. task_barrier_full will do both blocks.
3748 	 */
3749 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3750 
3751 		task_barrier_enter(&hive->tb);
3752 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3753 
3754 		if (adev->asic_reset_res)
3755 			goto fail;
3756 
3757 		task_barrier_exit(&hive->tb);
3758 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3759 
3760 		if (adev->asic_reset_res)
3761 			goto fail;
3762 
3763 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3764 	} else {
3765 
3766 		task_barrier_full(&hive->tb);
3767 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3768 	}
3769 
3770 fail:
3771 	if (adev->asic_reset_res)
3772 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3773 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3774 	amdgpu_put_xgmi_hive(hive);
3775 }
3776 
3777 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3778 {
3779 	char *input = amdgpu_lockup_timeout;
3780 	char *timeout_setting = NULL;
3781 	int index = 0;
3782 	long timeout;
3783 	int ret = 0;
3784 
3785 	/*
3786 	 * By default timeout for non compute jobs is 10000
3787 	 * and 60000 for compute jobs.
3788 	 * In SR-IOV or passthrough mode, timeout for compute
3789 	 * jobs are 60000 by default.
3790 	 */
3791 	adev->gfx_timeout = msecs_to_jiffies(10000);
3792 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3793 	if (amdgpu_sriov_vf(adev))
3794 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3795 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3796 	else
3797 		adev->compute_timeout =  msecs_to_jiffies(60000);
3798 
3799 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3800 		while ((timeout_setting = strsep(&input, ",")) &&
3801 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3802 			ret = kstrtol(timeout_setting, 0, &timeout);
3803 			if (ret)
3804 				return ret;
3805 
3806 			if (timeout == 0) {
3807 				index++;
3808 				continue;
3809 			} else if (timeout < 0) {
3810 				timeout = MAX_SCHEDULE_TIMEOUT;
3811 				dev_warn(adev->dev, "lockup timeout disabled");
3812 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3813 			} else {
3814 				timeout = msecs_to_jiffies(timeout);
3815 			}
3816 
3817 			switch (index++) {
3818 			case 0:
3819 				adev->gfx_timeout = timeout;
3820 				break;
3821 			case 1:
3822 				adev->compute_timeout = timeout;
3823 				break;
3824 			case 2:
3825 				adev->sdma_timeout = timeout;
3826 				break;
3827 			case 3:
3828 				adev->video_timeout = timeout;
3829 				break;
3830 			default:
3831 				break;
3832 			}
3833 		}
3834 		/*
3835 		 * There is only one value specified and
3836 		 * it should apply to all non-compute jobs.
3837 		 */
3838 		if (index == 1) {
3839 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3840 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3841 				adev->compute_timeout = adev->gfx_timeout;
3842 		}
3843 	}
3844 
3845 	return ret;
3846 }
3847 
3848 /**
3849  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3850  *
3851  * @adev: amdgpu_device pointer
3852  *
3853  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3854  */
3855 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3856 {
3857 	struct iommu_domain *domain;
3858 
3859 	domain = iommu_get_domain_for_dev(adev->dev);
3860 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3861 		adev->ram_is_direct_mapped = true;
3862 }
3863 
3864 static const struct attribute *amdgpu_dev_attributes[] = {
3865 	&dev_attr_pcie_replay_count.attr,
3866 	NULL
3867 };
3868 
3869 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3870 {
3871 	if (amdgpu_mcbp == 1)
3872 		adev->gfx.mcbp = true;
3873 	else if (amdgpu_mcbp == 0)
3874 		adev->gfx.mcbp = false;
3875 
3876 	if (amdgpu_sriov_vf(adev))
3877 		adev->gfx.mcbp = true;
3878 
3879 	if (adev->gfx.mcbp)
3880 		DRM_INFO("MCBP is enabled\n");
3881 }
3882 
3883 /**
3884  * amdgpu_device_init - initialize the driver
3885  *
3886  * @adev: amdgpu_device pointer
3887  * @flags: driver flags
3888  *
3889  * Initializes the driver info and hw (all asics).
3890  * Returns 0 for success or an error on failure.
3891  * Called at driver startup.
3892  */
3893 int amdgpu_device_init(struct amdgpu_device *adev,
3894 		       uint32_t flags)
3895 {
3896 	struct drm_device *ddev = adev_to_drm(adev);
3897 	struct pci_dev *pdev = adev->pdev;
3898 	int r, i;
3899 	bool px = false;
3900 	u32 max_MBps;
3901 	int tmp;
3902 
3903 	adev->shutdown = false;
3904 	adev->flags = flags;
3905 
3906 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3907 		adev->asic_type = amdgpu_force_asic_type;
3908 	else
3909 		adev->asic_type = flags & AMD_ASIC_MASK;
3910 
3911 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3912 	if (amdgpu_emu_mode == 1)
3913 		adev->usec_timeout *= 10;
3914 	adev->gmc.gart_size = 512 * 1024 * 1024;
3915 	adev->accel_working = false;
3916 	adev->num_rings = 0;
3917 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3918 	adev->mman.buffer_funcs = NULL;
3919 	adev->mman.buffer_funcs_ring = NULL;
3920 	adev->vm_manager.vm_pte_funcs = NULL;
3921 	adev->vm_manager.vm_pte_num_scheds = 0;
3922 	adev->gmc.gmc_funcs = NULL;
3923 	adev->harvest_ip_mask = 0x0;
3924 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3925 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3926 
3927 	adev->smc_rreg = &amdgpu_invalid_rreg;
3928 	adev->smc_wreg = &amdgpu_invalid_wreg;
3929 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3930 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3931 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3932 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3933 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3934 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3935 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3936 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3937 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3938 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
3939 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3940 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3941 	adev->didt_rreg = &amdgpu_invalid_rreg;
3942 	adev->didt_wreg = &amdgpu_invalid_wreg;
3943 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3944 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3945 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3946 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3947 
3948 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3949 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3950 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3951 
3952 	/* mutex initialization are all done here so we
3953 	 * can recall function without having locking issues
3954 	 */
3955 	mutex_init(&adev->firmware.mutex);
3956 	mutex_init(&adev->pm.mutex);
3957 	mutex_init(&adev->gfx.gpu_clock_mutex);
3958 	mutex_init(&adev->srbm_mutex);
3959 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3960 	mutex_init(&adev->gfx.gfx_off_mutex);
3961 	mutex_init(&adev->gfx.partition_mutex);
3962 	mutex_init(&adev->grbm_idx_mutex);
3963 	mutex_init(&adev->mn_lock);
3964 	mutex_init(&adev->virt.vf_errors.lock);
3965 	hash_init(adev->mn_hash);
3966 	mutex_init(&adev->psp.mutex);
3967 	mutex_init(&adev->notifier_lock);
3968 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3969 	mutex_init(&adev->benchmark_mutex);
3970 
3971 	amdgpu_device_init_apu_flags(adev);
3972 
3973 	r = amdgpu_device_check_arguments(adev);
3974 	if (r)
3975 		return r;
3976 
3977 	spin_lock_init(&adev->mmio_idx_lock);
3978 	spin_lock_init(&adev->smc_idx_lock);
3979 	spin_lock_init(&adev->pcie_idx_lock);
3980 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3981 	spin_lock_init(&adev->didt_idx_lock);
3982 	spin_lock_init(&adev->gc_cac_idx_lock);
3983 	spin_lock_init(&adev->se_cac_idx_lock);
3984 	spin_lock_init(&adev->audio_endpt_idx_lock);
3985 	spin_lock_init(&adev->mm_stats.lock);
3986 
3987 	INIT_LIST_HEAD(&adev->shadow_list);
3988 	mutex_init(&adev->shadow_list_lock);
3989 
3990 	INIT_LIST_HEAD(&adev->reset_list);
3991 
3992 	INIT_LIST_HEAD(&adev->ras_list);
3993 
3994 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3995 
3996 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3997 			  amdgpu_device_delayed_init_work_handler);
3998 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3999 			  amdgpu_device_delay_enable_gfx_off);
4000 
4001 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4002 
4003 	adev->gfx.gfx_off_req_count = 1;
4004 	adev->gfx.gfx_off_residency = 0;
4005 	adev->gfx.gfx_off_entrycount = 0;
4006 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4007 
4008 	atomic_set(&adev->throttling_logging_enabled, 1);
4009 	/*
4010 	 * If throttling continues, logging will be performed every minute
4011 	 * to avoid log flooding. "-1" is subtracted since the thermal
4012 	 * throttling interrupt comes every second. Thus, the total logging
4013 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
4014 	 * for throttling interrupt) = 60 seconds.
4015 	 */
4016 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4017 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4018 
4019 	/* Registers mapping */
4020 	/* TODO: block userspace mapping of io register */
4021 	if (adev->asic_type >= CHIP_BONAIRE) {
4022 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4023 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4024 	} else {
4025 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4026 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4027 	}
4028 
4029 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4030 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4031 
4032 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4033 	if (!adev->rmmio)
4034 		return -ENOMEM;
4035 
4036 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4037 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4038 
4039 	/*
4040 	 * Reset domain needs to be present early, before XGMI hive discovered
4041 	 * (if any) and intitialized to use reset sem and in_gpu reset flag
4042 	 * early on during init and before calling to RREG32.
4043 	 */
4044 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4045 	if (!adev->reset_domain)
4046 		return -ENOMEM;
4047 
4048 	/* detect hw virtualization here */
4049 	amdgpu_detect_virtualization(adev);
4050 
4051 	amdgpu_device_get_pcie_info(adev);
4052 
4053 	r = amdgpu_device_get_job_timeout_settings(adev);
4054 	if (r) {
4055 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4056 		return r;
4057 	}
4058 
4059 	amdgpu_device_set_mcbp(adev);
4060 
4061 	/* early init functions */
4062 	r = amdgpu_device_ip_early_init(adev);
4063 	if (r)
4064 		return r;
4065 
4066 	/* Get rid of things like offb */
4067 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4068 	if (r)
4069 		return r;
4070 
4071 	/* Enable TMZ based on IP_VERSION */
4072 	amdgpu_gmc_tmz_set(adev);
4073 
4074 	amdgpu_gmc_noretry_set(adev);
4075 	/* Need to get xgmi info early to decide the reset behavior*/
4076 	if (adev->gmc.xgmi.supported) {
4077 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4078 		if (r)
4079 			return r;
4080 	}
4081 
4082 	/* enable PCIE atomic ops */
4083 	if (amdgpu_sriov_vf(adev)) {
4084 		if (adev->virt.fw_reserve.p_pf2vf)
4085 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4086 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4087 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4088 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
4089 	 * internal path natively support atomics, set have_atomics_support to true.
4090 	 */
4091 	} else if ((adev->flags & AMD_IS_APU) &&
4092 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4093 		    IP_VERSION(9, 0, 0))) {
4094 		adev->have_atomics_support = true;
4095 	} else {
4096 		adev->have_atomics_support =
4097 			!pci_enable_atomic_ops_to_root(adev->pdev,
4098 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4099 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4100 	}
4101 
4102 	if (!adev->have_atomics_support)
4103 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4104 
4105 	/* doorbell bar mapping and doorbell index init*/
4106 	amdgpu_doorbell_init(adev);
4107 
4108 	if (amdgpu_emu_mode == 1) {
4109 		/* post the asic on emulation mode */
4110 		emu_soc_asic_init(adev);
4111 		goto fence_driver_init;
4112 	}
4113 
4114 	amdgpu_reset_init(adev);
4115 
4116 	/* detect if we are with an SRIOV vbios */
4117 	if (adev->bios)
4118 		amdgpu_device_detect_sriov_bios(adev);
4119 
4120 	/* check if we need to reset the asic
4121 	 *  E.g., driver was not cleanly unloaded previously, etc.
4122 	 */
4123 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4124 		if (adev->gmc.xgmi.num_physical_nodes) {
4125 			dev_info(adev->dev, "Pending hive reset.\n");
4126 			adev->gmc.xgmi.pending_reset = true;
4127 			/* Only need to init necessary block for SMU to handle the reset */
4128 			for (i = 0; i < adev->num_ip_blocks; i++) {
4129 				if (!adev->ip_blocks[i].status.valid)
4130 					continue;
4131 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4132 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4133 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4134 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4135 					DRM_DEBUG("IP %s disabled for hw_init.\n",
4136 						adev->ip_blocks[i].version->funcs->name);
4137 					adev->ip_blocks[i].status.hw = true;
4138 				}
4139 			}
4140 		} else {
4141 			tmp = amdgpu_reset_method;
4142 			/* It should do a default reset when loading or reloading the driver,
4143 			 * regardless of the module parameter reset_method.
4144 			 */
4145 			amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4146 			r = amdgpu_asic_reset(adev);
4147 			amdgpu_reset_method = tmp;
4148 			if (r) {
4149 				dev_err(adev->dev, "asic reset on init failed\n");
4150 				goto failed;
4151 			}
4152 		}
4153 	}
4154 
4155 	/* Post card if necessary */
4156 	if (amdgpu_device_need_post(adev)) {
4157 		if (!adev->bios) {
4158 			dev_err(adev->dev, "no vBIOS found\n");
4159 			r = -EINVAL;
4160 			goto failed;
4161 		}
4162 		DRM_INFO("GPU posting now...\n");
4163 		r = amdgpu_device_asic_init(adev);
4164 		if (r) {
4165 			dev_err(adev->dev, "gpu post error!\n");
4166 			goto failed;
4167 		}
4168 	}
4169 
4170 	if (adev->bios) {
4171 		if (adev->is_atom_fw) {
4172 			/* Initialize clocks */
4173 			r = amdgpu_atomfirmware_get_clock_info(adev);
4174 			if (r) {
4175 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4176 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4177 				goto failed;
4178 			}
4179 		} else {
4180 			/* Initialize clocks */
4181 			r = amdgpu_atombios_get_clock_info(adev);
4182 			if (r) {
4183 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4184 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4185 				goto failed;
4186 			}
4187 			/* init i2c buses */
4188 			if (!amdgpu_device_has_dc_support(adev))
4189 				amdgpu_atombios_i2c_init(adev);
4190 		}
4191 	}
4192 
4193 fence_driver_init:
4194 	/* Fence driver */
4195 	r = amdgpu_fence_driver_sw_init(adev);
4196 	if (r) {
4197 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4198 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4199 		goto failed;
4200 	}
4201 
4202 	/* init the mode config */
4203 	drm_mode_config_init(adev_to_drm(adev));
4204 
4205 	r = amdgpu_device_ip_init(adev);
4206 	if (r) {
4207 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4208 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4209 		goto release_ras_con;
4210 	}
4211 
4212 	amdgpu_fence_driver_hw_init(adev);
4213 
4214 	dev_info(adev->dev,
4215 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4216 			adev->gfx.config.max_shader_engines,
4217 			adev->gfx.config.max_sh_per_se,
4218 			adev->gfx.config.max_cu_per_sh,
4219 			adev->gfx.cu_info.number);
4220 
4221 	adev->accel_working = true;
4222 
4223 	amdgpu_vm_check_compute_bug(adev);
4224 
4225 	/* Initialize the buffer migration limit. */
4226 	if (amdgpu_moverate >= 0)
4227 		max_MBps = amdgpu_moverate;
4228 	else
4229 		max_MBps = 8; /* Allow 8 MB/s. */
4230 	/* Get a log2 for easy divisions. */
4231 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4232 
4233 	/*
4234 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4235 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4236 	 * gpu instance is counted less.
4237 	 */
4238 	amdgpu_register_gpu_instance(adev);
4239 
4240 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4241 	 * explicit gating rather than handling it automatically.
4242 	 */
4243 	if (!adev->gmc.xgmi.pending_reset) {
4244 		r = amdgpu_device_ip_late_init(adev);
4245 		if (r) {
4246 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4247 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4248 			goto release_ras_con;
4249 		}
4250 		/* must succeed. */
4251 		amdgpu_ras_resume(adev);
4252 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4253 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4254 	}
4255 
4256 	if (amdgpu_sriov_vf(adev)) {
4257 		amdgpu_virt_release_full_gpu(adev, true);
4258 		flush_delayed_work(&adev->delayed_init_work);
4259 	}
4260 
4261 	/*
4262 	 * Place those sysfs registering after `late_init`. As some of those
4263 	 * operations performed in `late_init` might affect the sysfs
4264 	 * interfaces creating.
4265 	 */
4266 	r = amdgpu_atombios_sysfs_init(adev);
4267 	if (r)
4268 		drm_err(&adev->ddev,
4269 			"registering atombios sysfs failed (%d).\n", r);
4270 
4271 	r = amdgpu_pm_sysfs_init(adev);
4272 	if (r)
4273 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4274 
4275 	r = amdgpu_ucode_sysfs_init(adev);
4276 	if (r) {
4277 		adev->ucode_sysfs_en = false;
4278 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4279 	} else
4280 		adev->ucode_sysfs_en = true;
4281 
4282 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4283 	if (r)
4284 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4285 
4286 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4287 	if (r)
4288 		dev_err(adev->dev,
4289 			"Could not create amdgpu board attributes\n");
4290 
4291 	amdgpu_fru_sysfs_init(adev);
4292 	amdgpu_reg_state_sysfs_init(adev);
4293 
4294 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4295 		r = amdgpu_pmu_init(adev);
4296 	if (r)
4297 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4298 
4299 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4300 	if (amdgpu_device_cache_pci_state(adev->pdev))
4301 		pci_restore_state(pdev);
4302 
4303 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4304 	/* this will fail for cards that aren't VGA class devices, just
4305 	 * ignore it
4306 	 */
4307 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4308 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4309 
4310 	px = amdgpu_device_supports_px(ddev);
4311 
4312 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4313 				apple_gmux_detect(NULL, NULL)))
4314 		vga_switcheroo_register_client(adev->pdev,
4315 					       &amdgpu_switcheroo_ops, px);
4316 
4317 	if (px)
4318 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4319 
4320 	if (adev->gmc.xgmi.pending_reset)
4321 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4322 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4323 
4324 	amdgpu_device_check_iommu_direct_map(adev);
4325 
4326 	return 0;
4327 
4328 release_ras_con:
4329 	if (amdgpu_sriov_vf(adev))
4330 		amdgpu_virt_release_full_gpu(adev, true);
4331 
4332 	/* failed in exclusive mode due to timeout */
4333 	if (amdgpu_sriov_vf(adev) &&
4334 		!amdgpu_sriov_runtime(adev) &&
4335 		amdgpu_virt_mmio_blocked(adev) &&
4336 		!amdgpu_virt_wait_reset(adev)) {
4337 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4338 		/* Don't send request since VF is inactive. */
4339 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4340 		adev->virt.ops = NULL;
4341 		r = -EAGAIN;
4342 	}
4343 	amdgpu_release_ras_context(adev);
4344 
4345 failed:
4346 	amdgpu_vf_error_trans_all(adev);
4347 
4348 	return r;
4349 }
4350 
4351 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4352 {
4353 
4354 	/* Clear all CPU mappings pointing to this device */
4355 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4356 
4357 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4358 	amdgpu_doorbell_fini(adev);
4359 
4360 	iounmap(adev->rmmio);
4361 	adev->rmmio = NULL;
4362 	if (adev->mman.aper_base_kaddr)
4363 		iounmap(adev->mman.aper_base_kaddr);
4364 	adev->mman.aper_base_kaddr = NULL;
4365 
4366 	/* Memory manager related */
4367 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4368 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4369 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4370 	}
4371 }
4372 
4373 /**
4374  * amdgpu_device_fini_hw - tear down the driver
4375  *
4376  * @adev: amdgpu_device pointer
4377  *
4378  * Tear down the driver info (all asics).
4379  * Called at driver shutdown.
4380  */
4381 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4382 {
4383 	dev_info(adev->dev, "amdgpu: finishing device.\n");
4384 	flush_delayed_work(&adev->delayed_init_work);
4385 	adev->shutdown = true;
4386 
4387 	/* make sure IB test finished before entering exclusive mode
4388 	 * to avoid preemption on IB test
4389 	 */
4390 	if (amdgpu_sriov_vf(adev)) {
4391 		amdgpu_virt_request_full_gpu(adev, false);
4392 		amdgpu_virt_fini_data_exchange(adev);
4393 	}
4394 
4395 	/* disable all interrupts */
4396 	amdgpu_irq_disable_all(adev);
4397 	if (adev->mode_info.mode_config_initialized) {
4398 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4399 			drm_helper_force_disable_all(adev_to_drm(adev));
4400 		else
4401 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4402 	}
4403 	amdgpu_fence_driver_hw_fini(adev);
4404 
4405 	if (adev->mman.initialized)
4406 		drain_workqueue(adev->mman.bdev.wq);
4407 
4408 	if (adev->pm.sysfs_initialized)
4409 		amdgpu_pm_sysfs_fini(adev);
4410 	if (adev->ucode_sysfs_en)
4411 		amdgpu_ucode_sysfs_fini(adev);
4412 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4413 	amdgpu_fru_sysfs_fini(adev);
4414 
4415 	amdgpu_reg_state_sysfs_fini(adev);
4416 
4417 	/* disable ras feature must before hw fini */
4418 	amdgpu_ras_pre_fini(adev);
4419 
4420 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4421 
4422 	amdgpu_device_ip_fini_early(adev);
4423 
4424 	amdgpu_irq_fini_hw(adev);
4425 
4426 	if (adev->mman.initialized)
4427 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4428 
4429 	amdgpu_gart_dummy_page_fini(adev);
4430 
4431 	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4432 		amdgpu_device_unmap_mmio(adev);
4433 
4434 }
4435 
4436 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4437 {
4438 	int idx;
4439 	bool px;
4440 
4441 	amdgpu_fence_driver_sw_fini(adev);
4442 	amdgpu_device_ip_fini(adev);
4443 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4444 	adev->accel_working = false;
4445 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4446 
4447 	amdgpu_reset_fini(adev);
4448 
4449 	/* free i2c buses */
4450 	if (!amdgpu_device_has_dc_support(adev))
4451 		amdgpu_i2c_fini(adev);
4452 
4453 	if (amdgpu_emu_mode != 1)
4454 		amdgpu_atombios_fini(adev);
4455 
4456 	kfree(adev->bios);
4457 	adev->bios = NULL;
4458 
4459 	kfree(adev->fru_info);
4460 	adev->fru_info = NULL;
4461 
4462 	px = amdgpu_device_supports_px(adev_to_drm(adev));
4463 
4464 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4465 				apple_gmux_detect(NULL, NULL)))
4466 		vga_switcheroo_unregister_client(adev->pdev);
4467 
4468 	if (px)
4469 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4470 
4471 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4472 		vga_client_unregister(adev->pdev);
4473 
4474 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4475 
4476 		iounmap(adev->rmmio);
4477 		adev->rmmio = NULL;
4478 		amdgpu_doorbell_fini(adev);
4479 		drm_dev_exit(idx);
4480 	}
4481 
4482 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4483 		amdgpu_pmu_fini(adev);
4484 	if (adev->mman.discovery_bin)
4485 		amdgpu_discovery_fini(adev);
4486 
4487 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4488 	adev->reset_domain = NULL;
4489 
4490 	kfree(adev->pci_state);
4491 
4492 }
4493 
4494 /**
4495  * amdgpu_device_evict_resources - evict device resources
4496  * @adev: amdgpu device object
4497  *
4498  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4499  * of the vram memory type. Mainly used for evicting device resources
4500  * at suspend time.
4501  *
4502  */
4503 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4504 {
4505 	int ret;
4506 
4507 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4508 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4509 		return 0;
4510 
4511 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4512 	if (ret)
4513 		DRM_WARN("evicting device resources failed\n");
4514 	return ret;
4515 }
4516 
4517 /*
4518  * Suspend & resume.
4519  */
4520 /**
4521  * amdgpu_device_prepare - prepare for device suspend
4522  *
4523  * @dev: drm dev pointer
4524  *
4525  * Prepare to put the hw in the suspend state (all asics).
4526  * Returns 0 for success or an error on failure.
4527  * Called at driver suspend.
4528  */
4529 int amdgpu_device_prepare(struct drm_device *dev)
4530 {
4531 	struct amdgpu_device *adev = drm_to_adev(dev);
4532 	int i, r;
4533 
4534 	amdgpu_choose_low_power_state(adev);
4535 
4536 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4537 		return 0;
4538 
4539 	/* Evict the majority of BOs before starting suspend sequence */
4540 	r = amdgpu_device_evict_resources(adev);
4541 	if (r)
4542 		goto unprepare;
4543 
4544 	for (i = 0; i < adev->num_ip_blocks; i++) {
4545 		if (!adev->ip_blocks[i].status.valid)
4546 			continue;
4547 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4548 			continue;
4549 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4550 		if (r)
4551 			goto unprepare;
4552 	}
4553 
4554 	return 0;
4555 
4556 unprepare:
4557 	adev->in_s0ix = adev->in_s3 = false;
4558 
4559 	return r;
4560 }
4561 
4562 /**
4563  * amdgpu_device_suspend - initiate device suspend
4564  *
4565  * @dev: drm dev pointer
4566  * @fbcon : notify the fbdev of suspend
4567  *
4568  * Puts the hw in the suspend state (all asics).
4569  * Returns 0 for success or an error on failure.
4570  * Called at driver suspend.
4571  */
4572 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4573 {
4574 	struct amdgpu_device *adev = drm_to_adev(dev);
4575 	int r = 0;
4576 
4577 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4578 		return 0;
4579 
4580 	adev->in_suspend = true;
4581 
4582 	if (amdgpu_sriov_vf(adev)) {
4583 		amdgpu_virt_fini_data_exchange(adev);
4584 		r = amdgpu_virt_request_full_gpu(adev, false);
4585 		if (r)
4586 			return r;
4587 	}
4588 
4589 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4590 		DRM_WARN("smart shift update failed\n");
4591 
4592 	if (fbcon)
4593 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4594 
4595 	cancel_delayed_work_sync(&adev->delayed_init_work);
4596 
4597 	amdgpu_ras_suspend(adev);
4598 
4599 	amdgpu_device_ip_suspend_phase1(adev);
4600 
4601 	if (!adev->in_s0ix)
4602 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4603 
4604 	r = amdgpu_device_evict_resources(adev);
4605 	if (r)
4606 		return r;
4607 
4608 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4609 
4610 	amdgpu_fence_driver_hw_fini(adev);
4611 
4612 	amdgpu_device_ip_suspend_phase2(adev);
4613 
4614 	if (amdgpu_sriov_vf(adev))
4615 		amdgpu_virt_release_full_gpu(adev, false);
4616 
4617 	r = amdgpu_dpm_notify_rlc_state(adev, false);
4618 	if (r)
4619 		return r;
4620 
4621 	return 0;
4622 }
4623 
4624 /**
4625  * amdgpu_device_resume - initiate device resume
4626  *
4627  * @dev: drm dev pointer
4628  * @fbcon : notify the fbdev of resume
4629  *
4630  * Bring the hw back to operating state (all asics).
4631  * Returns 0 for success or an error on failure.
4632  * Called at driver resume.
4633  */
4634 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4635 {
4636 	struct amdgpu_device *adev = drm_to_adev(dev);
4637 	int r = 0;
4638 
4639 	if (amdgpu_sriov_vf(adev)) {
4640 		r = amdgpu_virt_request_full_gpu(adev, true);
4641 		if (r)
4642 			return r;
4643 	}
4644 
4645 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4646 		return 0;
4647 
4648 	if (adev->in_s0ix)
4649 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4650 
4651 	/* post card */
4652 	if (amdgpu_device_need_post(adev)) {
4653 		r = amdgpu_device_asic_init(adev);
4654 		if (r)
4655 			dev_err(adev->dev, "amdgpu asic init failed\n");
4656 	}
4657 
4658 	r = amdgpu_device_ip_resume(adev);
4659 
4660 	if (r) {
4661 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4662 		goto exit;
4663 	}
4664 	amdgpu_fence_driver_hw_init(adev);
4665 
4666 	if (!adev->in_s0ix) {
4667 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4668 		if (r)
4669 			goto exit;
4670 	}
4671 
4672 	r = amdgpu_device_ip_late_init(adev);
4673 	if (r)
4674 		goto exit;
4675 
4676 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4677 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4678 exit:
4679 	if (amdgpu_sriov_vf(adev)) {
4680 		amdgpu_virt_init_data_exchange(adev);
4681 		amdgpu_virt_release_full_gpu(adev, true);
4682 	}
4683 
4684 	if (r)
4685 		return r;
4686 
4687 	/* Make sure IB tests flushed */
4688 	flush_delayed_work(&adev->delayed_init_work);
4689 
4690 	if (fbcon)
4691 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4692 
4693 	amdgpu_ras_resume(adev);
4694 
4695 	if (adev->mode_info.num_crtc) {
4696 		/*
4697 		 * Most of the connector probing functions try to acquire runtime pm
4698 		 * refs to ensure that the GPU is powered on when connector polling is
4699 		 * performed. Since we're calling this from a runtime PM callback,
4700 		 * trying to acquire rpm refs will cause us to deadlock.
4701 		 *
4702 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4703 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4704 		 */
4705 #ifdef CONFIG_PM
4706 		dev->dev->power.disable_depth++;
4707 #endif
4708 		if (!adev->dc_enabled)
4709 			drm_helper_hpd_irq_event(dev);
4710 		else
4711 			drm_kms_helper_hotplug_event(dev);
4712 #ifdef CONFIG_PM
4713 		dev->dev->power.disable_depth--;
4714 #endif
4715 	}
4716 	adev->in_suspend = false;
4717 
4718 	if (adev->enable_mes)
4719 		amdgpu_mes_self_test(adev);
4720 
4721 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4722 		DRM_WARN("smart shift update failed\n");
4723 
4724 	return 0;
4725 }
4726 
4727 /**
4728  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4729  *
4730  * @adev: amdgpu_device pointer
4731  *
4732  * The list of all the hardware IPs that make up the asic is walked and
4733  * the check_soft_reset callbacks are run.  check_soft_reset determines
4734  * if the asic is still hung or not.
4735  * Returns true if any of the IPs are still in a hung state, false if not.
4736  */
4737 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4738 {
4739 	int i;
4740 	bool asic_hang = false;
4741 
4742 	if (amdgpu_sriov_vf(adev))
4743 		return true;
4744 
4745 	if (amdgpu_asic_need_full_reset(adev))
4746 		return true;
4747 
4748 	for (i = 0; i < adev->num_ip_blocks; i++) {
4749 		if (!adev->ip_blocks[i].status.valid)
4750 			continue;
4751 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4752 			adev->ip_blocks[i].status.hang =
4753 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4754 		if (adev->ip_blocks[i].status.hang) {
4755 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4756 			asic_hang = true;
4757 		}
4758 	}
4759 	return asic_hang;
4760 }
4761 
4762 /**
4763  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4764  *
4765  * @adev: amdgpu_device pointer
4766  *
4767  * The list of all the hardware IPs that make up the asic is walked and the
4768  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4769  * handles any IP specific hardware or software state changes that are
4770  * necessary for a soft reset to succeed.
4771  * Returns 0 on success, negative error code on failure.
4772  */
4773 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4774 {
4775 	int i, r = 0;
4776 
4777 	for (i = 0; i < adev->num_ip_blocks; i++) {
4778 		if (!adev->ip_blocks[i].status.valid)
4779 			continue;
4780 		if (adev->ip_blocks[i].status.hang &&
4781 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4782 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4783 			if (r)
4784 				return r;
4785 		}
4786 	}
4787 
4788 	return 0;
4789 }
4790 
4791 /**
4792  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4793  *
4794  * @adev: amdgpu_device pointer
4795  *
4796  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4797  * reset is necessary to recover.
4798  * Returns true if a full asic reset is required, false if not.
4799  */
4800 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4801 {
4802 	int i;
4803 
4804 	if (amdgpu_asic_need_full_reset(adev))
4805 		return true;
4806 
4807 	for (i = 0; i < adev->num_ip_blocks; i++) {
4808 		if (!adev->ip_blocks[i].status.valid)
4809 			continue;
4810 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4811 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4812 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4813 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4814 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4815 			if (adev->ip_blocks[i].status.hang) {
4816 				dev_info(adev->dev, "Some block need full reset!\n");
4817 				return true;
4818 			}
4819 		}
4820 	}
4821 	return false;
4822 }
4823 
4824 /**
4825  * amdgpu_device_ip_soft_reset - do a soft reset
4826  *
4827  * @adev: amdgpu_device pointer
4828  *
4829  * The list of all the hardware IPs that make up the asic is walked and the
4830  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4831  * IP specific hardware or software state changes that are necessary to soft
4832  * reset the IP.
4833  * Returns 0 on success, negative error code on failure.
4834  */
4835 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4836 {
4837 	int i, r = 0;
4838 
4839 	for (i = 0; i < adev->num_ip_blocks; i++) {
4840 		if (!adev->ip_blocks[i].status.valid)
4841 			continue;
4842 		if (adev->ip_blocks[i].status.hang &&
4843 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4844 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4845 			if (r)
4846 				return r;
4847 		}
4848 	}
4849 
4850 	return 0;
4851 }
4852 
4853 /**
4854  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4855  *
4856  * @adev: amdgpu_device pointer
4857  *
4858  * The list of all the hardware IPs that make up the asic is walked and the
4859  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4860  * handles any IP specific hardware or software state changes that are
4861  * necessary after the IP has been soft reset.
4862  * Returns 0 on success, negative error code on failure.
4863  */
4864 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4865 {
4866 	int i, r = 0;
4867 
4868 	for (i = 0; i < adev->num_ip_blocks; i++) {
4869 		if (!adev->ip_blocks[i].status.valid)
4870 			continue;
4871 		if (adev->ip_blocks[i].status.hang &&
4872 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4873 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4874 		if (r)
4875 			return r;
4876 	}
4877 
4878 	return 0;
4879 }
4880 
4881 /**
4882  * amdgpu_device_recover_vram - Recover some VRAM contents
4883  *
4884  * @adev: amdgpu_device pointer
4885  *
4886  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4887  * restore things like GPUVM page tables after a GPU reset where
4888  * the contents of VRAM might be lost.
4889  *
4890  * Returns:
4891  * 0 on success, negative error code on failure.
4892  */
4893 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4894 {
4895 	struct dma_fence *fence = NULL, *next = NULL;
4896 	struct amdgpu_bo *shadow;
4897 	struct amdgpu_bo_vm *vmbo;
4898 	long r = 1, tmo;
4899 
4900 	if (amdgpu_sriov_runtime(adev))
4901 		tmo = msecs_to_jiffies(8000);
4902 	else
4903 		tmo = msecs_to_jiffies(100);
4904 
4905 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4906 	mutex_lock(&adev->shadow_list_lock);
4907 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4908 		/* If vm is compute context or adev is APU, shadow will be NULL */
4909 		if (!vmbo->shadow)
4910 			continue;
4911 		shadow = vmbo->shadow;
4912 
4913 		/* No need to recover an evicted BO */
4914 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4915 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4916 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4917 			continue;
4918 
4919 		r = amdgpu_bo_restore_shadow(shadow, &next);
4920 		if (r)
4921 			break;
4922 
4923 		if (fence) {
4924 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4925 			dma_fence_put(fence);
4926 			fence = next;
4927 			if (tmo == 0) {
4928 				r = -ETIMEDOUT;
4929 				break;
4930 			} else if (tmo < 0) {
4931 				r = tmo;
4932 				break;
4933 			}
4934 		} else {
4935 			fence = next;
4936 		}
4937 	}
4938 	mutex_unlock(&adev->shadow_list_lock);
4939 
4940 	if (fence)
4941 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4942 	dma_fence_put(fence);
4943 
4944 	if (r < 0 || tmo <= 0) {
4945 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4946 		return -EIO;
4947 	}
4948 
4949 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4950 	return 0;
4951 }
4952 
4953 
4954 /**
4955  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4956  *
4957  * @adev: amdgpu_device pointer
4958  * @from_hypervisor: request from hypervisor
4959  *
4960  * do VF FLR and reinitialize Asic
4961  * return 0 means succeeded otherwise failed
4962  */
4963 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4964 				     bool from_hypervisor)
4965 {
4966 	int r;
4967 	struct amdgpu_hive_info *hive = NULL;
4968 	int retry_limit = 0;
4969 
4970 retry:
4971 	amdgpu_amdkfd_pre_reset(adev);
4972 
4973 	amdgpu_device_stop_pending_resets(adev);
4974 
4975 	if (from_hypervisor)
4976 		r = amdgpu_virt_request_full_gpu(adev, true);
4977 	else
4978 		r = amdgpu_virt_reset_gpu(adev);
4979 	if (r)
4980 		return r;
4981 	amdgpu_irq_gpu_reset_resume_helper(adev);
4982 
4983 	/* some sw clean up VF needs to do before recover */
4984 	amdgpu_virt_post_reset(adev);
4985 
4986 	/* Resume IP prior to SMC */
4987 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4988 	if (r)
4989 		goto error;
4990 
4991 	amdgpu_virt_init_data_exchange(adev);
4992 
4993 	r = amdgpu_device_fw_loading(adev);
4994 	if (r)
4995 		return r;
4996 
4997 	/* now we are okay to resume SMC/CP/SDMA */
4998 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4999 	if (r)
5000 		goto error;
5001 
5002 	hive = amdgpu_get_xgmi_hive(adev);
5003 	/* Update PSP FW topology after reset */
5004 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5005 		r = amdgpu_xgmi_update_topology(hive, adev);
5006 
5007 	if (hive)
5008 		amdgpu_put_xgmi_hive(hive);
5009 
5010 	if (!r) {
5011 		r = amdgpu_ib_ring_tests(adev);
5012 
5013 		amdgpu_amdkfd_post_reset(adev);
5014 	}
5015 
5016 error:
5017 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5018 		amdgpu_inc_vram_lost(adev);
5019 		r = amdgpu_device_recover_vram(adev);
5020 	}
5021 	amdgpu_virt_release_full_gpu(adev, true);
5022 
5023 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5024 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5025 			retry_limit++;
5026 			goto retry;
5027 		} else
5028 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5029 	}
5030 
5031 	return r;
5032 }
5033 
5034 /**
5035  * amdgpu_device_has_job_running - check if there is any job in mirror list
5036  *
5037  * @adev: amdgpu_device pointer
5038  *
5039  * check if there is any job in mirror list
5040  */
5041 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5042 {
5043 	int i;
5044 	struct drm_sched_job *job;
5045 
5046 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5047 		struct amdgpu_ring *ring = adev->rings[i];
5048 
5049 		if (!amdgpu_ring_sched_ready(ring))
5050 			continue;
5051 
5052 		spin_lock(&ring->sched.job_list_lock);
5053 		job = list_first_entry_or_null(&ring->sched.pending_list,
5054 					       struct drm_sched_job, list);
5055 		spin_unlock(&ring->sched.job_list_lock);
5056 		if (job)
5057 			return true;
5058 	}
5059 	return false;
5060 }
5061 
5062 /**
5063  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5064  *
5065  * @adev: amdgpu_device pointer
5066  *
5067  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5068  * a hung GPU.
5069  */
5070 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5071 {
5072 
5073 	if (amdgpu_gpu_recovery == 0)
5074 		goto disabled;
5075 
5076 	/* Skip soft reset check in fatal error mode */
5077 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5078 		return true;
5079 
5080 	if (amdgpu_sriov_vf(adev))
5081 		return true;
5082 
5083 	if (amdgpu_gpu_recovery == -1) {
5084 		switch (adev->asic_type) {
5085 #ifdef CONFIG_DRM_AMDGPU_SI
5086 		case CHIP_VERDE:
5087 		case CHIP_TAHITI:
5088 		case CHIP_PITCAIRN:
5089 		case CHIP_OLAND:
5090 		case CHIP_HAINAN:
5091 #endif
5092 #ifdef CONFIG_DRM_AMDGPU_CIK
5093 		case CHIP_KAVERI:
5094 		case CHIP_KABINI:
5095 		case CHIP_MULLINS:
5096 #endif
5097 		case CHIP_CARRIZO:
5098 		case CHIP_STONEY:
5099 		case CHIP_CYAN_SKILLFISH:
5100 			goto disabled;
5101 		default:
5102 			break;
5103 		}
5104 	}
5105 
5106 	return true;
5107 
5108 disabled:
5109 		dev_info(adev->dev, "GPU recovery disabled.\n");
5110 		return false;
5111 }
5112 
5113 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5114 {
5115 	u32 i;
5116 	int ret = 0;
5117 
5118 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5119 
5120 	dev_info(adev->dev, "GPU mode1 reset\n");
5121 
5122 	/* disable BM */
5123 	pci_clear_master(adev->pdev);
5124 
5125 	amdgpu_device_cache_pci_state(adev->pdev);
5126 
5127 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5128 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5129 		ret = amdgpu_dpm_mode1_reset(adev);
5130 	} else {
5131 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5132 		ret = psp_gpu_reset(adev);
5133 	}
5134 
5135 	if (ret)
5136 		goto mode1_reset_failed;
5137 
5138 	amdgpu_device_load_pci_state(adev->pdev);
5139 	ret = amdgpu_psp_wait_for_bootloader(adev);
5140 	if (ret)
5141 		goto mode1_reset_failed;
5142 
5143 	/* wait for asic to come out of reset */
5144 	for (i = 0; i < adev->usec_timeout; i++) {
5145 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5146 
5147 		if (memsize != 0xffffffff)
5148 			break;
5149 		udelay(1);
5150 	}
5151 
5152 	if (i >= adev->usec_timeout) {
5153 		ret = -ETIMEDOUT;
5154 		goto mode1_reset_failed;
5155 	}
5156 
5157 	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5158 
5159 	return 0;
5160 
5161 mode1_reset_failed:
5162 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5163 	return ret;
5164 }
5165 
5166 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5167 				 struct amdgpu_reset_context *reset_context)
5168 {
5169 	int i, r = 0;
5170 	struct amdgpu_job *job = NULL;
5171 	bool need_full_reset =
5172 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5173 
5174 	if (reset_context->reset_req_dev == adev)
5175 		job = reset_context->job;
5176 
5177 	if (amdgpu_sriov_vf(adev)) {
5178 		/* stop the data exchange thread */
5179 		amdgpu_virt_fini_data_exchange(adev);
5180 	}
5181 
5182 	amdgpu_fence_driver_isr_toggle(adev, true);
5183 
5184 	/* block all schedulers and reset given job's ring */
5185 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5186 		struct amdgpu_ring *ring = adev->rings[i];
5187 
5188 		if (!amdgpu_ring_sched_ready(ring))
5189 			continue;
5190 
5191 		/* Clear job fence from fence drv to avoid force_completion
5192 		 * leave NULL and vm flush fence in fence drv
5193 		 */
5194 		amdgpu_fence_driver_clear_job_fences(ring);
5195 
5196 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5197 		amdgpu_fence_driver_force_completion(ring);
5198 	}
5199 
5200 	amdgpu_fence_driver_isr_toggle(adev, false);
5201 
5202 	if (job && job->vm)
5203 		drm_sched_increase_karma(&job->base);
5204 
5205 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5206 	/* If reset handler not implemented, continue; otherwise return */
5207 	if (r == -EOPNOTSUPP)
5208 		r = 0;
5209 	else
5210 		return r;
5211 
5212 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5213 	if (!amdgpu_sriov_vf(adev)) {
5214 
5215 		if (!need_full_reset)
5216 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5217 
5218 		if (!need_full_reset && amdgpu_gpu_recovery &&
5219 		    amdgpu_device_ip_check_soft_reset(adev)) {
5220 			amdgpu_device_ip_pre_soft_reset(adev);
5221 			r = amdgpu_device_ip_soft_reset(adev);
5222 			amdgpu_device_ip_post_soft_reset(adev);
5223 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5224 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5225 				need_full_reset = true;
5226 			}
5227 		}
5228 
5229 		if (need_full_reset)
5230 			r = amdgpu_device_ip_suspend(adev);
5231 		if (need_full_reset)
5232 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5233 		else
5234 			clear_bit(AMDGPU_NEED_FULL_RESET,
5235 				  &reset_context->flags);
5236 	}
5237 
5238 	return r;
5239 }
5240 
5241 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5242 {
5243 	int i;
5244 
5245 	lockdep_assert_held(&adev->reset_domain->sem);
5246 
5247 	for (i = 0; i < adev->reset_info.num_regs; i++) {
5248 		adev->reset_info.reset_dump_reg_value[i] =
5249 			RREG32(adev->reset_info.reset_dump_reg_list[i]);
5250 
5251 		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5252 					     adev->reset_info.reset_dump_reg_value[i]);
5253 	}
5254 
5255 	return 0;
5256 }
5257 
5258 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5259 			 struct amdgpu_reset_context *reset_context)
5260 {
5261 	struct amdgpu_device *tmp_adev = NULL;
5262 	bool need_full_reset, skip_hw_reset, vram_lost = false;
5263 	int r = 0;
5264 
5265 	/* Try reset handler method first */
5266 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5267 				    reset_list);
5268 	amdgpu_reset_reg_dumps(tmp_adev);
5269 
5270 	reset_context->reset_device_list = device_list_handle;
5271 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5272 	/* If reset handler not implemented, continue; otherwise return */
5273 	if (r == -EOPNOTSUPP)
5274 		r = 0;
5275 	else
5276 		return r;
5277 
5278 	/* Reset handler not implemented, use the default method */
5279 	need_full_reset =
5280 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5281 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5282 
5283 	/*
5284 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5285 	 * to allow proper links negotiation in FW (within 1 sec)
5286 	 */
5287 	if (!skip_hw_reset && need_full_reset) {
5288 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5289 			/* For XGMI run all resets in parallel to speed up the process */
5290 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5291 				tmp_adev->gmc.xgmi.pending_reset = false;
5292 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5293 					r = -EALREADY;
5294 			} else
5295 				r = amdgpu_asic_reset(tmp_adev);
5296 
5297 			if (r) {
5298 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
5299 					 r, adev_to_drm(tmp_adev)->unique);
5300 				goto out;
5301 			}
5302 		}
5303 
5304 		/* For XGMI wait for all resets to complete before proceed */
5305 		if (!r) {
5306 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5307 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5308 					flush_work(&tmp_adev->xgmi_reset_work);
5309 					r = tmp_adev->asic_reset_res;
5310 					if (r)
5311 						break;
5312 				}
5313 			}
5314 		}
5315 	}
5316 
5317 	if (!r && amdgpu_ras_intr_triggered()) {
5318 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5319 			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5320 		}
5321 
5322 		amdgpu_ras_intr_cleared();
5323 	}
5324 
5325 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5326 		if (need_full_reset) {
5327 			/* post card */
5328 			amdgpu_ras_set_fed(tmp_adev, false);
5329 			r = amdgpu_device_asic_init(tmp_adev);
5330 			if (r) {
5331 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5332 			} else {
5333 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5334 
5335 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5336 				if (r)
5337 					goto out;
5338 
5339 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5340 
5341 				amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5342 
5343 				if (vram_lost) {
5344 					DRM_INFO("VRAM is lost due to GPU reset!\n");
5345 					amdgpu_inc_vram_lost(tmp_adev);
5346 				}
5347 
5348 				r = amdgpu_device_fw_loading(tmp_adev);
5349 				if (r)
5350 					return r;
5351 
5352 				r = amdgpu_xcp_restore_partition_mode(
5353 					tmp_adev->xcp_mgr);
5354 				if (r)
5355 					goto out;
5356 
5357 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5358 				if (r)
5359 					goto out;
5360 
5361 				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5362 					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5363 
5364 				if (vram_lost)
5365 					amdgpu_device_fill_reset_magic(tmp_adev);
5366 
5367 				/*
5368 				 * Add this ASIC as tracked as reset was already
5369 				 * complete successfully.
5370 				 */
5371 				amdgpu_register_gpu_instance(tmp_adev);
5372 
5373 				if (!reset_context->hive &&
5374 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5375 					amdgpu_xgmi_add_device(tmp_adev);
5376 
5377 				r = amdgpu_device_ip_late_init(tmp_adev);
5378 				if (r)
5379 					goto out;
5380 
5381 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5382 
5383 				/*
5384 				 * The GPU enters bad state once faulty pages
5385 				 * by ECC has reached the threshold, and ras
5386 				 * recovery is scheduled next. So add one check
5387 				 * here to break recovery if it indeed exceeds
5388 				 * bad page threshold, and remind user to
5389 				 * retire this GPU or setting one bigger
5390 				 * bad_page_threshold value to fix this once
5391 				 * probing driver again.
5392 				 */
5393 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5394 					/* must succeed. */
5395 					amdgpu_ras_resume(tmp_adev);
5396 				} else {
5397 					r = -EINVAL;
5398 					goto out;
5399 				}
5400 
5401 				/* Update PSP FW topology after reset */
5402 				if (reset_context->hive &&
5403 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5404 					r = amdgpu_xgmi_update_topology(
5405 						reset_context->hive, tmp_adev);
5406 			}
5407 		}
5408 
5409 out:
5410 		if (!r) {
5411 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5412 			r = amdgpu_ib_ring_tests(tmp_adev);
5413 			if (r) {
5414 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5415 				need_full_reset = true;
5416 				r = -EAGAIN;
5417 				goto end;
5418 			}
5419 		}
5420 
5421 		if (!r)
5422 			r = amdgpu_device_recover_vram(tmp_adev);
5423 		else
5424 			tmp_adev->asic_reset_res = r;
5425 	}
5426 
5427 end:
5428 	if (need_full_reset)
5429 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5430 	else
5431 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5432 	return r;
5433 }
5434 
5435 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5436 {
5437 
5438 	switch (amdgpu_asic_reset_method(adev)) {
5439 	case AMD_RESET_METHOD_MODE1:
5440 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5441 		break;
5442 	case AMD_RESET_METHOD_MODE2:
5443 		adev->mp1_state = PP_MP1_STATE_RESET;
5444 		break;
5445 	default:
5446 		adev->mp1_state = PP_MP1_STATE_NONE;
5447 		break;
5448 	}
5449 }
5450 
5451 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5452 {
5453 	amdgpu_vf_error_trans_all(adev);
5454 	adev->mp1_state = PP_MP1_STATE_NONE;
5455 }
5456 
5457 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5458 {
5459 	struct pci_dev *p = NULL;
5460 
5461 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5462 			adev->pdev->bus->number, 1);
5463 	if (p) {
5464 		pm_runtime_enable(&(p->dev));
5465 		pm_runtime_resume(&(p->dev));
5466 	}
5467 
5468 	pci_dev_put(p);
5469 }
5470 
5471 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5472 {
5473 	enum amd_reset_method reset_method;
5474 	struct pci_dev *p = NULL;
5475 	u64 expires;
5476 
5477 	/*
5478 	 * For now, only BACO and mode1 reset are confirmed
5479 	 * to suffer the audio issue without proper suspended.
5480 	 */
5481 	reset_method = amdgpu_asic_reset_method(adev);
5482 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5483 	     (reset_method != AMD_RESET_METHOD_MODE1))
5484 		return -EINVAL;
5485 
5486 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5487 			adev->pdev->bus->number, 1);
5488 	if (!p)
5489 		return -ENODEV;
5490 
5491 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5492 	if (!expires)
5493 		/*
5494 		 * If we cannot get the audio device autosuspend delay,
5495 		 * a fixed 4S interval will be used. Considering 3S is
5496 		 * the audio controller default autosuspend delay setting.
5497 		 * 4S used here is guaranteed to cover that.
5498 		 */
5499 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5500 
5501 	while (!pm_runtime_status_suspended(&(p->dev))) {
5502 		if (!pm_runtime_suspend(&(p->dev)))
5503 			break;
5504 
5505 		if (expires < ktime_get_mono_fast_ns()) {
5506 			dev_warn(adev->dev, "failed to suspend display audio\n");
5507 			pci_dev_put(p);
5508 			/* TODO: abort the succeeding gpu reset? */
5509 			return -ETIMEDOUT;
5510 		}
5511 	}
5512 
5513 	pm_runtime_disable(&(p->dev));
5514 
5515 	pci_dev_put(p);
5516 	return 0;
5517 }
5518 
5519 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5520 {
5521 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5522 
5523 #if defined(CONFIG_DEBUG_FS)
5524 	if (!amdgpu_sriov_vf(adev))
5525 		cancel_work(&adev->reset_work);
5526 #endif
5527 
5528 	if (adev->kfd.dev)
5529 		cancel_work(&adev->kfd.reset_work);
5530 
5531 	if (amdgpu_sriov_vf(adev))
5532 		cancel_work(&adev->virt.flr_work);
5533 
5534 	if (con && adev->ras_enabled)
5535 		cancel_work(&con->recovery_work);
5536 
5537 }
5538 
5539 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5540 {
5541 	struct amdgpu_device *tmp_adev;
5542 	int ret = 0;
5543 	u32 status;
5544 
5545 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5546 		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5547 		if (PCI_POSSIBLE_ERROR(status)) {
5548 			dev_err(tmp_adev->dev, "device lost from bus!");
5549 			ret = -ENODEV;
5550 		}
5551 	}
5552 
5553 	return ret;
5554 }
5555 
5556 /**
5557  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5558  *
5559  * @adev: amdgpu_device pointer
5560  * @job: which job trigger hang
5561  * @reset_context: amdgpu reset context pointer
5562  *
5563  * Attempt to reset the GPU if it has hung (all asics).
5564  * Attempt to do soft-reset or full-reset and reinitialize Asic
5565  * Returns 0 for success or an error on failure.
5566  */
5567 
5568 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5569 			      struct amdgpu_job *job,
5570 			      struct amdgpu_reset_context *reset_context)
5571 {
5572 	struct list_head device_list, *device_list_handle =  NULL;
5573 	bool job_signaled = false;
5574 	struct amdgpu_hive_info *hive = NULL;
5575 	struct amdgpu_device *tmp_adev = NULL;
5576 	int i, r = 0;
5577 	bool need_emergency_restart = false;
5578 	bool audio_suspended = false;
5579 
5580 	/*
5581 	 * Special case: RAS triggered and full reset isn't supported
5582 	 */
5583 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5584 
5585 	/*
5586 	 * Flush RAM to disk so that after reboot
5587 	 * the user can read log and see why the system rebooted.
5588 	 */
5589 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5590 		amdgpu_ras_get_context(adev)->reboot) {
5591 		DRM_WARN("Emergency reboot.");
5592 
5593 		ksys_sync_helper();
5594 		emergency_restart();
5595 	}
5596 
5597 	dev_info(adev->dev, "GPU %s begin!\n",
5598 		need_emergency_restart ? "jobs stop":"reset");
5599 
5600 	if (!amdgpu_sriov_vf(adev))
5601 		hive = amdgpu_get_xgmi_hive(adev);
5602 	if (hive)
5603 		mutex_lock(&hive->hive_lock);
5604 
5605 	reset_context->job = job;
5606 	reset_context->hive = hive;
5607 	/*
5608 	 * Build list of devices to reset.
5609 	 * In case we are in XGMI hive mode, resort the device list
5610 	 * to put adev in the 1st position.
5611 	 */
5612 	INIT_LIST_HEAD(&device_list);
5613 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5614 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5615 			list_add_tail(&tmp_adev->reset_list, &device_list);
5616 			if (adev->shutdown)
5617 				tmp_adev->shutdown = true;
5618 		}
5619 		if (!list_is_first(&adev->reset_list, &device_list))
5620 			list_rotate_to_front(&adev->reset_list, &device_list);
5621 		device_list_handle = &device_list;
5622 	} else {
5623 		list_add_tail(&adev->reset_list, &device_list);
5624 		device_list_handle = &device_list;
5625 	}
5626 
5627 	if (!amdgpu_sriov_vf(adev)) {
5628 		r = amdgpu_device_health_check(device_list_handle);
5629 		if (r)
5630 			goto end_reset;
5631 	}
5632 
5633 	/* We need to lock reset domain only once both for XGMI and single device */
5634 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5635 				    reset_list);
5636 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5637 
5638 	/* block all schedulers and reset given job's ring */
5639 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5640 
5641 		amdgpu_device_set_mp1_state(tmp_adev);
5642 
5643 		/*
5644 		 * Try to put the audio codec into suspend state
5645 		 * before gpu reset started.
5646 		 *
5647 		 * Due to the power domain of the graphics device
5648 		 * is shared with AZ power domain. Without this,
5649 		 * we may change the audio hardware from behind
5650 		 * the audio driver's back. That will trigger
5651 		 * some audio codec errors.
5652 		 */
5653 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5654 			audio_suspended = true;
5655 
5656 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5657 
5658 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5659 
5660 		if (!amdgpu_sriov_vf(tmp_adev))
5661 			amdgpu_amdkfd_pre_reset(tmp_adev);
5662 
5663 		/*
5664 		 * Mark these ASICs to be reseted as untracked first
5665 		 * And add them back after reset completed
5666 		 */
5667 		amdgpu_unregister_gpu_instance(tmp_adev);
5668 
5669 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5670 
5671 		/* disable ras on ALL IPs */
5672 		if (!need_emergency_restart &&
5673 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5674 			amdgpu_ras_suspend(tmp_adev);
5675 
5676 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5677 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5678 
5679 			if (!amdgpu_ring_sched_ready(ring))
5680 				continue;
5681 
5682 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5683 
5684 			if (need_emergency_restart)
5685 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5686 		}
5687 		atomic_inc(&tmp_adev->gpu_reset_counter);
5688 	}
5689 
5690 	if (need_emergency_restart)
5691 		goto skip_sched_resume;
5692 
5693 	/*
5694 	 * Must check guilty signal here since after this point all old
5695 	 * HW fences are force signaled.
5696 	 *
5697 	 * job->base holds a reference to parent fence
5698 	 */
5699 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5700 		job_signaled = true;
5701 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5702 		goto skip_hw_reset;
5703 	}
5704 
5705 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5706 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5707 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5708 		/*TODO Should we stop ?*/
5709 		if (r) {
5710 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5711 				  r, adev_to_drm(tmp_adev)->unique);
5712 			tmp_adev->asic_reset_res = r;
5713 		}
5714 
5715 		if (!amdgpu_sriov_vf(tmp_adev))
5716 			/*
5717 			* Drop all pending non scheduler resets. Scheduler resets
5718 			* were already dropped during drm_sched_stop
5719 			*/
5720 			amdgpu_device_stop_pending_resets(tmp_adev);
5721 	}
5722 
5723 	/* Actual ASIC resets if needed.*/
5724 	/* Host driver will handle XGMI hive reset for SRIOV */
5725 	if (amdgpu_sriov_vf(adev)) {
5726 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
5727 		if (r)
5728 			adev->asic_reset_res = r;
5729 
5730 		/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5731 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5732 			    IP_VERSION(9, 4, 2) ||
5733 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5734 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5735 			amdgpu_ras_resume(adev);
5736 	} else {
5737 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5738 		if (r && r == -EAGAIN)
5739 			goto retry;
5740 	}
5741 
5742 skip_hw_reset:
5743 
5744 	/* Post ASIC reset for all devs .*/
5745 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5746 
5747 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5748 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5749 
5750 			if (!amdgpu_ring_sched_ready(ring))
5751 				continue;
5752 
5753 			drm_sched_start(&ring->sched, true);
5754 		}
5755 
5756 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5757 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5758 
5759 		if (tmp_adev->asic_reset_res)
5760 			r = tmp_adev->asic_reset_res;
5761 
5762 		tmp_adev->asic_reset_res = 0;
5763 
5764 		if (r) {
5765 			/* bad news, how to tell it to userspace ? */
5766 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5767 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5768 		} else {
5769 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5770 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5771 				DRM_WARN("smart shift update failed\n");
5772 		}
5773 	}
5774 
5775 skip_sched_resume:
5776 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5777 		/* unlock kfd: SRIOV would do it separately */
5778 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5779 			amdgpu_amdkfd_post_reset(tmp_adev);
5780 
5781 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5782 		 * need to bring up kfd here if it's not be initialized before
5783 		 */
5784 		if (!adev->kfd.init_complete)
5785 			amdgpu_amdkfd_device_init(adev);
5786 
5787 		if (audio_suspended)
5788 			amdgpu_device_resume_display_audio(tmp_adev);
5789 
5790 		amdgpu_device_unset_mp1_state(tmp_adev);
5791 
5792 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5793 	}
5794 
5795 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5796 					    reset_list);
5797 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5798 
5799 end_reset:
5800 	if (hive) {
5801 		mutex_unlock(&hive->hive_lock);
5802 		amdgpu_put_xgmi_hive(hive);
5803 	}
5804 
5805 	if (r)
5806 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5807 
5808 	atomic_set(&adev->reset_domain->reset_res, r);
5809 	return r;
5810 }
5811 
5812 /**
5813  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5814  *
5815  * @adev: amdgpu_device pointer
5816  * @speed: pointer to the speed of the link
5817  * @width: pointer to the width of the link
5818  *
5819  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5820  * first physical partner to an AMD dGPU.
5821  * This will exclude any virtual switches and links.
5822  */
5823 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5824 					    enum pci_bus_speed *speed,
5825 					    enum pcie_link_width *width)
5826 {
5827 	struct pci_dev *parent = adev->pdev;
5828 
5829 	if (!speed || !width)
5830 		return;
5831 
5832 	*speed = PCI_SPEED_UNKNOWN;
5833 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5834 
5835 	while ((parent = pci_upstream_bridge(parent))) {
5836 		/* skip upstream/downstream switches internal to dGPU*/
5837 		if (parent->vendor == PCI_VENDOR_ID_ATI)
5838 			continue;
5839 		*speed = pcie_get_speed_cap(parent);
5840 		*width = pcie_get_width_cap(parent);
5841 		break;
5842 	}
5843 }
5844 
5845 /**
5846  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5847  *
5848  * @adev: amdgpu_device pointer
5849  *
5850  * Fetchs and stores in the driver the PCIE capabilities (gen speed
5851  * and lanes) of the slot the device is in. Handles APUs and
5852  * virtualized environments where PCIE config space may not be available.
5853  */
5854 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5855 {
5856 	struct pci_dev *pdev;
5857 	enum pci_bus_speed speed_cap, platform_speed_cap;
5858 	enum pcie_link_width platform_link_width;
5859 
5860 	if (amdgpu_pcie_gen_cap)
5861 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5862 
5863 	if (amdgpu_pcie_lane_cap)
5864 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5865 
5866 	/* covers APUs as well */
5867 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5868 		if (adev->pm.pcie_gen_mask == 0)
5869 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5870 		if (adev->pm.pcie_mlw_mask == 0)
5871 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5872 		return;
5873 	}
5874 
5875 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5876 		return;
5877 
5878 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5879 					&platform_link_width);
5880 
5881 	if (adev->pm.pcie_gen_mask == 0) {
5882 		/* asic caps */
5883 		pdev = adev->pdev;
5884 		speed_cap = pcie_get_speed_cap(pdev);
5885 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5886 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5887 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5888 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5889 		} else {
5890 			if (speed_cap == PCIE_SPEED_32_0GT)
5891 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5892 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5893 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5894 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5895 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5896 			else if (speed_cap == PCIE_SPEED_16_0GT)
5897 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5898 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5899 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5900 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5901 			else if (speed_cap == PCIE_SPEED_8_0GT)
5902 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5903 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5904 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5905 			else if (speed_cap == PCIE_SPEED_5_0GT)
5906 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5907 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5908 			else
5909 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5910 		}
5911 		/* platform caps */
5912 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5913 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5914 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5915 		} else {
5916 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5917 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5918 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5919 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5920 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5921 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5922 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5923 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5924 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5925 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5926 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5927 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5928 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5929 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5930 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5931 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5932 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5933 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5934 			else
5935 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5936 
5937 		}
5938 	}
5939 	if (adev->pm.pcie_mlw_mask == 0) {
5940 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5941 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5942 		} else {
5943 			switch (platform_link_width) {
5944 			case PCIE_LNK_X32:
5945 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5946 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5947 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5948 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5949 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5950 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5951 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5952 				break;
5953 			case PCIE_LNK_X16:
5954 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5955 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5956 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5957 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5958 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5959 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5960 				break;
5961 			case PCIE_LNK_X12:
5962 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5963 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5964 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5965 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5966 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5967 				break;
5968 			case PCIE_LNK_X8:
5969 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5970 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5971 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5972 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5973 				break;
5974 			case PCIE_LNK_X4:
5975 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5976 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5977 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5978 				break;
5979 			case PCIE_LNK_X2:
5980 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5981 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5982 				break;
5983 			case PCIE_LNK_X1:
5984 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5985 				break;
5986 			default:
5987 				break;
5988 			}
5989 		}
5990 	}
5991 }
5992 
5993 /**
5994  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5995  *
5996  * @adev: amdgpu_device pointer
5997  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5998  *
5999  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6000  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6001  * @peer_adev.
6002  */
6003 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6004 				      struct amdgpu_device *peer_adev)
6005 {
6006 #ifdef CONFIG_HSA_AMD_P2P
6007 	uint64_t address_mask = peer_adev->dev->dma_mask ?
6008 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6009 	resource_size_t aper_limit =
6010 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
6011 	bool p2p_access =
6012 		!adev->gmc.xgmi.connected_to_cpu &&
6013 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6014 
6015 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6016 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6017 		!(adev->gmc.aper_base & address_mask ||
6018 		  aper_limit & address_mask));
6019 #else
6020 	return false;
6021 #endif
6022 }
6023 
6024 int amdgpu_device_baco_enter(struct drm_device *dev)
6025 {
6026 	struct amdgpu_device *adev = drm_to_adev(dev);
6027 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6028 
6029 	if (!amdgpu_device_supports_baco(dev))
6030 		return -ENOTSUPP;
6031 
6032 	if (ras && adev->ras_enabled &&
6033 	    adev->nbio.funcs->enable_doorbell_interrupt)
6034 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6035 
6036 	return amdgpu_dpm_baco_enter(adev);
6037 }
6038 
6039 int amdgpu_device_baco_exit(struct drm_device *dev)
6040 {
6041 	struct amdgpu_device *adev = drm_to_adev(dev);
6042 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6043 	int ret = 0;
6044 
6045 	if (!amdgpu_device_supports_baco(dev))
6046 		return -ENOTSUPP;
6047 
6048 	ret = amdgpu_dpm_baco_exit(adev);
6049 	if (ret)
6050 		return ret;
6051 
6052 	if (ras && adev->ras_enabled &&
6053 	    adev->nbio.funcs->enable_doorbell_interrupt)
6054 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6055 
6056 	if (amdgpu_passthrough(adev) &&
6057 	    adev->nbio.funcs->clear_doorbell_interrupt)
6058 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6059 
6060 	return 0;
6061 }
6062 
6063 /**
6064  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6065  * @pdev: PCI device struct
6066  * @state: PCI channel state
6067  *
6068  * Description: Called when a PCI error is detected.
6069  *
6070  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6071  */
6072 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6073 {
6074 	struct drm_device *dev = pci_get_drvdata(pdev);
6075 	struct amdgpu_device *adev = drm_to_adev(dev);
6076 	int i;
6077 
6078 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6079 
6080 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6081 		DRM_WARN("No support for XGMI hive yet...");
6082 		return PCI_ERS_RESULT_DISCONNECT;
6083 	}
6084 
6085 	adev->pci_channel_state = state;
6086 
6087 	switch (state) {
6088 	case pci_channel_io_normal:
6089 		return PCI_ERS_RESULT_CAN_RECOVER;
6090 	/* Fatal error, prepare for slot reset */
6091 	case pci_channel_io_frozen:
6092 		/*
6093 		 * Locking adev->reset_domain->sem will prevent any external access
6094 		 * to GPU during PCI error recovery
6095 		 */
6096 		amdgpu_device_lock_reset_domain(adev->reset_domain);
6097 		amdgpu_device_set_mp1_state(adev);
6098 
6099 		/*
6100 		 * Block any work scheduling as we do for regular GPU reset
6101 		 * for the duration of the recovery
6102 		 */
6103 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6104 			struct amdgpu_ring *ring = adev->rings[i];
6105 
6106 			if (!amdgpu_ring_sched_ready(ring))
6107 				continue;
6108 
6109 			drm_sched_stop(&ring->sched, NULL);
6110 		}
6111 		atomic_inc(&adev->gpu_reset_counter);
6112 		return PCI_ERS_RESULT_NEED_RESET;
6113 	case pci_channel_io_perm_failure:
6114 		/* Permanent error, prepare for device removal */
6115 		return PCI_ERS_RESULT_DISCONNECT;
6116 	}
6117 
6118 	return PCI_ERS_RESULT_NEED_RESET;
6119 }
6120 
6121 /**
6122  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6123  * @pdev: pointer to PCI device
6124  */
6125 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6126 {
6127 
6128 	DRM_INFO("PCI error: mmio enabled callback!!\n");
6129 
6130 	/* TODO - dump whatever for debugging purposes */
6131 
6132 	/* This called only if amdgpu_pci_error_detected returns
6133 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6134 	 * works, no need to reset slot.
6135 	 */
6136 
6137 	return PCI_ERS_RESULT_RECOVERED;
6138 }
6139 
6140 /**
6141  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6142  * @pdev: PCI device struct
6143  *
6144  * Description: This routine is called by the pci error recovery
6145  * code after the PCI slot has been reset, just before we
6146  * should resume normal operations.
6147  */
6148 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6149 {
6150 	struct drm_device *dev = pci_get_drvdata(pdev);
6151 	struct amdgpu_device *adev = drm_to_adev(dev);
6152 	int r, i;
6153 	struct amdgpu_reset_context reset_context;
6154 	u32 memsize;
6155 	struct list_head device_list;
6156 	struct amdgpu_hive_info *hive;
6157 	int hive_ras_recovery = 0;
6158 	struct amdgpu_ras *ras;
6159 
6160 	/* PCI error slot reset should be skipped During RAS recovery */
6161 	hive = amdgpu_get_xgmi_hive(adev);
6162 	if (hive) {
6163 		hive_ras_recovery = atomic_read(&hive->ras_recovery);
6164 		amdgpu_put_xgmi_hive(hive);
6165 	}
6166 	ras = amdgpu_ras_get_context(adev);
6167 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
6168 		 ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
6169 		return PCI_ERS_RESULT_RECOVERED;
6170 
6171 	DRM_INFO("PCI error: slot reset callback!!\n");
6172 
6173 	memset(&reset_context, 0, sizeof(reset_context));
6174 
6175 	INIT_LIST_HEAD(&device_list);
6176 	list_add_tail(&adev->reset_list, &device_list);
6177 
6178 	/* wait for asic to come out of reset */
6179 	msleep(500);
6180 
6181 	/* Restore PCI confspace */
6182 	amdgpu_device_load_pci_state(pdev);
6183 
6184 	/* confirm  ASIC came out of reset */
6185 	for (i = 0; i < adev->usec_timeout; i++) {
6186 		memsize = amdgpu_asic_get_config_memsize(adev);
6187 
6188 		if (memsize != 0xffffffff)
6189 			break;
6190 		udelay(1);
6191 	}
6192 	if (memsize == 0xffffffff) {
6193 		r = -ETIME;
6194 		goto out;
6195 	}
6196 
6197 	reset_context.method = AMD_RESET_METHOD_NONE;
6198 	reset_context.reset_req_dev = adev;
6199 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6200 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6201 
6202 	adev->no_hw_access = true;
6203 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6204 	adev->no_hw_access = false;
6205 	if (r)
6206 		goto out;
6207 
6208 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6209 
6210 out:
6211 	if (!r) {
6212 		if (amdgpu_device_cache_pci_state(adev->pdev))
6213 			pci_restore_state(adev->pdev);
6214 
6215 		DRM_INFO("PCIe error recovery succeeded\n");
6216 	} else {
6217 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
6218 		amdgpu_device_unset_mp1_state(adev);
6219 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6220 	}
6221 
6222 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6223 }
6224 
6225 /**
6226  * amdgpu_pci_resume() - resume normal ops after PCI reset
6227  * @pdev: pointer to PCI device
6228  *
6229  * Called when the error recovery driver tells us that its
6230  * OK to resume normal operation.
6231  */
6232 void amdgpu_pci_resume(struct pci_dev *pdev)
6233 {
6234 	struct drm_device *dev = pci_get_drvdata(pdev);
6235 	struct amdgpu_device *adev = drm_to_adev(dev);
6236 	int i;
6237 
6238 
6239 	DRM_INFO("PCI error: resume callback!!\n");
6240 
6241 	/* Only continue execution for the case of pci_channel_io_frozen */
6242 	if (adev->pci_channel_state != pci_channel_io_frozen)
6243 		return;
6244 
6245 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6246 		struct amdgpu_ring *ring = adev->rings[i];
6247 
6248 		if (!amdgpu_ring_sched_ready(ring))
6249 			continue;
6250 
6251 		drm_sched_start(&ring->sched, true);
6252 	}
6253 
6254 	amdgpu_device_unset_mp1_state(adev);
6255 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6256 }
6257 
6258 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6259 {
6260 	struct drm_device *dev = pci_get_drvdata(pdev);
6261 	struct amdgpu_device *adev = drm_to_adev(dev);
6262 	int r;
6263 
6264 	r = pci_save_state(pdev);
6265 	if (!r) {
6266 		kfree(adev->pci_state);
6267 
6268 		adev->pci_state = pci_store_saved_state(pdev);
6269 
6270 		if (!adev->pci_state) {
6271 			DRM_ERROR("Failed to store PCI saved state");
6272 			return false;
6273 		}
6274 	} else {
6275 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6276 		return false;
6277 	}
6278 
6279 	return true;
6280 }
6281 
6282 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6283 {
6284 	struct drm_device *dev = pci_get_drvdata(pdev);
6285 	struct amdgpu_device *adev = drm_to_adev(dev);
6286 	int r;
6287 
6288 	if (!adev->pci_state)
6289 		return false;
6290 
6291 	r = pci_load_saved_state(pdev, adev->pci_state);
6292 
6293 	if (!r) {
6294 		pci_restore_state(pdev);
6295 	} else {
6296 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6297 		return false;
6298 	}
6299 
6300 	return true;
6301 }
6302 
6303 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6304 		struct amdgpu_ring *ring)
6305 {
6306 #ifdef CONFIG_X86_64
6307 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6308 		return;
6309 #endif
6310 	if (adev->gmc.xgmi.connected_to_cpu)
6311 		return;
6312 
6313 	if (ring && ring->funcs->emit_hdp_flush)
6314 		amdgpu_ring_emit_hdp_flush(ring);
6315 	else
6316 		amdgpu_asic_flush_hdp(adev, ring);
6317 }
6318 
6319 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6320 		struct amdgpu_ring *ring)
6321 {
6322 #ifdef CONFIG_X86_64
6323 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6324 		return;
6325 #endif
6326 	if (adev->gmc.xgmi.connected_to_cpu)
6327 		return;
6328 
6329 	amdgpu_asic_invalidate_hdp(adev, ring);
6330 }
6331 
6332 int amdgpu_in_reset(struct amdgpu_device *adev)
6333 {
6334 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6335 }
6336 
6337 /**
6338  * amdgpu_device_halt() - bring hardware to some kind of halt state
6339  *
6340  * @adev: amdgpu_device pointer
6341  *
6342  * Bring hardware to some kind of halt state so that no one can touch it
6343  * any more. It will help to maintain error context when error occurred.
6344  * Compare to a simple hang, the system will keep stable at least for SSH
6345  * access. Then it should be trivial to inspect the hardware state and
6346  * see what's going on. Implemented as following:
6347  *
6348  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6349  *    clears all CPU mappings to device, disallows remappings through page faults
6350  * 2. amdgpu_irq_disable_all() disables all interrupts
6351  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6352  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6353  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6354  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6355  *    flush any in flight DMA operations
6356  */
6357 void amdgpu_device_halt(struct amdgpu_device *adev)
6358 {
6359 	struct pci_dev *pdev = adev->pdev;
6360 	struct drm_device *ddev = adev_to_drm(adev);
6361 
6362 	amdgpu_xcp_dev_unplug(adev);
6363 	drm_dev_unplug(ddev);
6364 
6365 	amdgpu_irq_disable_all(adev);
6366 
6367 	amdgpu_fence_driver_hw_fini(adev);
6368 
6369 	adev->no_hw_access = true;
6370 
6371 	amdgpu_device_unmap_mmio(adev);
6372 
6373 	pci_disable_device(pdev);
6374 	pci_wait_for_pending_transaction(pdev);
6375 }
6376 
6377 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6378 				u32 reg)
6379 {
6380 	unsigned long flags, address, data;
6381 	u32 r;
6382 
6383 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6384 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6385 
6386 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6387 	WREG32(address, reg * 4);
6388 	(void)RREG32(address);
6389 	r = RREG32(data);
6390 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6391 	return r;
6392 }
6393 
6394 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6395 				u32 reg, u32 v)
6396 {
6397 	unsigned long flags, address, data;
6398 
6399 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6400 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6401 
6402 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6403 	WREG32(address, reg * 4);
6404 	(void)RREG32(address);
6405 	WREG32(data, v);
6406 	(void)RREG32(data);
6407 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6408 }
6409 
6410 /**
6411  * amdgpu_device_switch_gang - switch to a new gang
6412  * @adev: amdgpu_device pointer
6413  * @gang: the gang to switch to
6414  *
6415  * Try to switch to a new gang.
6416  * Returns: NULL if we switched to the new gang or a reference to the current
6417  * gang leader.
6418  */
6419 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6420 					    struct dma_fence *gang)
6421 {
6422 	struct dma_fence *old = NULL;
6423 
6424 	do {
6425 		dma_fence_put(old);
6426 		rcu_read_lock();
6427 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
6428 		rcu_read_unlock();
6429 
6430 		if (old == gang)
6431 			break;
6432 
6433 		if (!dma_fence_is_signaled(old))
6434 			return old;
6435 
6436 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6437 			 old, gang) != old);
6438 
6439 	dma_fence_put(old);
6440 	return NULL;
6441 }
6442 
6443 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6444 {
6445 	switch (adev->asic_type) {
6446 #ifdef CONFIG_DRM_AMDGPU_SI
6447 	case CHIP_HAINAN:
6448 #endif
6449 	case CHIP_TOPAZ:
6450 		/* chips with no display hardware */
6451 		return false;
6452 #ifdef CONFIG_DRM_AMDGPU_SI
6453 	case CHIP_TAHITI:
6454 	case CHIP_PITCAIRN:
6455 	case CHIP_VERDE:
6456 	case CHIP_OLAND:
6457 #endif
6458 #ifdef CONFIG_DRM_AMDGPU_CIK
6459 	case CHIP_BONAIRE:
6460 	case CHIP_HAWAII:
6461 	case CHIP_KAVERI:
6462 	case CHIP_KABINI:
6463 	case CHIP_MULLINS:
6464 #endif
6465 	case CHIP_TONGA:
6466 	case CHIP_FIJI:
6467 	case CHIP_POLARIS10:
6468 	case CHIP_POLARIS11:
6469 	case CHIP_POLARIS12:
6470 	case CHIP_VEGAM:
6471 	case CHIP_CARRIZO:
6472 	case CHIP_STONEY:
6473 		/* chips with display hardware */
6474 		return true;
6475 	default:
6476 		/* IP discovery */
6477 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6478 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6479 			return false;
6480 		return true;
6481 	}
6482 }
6483 
6484 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6485 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6486 		uint32_t expected_value, uint32_t mask)
6487 {
6488 	uint32_t ret = 0;
6489 	uint32_t old_ = 0;
6490 	uint32_t tmp_ = RREG32(reg_addr);
6491 	uint32_t loop = adev->usec_timeout;
6492 
6493 	while ((tmp_ & (mask)) != (expected_value)) {
6494 		if (old_ != tmp_) {
6495 			loop = adev->usec_timeout;
6496 			old_ = tmp_;
6497 		} else
6498 			udelay(1);
6499 		tmp_ = RREG32(reg_addr);
6500 		loop--;
6501 		if (!loop) {
6502 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6503 				  inst, reg_name, (uint32_t)expected_value,
6504 				  (uint32_t)(tmp_ & (mask)));
6505 			ret = -ETIMEDOUT;
6506 			break;
6507 		}
6508 	}
6509 	return ret;
6510 }
6511