1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
37 
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 
78 #include <linux/suspend.h>
79 #include <drm/task_barrier.h>
80 #include <linux/pm_runtime.h>
81 
82 #include <drm/drm_drv.h>
83 
84 #if IS_ENABLED(CONFIG_X86)
85 #include <asm/intel-family.h>
86 #endif
87 
88 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
89 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
95 
96 #define AMDGPU_RESUME_MS		2000
97 #define AMDGPU_MAX_RETRY_LIMIT		2
98 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
99 
100 static const struct drm_driver amdgpu_kms_driver;
101 
102 const char *amdgpu_asic_name[] = {
103 	"TAHITI",
104 	"PITCAIRN",
105 	"VERDE",
106 	"OLAND",
107 	"HAINAN",
108 	"BONAIRE",
109 	"KAVERI",
110 	"KABINI",
111 	"HAWAII",
112 	"MULLINS",
113 	"TOPAZ",
114 	"TONGA",
115 	"FIJI",
116 	"CARRIZO",
117 	"STONEY",
118 	"POLARIS10",
119 	"POLARIS11",
120 	"POLARIS12",
121 	"VEGAM",
122 	"VEGA10",
123 	"VEGA12",
124 	"VEGA20",
125 	"RAVEN",
126 	"ARCTURUS",
127 	"RENOIR",
128 	"ALDEBARAN",
129 	"NAVI10",
130 	"CYAN_SKILLFISH",
131 	"NAVI14",
132 	"NAVI12",
133 	"SIENNA_CICHLID",
134 	"NAVY_FLOUNDER",
135 	"VANGOGH",
136 	"DIMGREY_CAVEFISH",
137 	"BEIGE_GOBY",
138 	"YELLOW_CARP",
139 	"IP DISCOVERY",
140 	"LAST",
141 };
142 
143 /**
144  * DOC: pcie_replay_count
145  *
146  * The amdgpu driver provides a sysfs API for reporting the total number
147  * of PCIe replays (NAKs).
148  * The file pcie_replay_count is used for this and returns the total
149  * number of replays as a sum of the NAKs generated and the NAKs received.
150  */
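/*
 * Usage note (illustrative; the exact sysfs path can vary by system):
 * reading the attribute returns a single decimal count, e.g.
 *
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 *   42
 */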
151 
152 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 		struct device_attribute *attr, char *buf)
154 {
155 	struct drm_device *ddev = dev_get_drvdata(dev);
156 	struct amdgpu_device *adev = drm_to_adev(ddev);
157 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158 
159 	return sysfs_emit(buf, "%llu\n", cnt);
160 }
161 
162 static DEVICE_ATTR(pcie_replay_count, 0444,
163 		amdgpu_device_get_pcie_replay_count, NULL);
164 
165 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
166 					  struct bin_attribute *attr, char *buf,
167 					  loff_t ppos, size_t count)
168 {
169 	struct device *dev = kobj_to_dev(kobj);
170 	struct drm_device *ddev = dev_get_drvdata(dev);
171 	struct amdgpu_device *adev = drm_to_adev(ddev);
172 	ssize_t bytes_read;
173 
174 	switch (ppos) {
175 	case AMDGPU_SYS_REG_STATE_XGMI:
176 		bytes_read = amdgpu_asic_get_reg_state(
177 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
178 		break;
179 	case AMDGPU_SYS_REG_STATE_WAFL:
180 		bytes_read = amdgpu_asic_get_reg_state(
181 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
182 		break;
183 	case AMDGPU_SYS_REG_STATE_PCIE:
184 		bytes_read = amdgpu_asic_get_reg_state(
185 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
186 		break;
187 	case AMDGPU_SYS_REG_STATE_USR:
188 		bytes_read = amdgpu_asic_get_reg_state(
189 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
190 		break;
191 	case AMDGPU_SYS_REG_STATE_USR_1:
192 		bytes_read = amdgpu_asic_get_reg_state(
193 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
194 		break;
195 	default:
196 		return -EINVAL;
197 	}
198 
199 	return bytes_read;
200 }
201 
202 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
203 	 AMDGPU_SYS_REG_STATE_END);
204 
205 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
206 {
207 	int ret;
208 
209 	if (!amdgpu_asic_get_reg_state_supported(adev))
210 		return 0;
211 
212 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
213 
214 	return ret;
215 }
216 
217 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
218 {
219 	if (!amdgpu_asic_get_reg_state_supported(adev))
220 		return;
221 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
222 }
223 
224 /**
225  * DOC: board_info
226  *
227  * The amdgpu driver provides a sysfs API for reporting board related information.
228  * It provides the form factor information in the format
229  *
230  *   type : form factor
231  *
232  * Possible form factor values
233  *
234  * - "cem"		- PCIE CEM card
235  * - "oam"		- Open Compute Accelerator Module
236  * - "unknown"	- Not known
237  *
238  */
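/*
 * Usage note (illustrative; the exact sysfs path can vary by system):
 * reading the attribute returns one "type : <form factor>" line, e.g.
 *
 *   $ cat /sys/class/drm/card0/device/board_info
 *   type : oam
 */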
239 
240 static ssize_t amdgpu_device_get_board_info(struct device *dev,
241 					    struct device_attribute *attr,
242 					    char *buf)
243 {
244 	struct drm_device *ddev = dev_get_drvdata(dev);
245 	struct amdgpu_device *adev = drm_to_adev(ddev);
246 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
247 	const char *pkg;
248 
249 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
250 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
251 
252 	switch (pkg_type) {
253 	case AMDGPU_PKG_TYPE_CEM:
254 		pkg = "cem";
255 		break;
256 	case AMDGPU_PKG_TYPE_OAM:
257 		pkg = "oam";
258 		break;
259 	default:
260 		pkg = "unknown";
261 		break;
262 	}
263 
264 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
265 }
266 
267 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
268 
269 static struct attribute *amdgpu_board_attrs[] = {
270 	&dev_attr_board_info.attr,
271 	NULL,
272 };
273 
274 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
275 					     struct attribute *attr, int n)
276 {
277 	struct device *dev = kobj_to_dev(kobj);
278 	struct drm_device *ddev = dev_get_drvdata(dev);
279 	struct amdgpu_device *adev = drm_to_adev(ddev);
280 
281 	if (adev->flags & AMD_IS_APU)
282 		return 0;
283 
284 	return attr->mode;
285 }
286 
287 static const struct attribute_group amdgpu_board_attrs_group = {
288 	.attrs = amdgpu_board_attrs,
289 	.is_visible = amdgpu_board_attrs_is_visible
290 };
291 
292 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
293 
294 
295 /**
296  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
297  *
298  * @dev: drm_device pointer
299  *
300  * Returns true if the device is a dGPU with ATPX power control,
301  * otherwise returns false.
302  */
303 bool amdgpu_device_supports_px(struct drm_device *dev)
304 {
305 	struct amdgpu_device *adev = drm_to_adev(dev);
306 
307 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
308 		return true;
309 	return false;
310 }
311 
312 /**
313  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
314  *
315  * @dev: drm_device pointer
316  *
317  * Returns true if the device is a dGPU with ACPI power control,
318  * otherwise returns false.
319  */
320 bool amdgpu_device_supports_boco(struct drm_device *dev)
321 {
322 	struct amdgpu_device *adev = drm_to_adev(dev);
323 
324 	if (adev->has_pr3 ||
325 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
326 		return true;
327 	return false;
328 }
329 
330 /**
331  * amdgpu_device_supports_baco - Does the device support BACO
332  *
333  * @dev: drm_device pointer
334  *
335  * Returns true if the device supports BACO,
336  * otherwise returns false.
337  */
338 bool amdgpu_device_supports_baco(struct drm_device *dev)
339 {
340 	struct amdgpu_device *adev = drm_to_adev(dev);
341 
342 	return amdgpu_asic_supports_baco(adev);
343 }
344 
345 /**
346  * amdgpu_device_supports_smart_shift - Is the device a dGPU with
347  * smart shift support
348  *
349  * @dev: drm_device pointer
350  *
351  * Returns true if the device is a dGPU with Smart Shift support,
352  * otherwise returns false.
353  */
354 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
355 {
356 	return (amdgpu_device_supports_boco(dev) &&
357 		amdgpu_acpi_is_power_shift_control_supported());
358 }
359 
360 /*
361  * VRAM access helper functions
362  */
363 
364 /**
365  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
366  *
367  * @adev: amdgpu_device pointer
368  * @pos: offset of the buffer in vram
369  * @buf: virtual address of the buffer in system memory
370  * @size: read/write size, the buffer at @buf must be at least @size bytes
371  * @write: true - write to vram, otherwise - read from vram
372  */
373 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
374 			     void *buf, size_t size, bool write)
375 {
376 	unsigned long flags;
377 	uint32_t hi = ~0, tmp = 0;
378 	uint32_t *data = buf;
379 	uint64_t last;
380 	int idx;
381 
382 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
383 		return;
384 
385 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
386 
387 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
388 	for (last = pos + size; pos < last; pos += 4) {
389 		tmp = pos >> 31;
390 
391 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
392 		if (tmp != hi) {
393 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
394 			hi = tmp;
395 		}
396 		if (write)
397 			WREG32_NO_KIQ(mmMM_DATA, *data++);
398 		else
399 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
400 	}
401 
402 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
403 	drm_dev_exit(idx);
404 }
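/*
 * Usage sketch (illustrative offsets/sizes): read 16 bytes starting at
 * VRAM offset 0x1000 through the MM_INDEX/MM_DATA window.  Note that
 * both @pos and @size must be 4 byte aligned.
 *
 *   u32 data[4];
 *
 *   amdgpu_device_mm_access(adev, 0x1000, data, sizeof(data), false);
 */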
405 
406 /**
407  * amdgpu_device_aper_access - access vram through the vram aperture
408  *
409  * @adev: amdgpu_device pointer
410  * @pos: offset of the buffer in vram
411  * @buf: virtual address of the buffer in system memory
412  * @size: read/write size, the buffer at @buf must be at least @size bytes
413  * @write: true - write to vram, otherwise - read from vram
414  *
415  * Returns the number of bytes transferred.
416  */
417 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
418 				 void *buf, size_t size, bool write)
419 {
420 #ifdef CONFIG_64BIT
421 	void __iomem *addr;
422 	size_t count = 0;
423 	uint64_t last;
424 
425 	if (!adev->mman.aper_base_kaddr)
426 		return 0;
427 
428 	last = min(pos + size, adev->gmc.visible_vram_size);
429 	if (last > pos) {
430 		addr = adev->mman.aper_base_kaddr + pos;
431 		count = last - pos;
432 
433 		if (write) {
434 			memcpy_toio(addr, buf, count);
435 			/* Make sure HDP write cache flush happens without any reordering
436 			 * after the system memory contents are sent over PCIe to the device
437 			 */
438 			mb();
439 			amdgpu_device_flush_hdp(adev, NULL);
440 		} else {
441 			amdgpu_device_invalidate_hdp(adev, NULL);
442 			/* Make sure HDP read cache is invalidated before issuing a read
443 			 * to the PCIe device
444 			 */
445 			mb();
446 			memcpy_fromio(buf, addr, count);
447 		}
448 
449 	}
450 
451 	return count;
452 #else
453 	return 0;
454 #endif
455 }
456 
457 /**
458  * amdgpu_device_vram_access - read/write a buffer in vram
459  *
460  * @adev: amdgpu_device pointer
461  * @pos: offset of the buffer in vram
462  * @buf: virtual address of the buffer in system memory
463  * @size: read/write size, the buffer at @buf must be at least @size bytes
464  * @write: true - write to vram, otherwise - read from vram
465  */
466 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
467 			       void *buf, size_t size, bool write)
468 {
469 	size_t count;
470 
471 	/* try using the vram aperture to access vram first */
472 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
473 	size -= count;
474 	if (size) {
475 		/* use MM_INDEX/MM_DATA to access the rest of vram */
476 		pos += count;
477 		buf += count;
478 		amdgpu_device_mm_access(adev, pos, buf, size, write);
479 	}
480 }
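/*
 * Usage sketch (illustrative offsets/values): write a small CPU buffer
 * into VRAM at offset 0x2000.  The helper prefers the CPU visible
 * aperture and falls back to MM_INDEX/MM_DATA for any remainder that is
 * not CPU accessible.
 *
 *   u32 pattern[8] = { 0xdeadbeef };
 *
 *   amdgpu_device_vram_access(adev, 0x2000, pattern, sizeof(pattern), true);
 */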
481 
482 /*
483  * register access helper functions.
484  */
485 
486 /* Check if hw access should be skipped because of hotplug or device error */
487 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
488 {
489 	if (adev->no_hw_access)
490 		return true;
491 
492 #ifdef CONFIG_LOCKDEP
493 	/*
494 	 * This is a bit complicated to understand, so worth a comment. What we assert
495 	 * here is that the GPU reset is not running on another thread in parallel.
496 	 *
497 	 * For this we trylock the read side of the reset semaphore, if that succeeds
498 	 * we know that the reset is not running in parallel.
499 	 *
500 	 * If the trylock fails we assert that we are either already holding the read
501 	 * side of the lock or are the reset thread itself and hold the write side of
502 	 * the lock.
503 	 */
504 	if (in_task()) {
505 		if (down_read_trylock(&adev->reset_domain->sem))
506 			up_read(&adev->reset_domain->sem);
507 		else
508 			lockdep_assert_held(&adev->reset_domain->sem);
509 	}
510 #endif
511 	return false;
512 }
513 
514 /**
515  * amdgpu_device_rreg - read a memory mapped IO or indirect register
516  *
517  * @adev: amdgpu_device pointer
518  * @reg: dword aligned register offset
519  * @acc_flags: access flags which require special behavior
520  *
521  * Returns the 32 bit value from the offset specified.
522  */
523 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
524 			    uint32_t reg, uint32_t acc_flags)
525 {
526 	uint32_t ret;
527 
528 	if (amdgpu_device_skip_hw_access(adev))
529 		return 0;
530 
531 	if ((reg * 4) < adev->rmmio_size) {
532 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
533 		    amdgpu_sriov_runtime(adev) &&
534 		    down_read_trylock(&adev->reset_domain->sem)) {
535 			ret = amdgpu_kiq_rreg(adev, reg, 0);
536 			up_read(&adev->reset_domain->sem);
537 		} else {
538 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
539 		}
540 	} else {
541 		ret = adev->pcie_rreg(adev, reg * 4);
542 	}
543 
544 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
545 
546 	return ret;
547 }
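/*
 * Usage sketch: most callers go through the register access macros
 * (RREG32() and friends) rather than calling this directly.  A direct
 * read that bypasses the KIQ path would look roughly like:
 *
 *   u32 val = amdgpu_device_rreg(adev, mmMM_INDEX, AMDGPU_REGS_NO_KIQ);
 */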
548 
549 /*
550  * MMIO register read with bytes helper functions
551  * @offset: byte offset from MMIO start
552  */
553 
554 /**
555  * amdgpu_mm_rreg8 - read a memory mapped IO register
556  *
557  * @adev: amdgpu_device pointer
558  * @offset: byte aligned register offset
559  *
560  * Returns the 8 bit value from the offset specified.
561  */
562 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
563 {
564 	if (amdgpu_device_skip_hw_access(adev))
565 		return 0;
566 
567 	if (offset < adev->rmmio_size)
568 		return (readb(adev->rmmio + offset));
569 	BUG();
570 }
571 
572 
573 /**
574  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
575  *
576  * @adev: amdgpu_device pointer
577  * @reg: dword aligned register offset
578  * @acc_flags: access flags which require special behavior
579  * @xcc_id: xcc accelerated compute core id
580  *
581  * Returns the 32 bit value from the offset specified.
582  */
583 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
584 				uint32_t reg, uint32_t acc_flags,
585 				uint32_t xcc_id)
586 {
587 	uint32_t ret, rlcg_flag;
588 
589 	if (amdgpu_device_skip_hw_access(adev))
590 		return 0;
591 
592 	if ((reg * 4) < adev->rmmio_size) {
593 		if (amdgpu_sriov_vf(adev) &&
594 		    !amdgpu_sriov_runtime(adev) &&
595 		    adev->gfx.rlc.rlcg_reg_access_supported &&
596 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
597 							 GC_HWIP, false,
598 							 &rlcg_flag)) {
599 			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
600 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
601 		    amdgpu_sriov_runtime(adev) &&
602 		    down_read_trylock(&adev->reset_domain->sem)) {
603 			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
604 			up_read(&adev->reset_domain->sem);
605 		} else {
606 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
607 		}
608 	} else {
609 		ret = adev->pcie_rreg(adev, reg * 4);
610 	}
611 
612 	return ret;
613 }
614 
615 /*
616  * MMIO register write with bytes helper functions
617  * @offset: byte offset from MMIO start
618  * @value: the value to be written to the register
619  */
620 
621 /**
622  * amdgpu_mm_wreg8 - write a memory mapped IO register
623  *
624  * @adev: amdgpu_device pointer
625  * @offset: byte aligned register offset
626  * @value: 8 bit value to write
627  *
628  * Writes the value specified to the offset specified.
629  */
630 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
631 {
632 	if (amdgpu_device_skip_hw_access(adev))
633 		return;
634 
635 	if (offset < adev->rmmio_size)
636 		writeb(value, adev->rmmio + offset);
637 	else
638 		BUG();
639 }
640 
641 /**
642  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
643  *
644  * @adev: amdgpu_device pointer
645  * @reg: dword aligned register offset
646  * @v: 32 bit value to write to the register
647  * @acc_flags: access flags which require special behavior
648  *
649  * Writes the value specified to the offset specified.
650  */
651 void amdgpu_device_wreg(struct amdgpu_device *adev,
652 			uint32_t reg, uint32_t v,
653 			uint32_t acc_flags)
654 {
655 	if (amdgpu_device_skip_hw_access(adev))
656 		return;
657 
658 	if ((reg * 4) < adev->rmmio_size) {
659 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
660 		    amdgpu_sriov_runtime(adev) &&
661 		    down_read_trylock(&adev->reset_domain->sem)) {
662 			amdgpu_kiq_wreg(adev, reg, v, 0);
663 			up_read(&adev->reset_domain->sem);
664 		} else {
665 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
666 		}
667 	} else {
668 		adev->pcie_wreg(adev, reg * 4, v);
669 	}
670 
671 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
672 }
673 
674 /**
675  * amdgpu_mm_wreg_mmio_rlc - write a register either through direct/indirect MMIO or via the RLC path if in range
676  *
677  * @adev: amdgpu_device pointer
678  * @reg: mmio/rlc register
679  * @v: value to write
680  * @xcc_id: xcc accelerated compute core id
681  *
682  * This function is invoked only for debugfs register access.
683  */
684 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
685 			     uint32_t reg, uint32_t v,
686 			     uint32_t xcc_id)
687 {
688 	if (amdgpu_device_skip_hw_access(adev))
689 		return;
690 
691 	if (amdgpu_sriov_fullaccess(adev) &&
692 	    adev->gfx.rlc.funcs &&
693 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
694 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
695 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
696 	} else if ((reg * 4) >= adev->rmmio_size) {
697 		adev->pcie_wreg(adev, reg * 4, v);
698 	} else {
699 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
700 	}
701 }
702 
703 /**
704  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
705  *
706  * @adev: amdgpu_device pointer
707  * @reg: dword aligned register offset
708  * @v: 32 bit value to write to the register
709  * @acc_flags: access flags which require special behavior
710  * @xcc_id: xcc accelerated compute core id
711  *
712  * Writes the value specified to the offset specified.
713  */
714 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
715 			uint32_t reg, uint32_t v,
716 			uint32_t acc_flags, uint32_t xcc_id)
717 {
718 	uint32_t rlcg_flag;
719 
720 	if (amdgpu_device_skip_hw_access(adev))
721 		return;
722 
723 	if ((reg * 4) < adev->rmmio_size) {
724 		if (amdgpu_sriov_vf(adev) &&
725 		    !amdgpu_sriov_runtime(adev) &&
726 		    adev->gfx.rlc.rlcg_reg_access_supported &&
727 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
728 							 GC_HWIP, true,
729 							 &rlcg_flag)) {
730 			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
731 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
732 		    amdgpu_sriov_runtime(adev) &&
733 		    down_read_trylock(&adev->reset_domain->sem)) {
734 			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
735 			up_read(&adev->reset_domain->sem);
736 		} else {
737 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
738 		}
739 	} else {
740 		adev->pcie_wreg(adev, reg * 4, v);
741 	}
742 }
743 
744 /**
745  * amdgpu_device_indirect_rreg - read an indirect register
746  *
747  * @adev: amdgpu_device pointer
748  * @reg_addr: indirect register address to read from
749  *
750  * Returns the value of indirect register @reg_addr
751  */
752 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
753 				u32 reg_addr)
754 {
755 	unsigned long flags, pcie_index, pcie_data;
756 	void __iomem *pcie_index_offset;
757 	void __iomem *pcie_data_offset;
758 	u32 r;
759 
760 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
761 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
762 
763 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
764 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
765 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
766 
767 	writel(reg_addr, pcie_index_offset);
768 	readl(pcie_index_offset);
769 	r = readl(pcie_data_offset);
770 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
771 
772 	return r;
773 }
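/*
 * Note on the index/data pattern used by the indirect helpers above and
 * below: the target register offset is first written to the PCIE index
 * register, the index register is then read back (so the posted write is
 * known to have reached the hardware) and only then is the data register
 * read or written.  A minimal sketch of a read:
 *
 *   writel(reg_addr, pcie_index_offset);   // select the register
 *   readl(pcie_index_offset);              // flush the posted write
 *   r = readl(pcie_data_offset);           // fetch the value
 */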
774 
775 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
776 				    u64 reg_addr)
777 {
778 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
779 	u32 r;
780 	void __iomem *pcie_index_offset;
781 	void __iomem *pcie_index_hi_offset;
782 	void __iomem *pcie_data_offset;
783 
784 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
785 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
786 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
787 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
788 	else
789 		pcie_index_hi = 0;
790 
791 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
792 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
793 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
794 	if (pcie_index_hi != 0)
795 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
796 				pcie_index_hi * 4;
797 
798 	writel(reg_addr, pcie_index_offset);
799 	readl(pcie_index_offset);
800 	if (pcie_index_hi != 0) {
801 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
802 		readl(pcie_index_hi_offset);
803 	}
804 	r = readl(pcie_data_offset);
805 
806 	/* clear the high bits */
807 	if (pcie_index_hi != 0) {
808 		writel(0, pcie_index_hi_offset);
809 		readl(pcie_index_hi_offset);
810 	}
811 
812 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
813 
814 	return r;
815 }
816 
817 /**
818  * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
819  *
820  * @adev: amdgpu_device pointer
821  * @reg_addr: indirect register address to read from
822  *
823  * Returns the value of indirect register @reg_addr
824  */
825 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
826 				  u32 reg_addr)
827 {
828 	unsigned long flags, pcie_index, pcie_data;
829 	void __iomem *pcie_index_offset;
830 	void __iomem *pcie_data_offset;
831 	u64 r;
832 
833 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
834 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
835 
836 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
837 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
838 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
839 
840 	/* read low 32 bits */
841 	writel(reg_addr, pcie_index_offset);
842 	readl(pcie_index_offset);
843 	r = readl(pcie_data_offset);
844 	/* read high 32 bits */
845 	writel(reg_addr + 4, pcie_index_offset);
846 	readl(pcie_index_offset);
847 	r |= ((u64)readl(pcie_data_offset) << 32);
848 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
849 
850 	return r;
851 }
852 
853 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
854 				  u64 reg_addr)
855 {
856 	unsigned long flags, pcie_index, pcie_data;
857 	unsigned long pcie_index_hi = 0;
858 	void __iomem *pcie_index_offset;
859 	void __iomem *pcie_index_hi_offset;
860 	void __iomem *pcie_data_offset;
861 	u64 r;
862 
863 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
864 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
865 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
866 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
867 
868 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
869 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
870 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
871 	if (pcie_index_hi != 0)
872 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
873 			pcie_index_hi * 4;
874 
875 	/* read low 32 bits */
876 	writel(reg_addr, pcie_index_offset);
877 	readl(pcie_index_offset);
878 	if (pcie_index_hi != 0) {
879 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
880 		readl(pcie_index_hi_offset);
881 	}
882 	r = readl(pcie_data_offset);
883 	/* read high 32 bits */
884 	writel(reg_addr + 4, pcie_index_offset);
885 	readl(pcie_index_offset);
886 	if (pcie_index_hi != 0) {
887 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
888 		readl(pcie_index_hi_offset);
889 	}
890 	r |= ((u64)readl(pcie_data_offset) << 32);
891 
892 	/* clear the high bits */
893 	if (pcie_index_hi != 0) {
894 		writel(0, pcie_index_hi_offset);
895 		readl(pcie_index_hi_offset);
896 	}
897 
898 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
899 
900 	return r;
901 }
902 
903 /**
904  * amdgpu_device_indirect_wreg - write an indirect register
905  *
906  * @adev: amdgpu_device pointer
907  * @reg_addr: indirect register offset
908  * @reg_data: indirect register data
909  *
910  */
911 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
912 				 u32 reg_addr, u32 reg_data)
913 {
914 	unsigned long flags, pcie_index, pcie_data;
915 	void __iomem *pcie_index_offset;
916 	void __iomem *pcie_data_offset;
917 
918 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
919 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
920 
921 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
922 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
923 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
924 
925 	writel(reg_addr, pcie_index_offset);
926 	readl(pcie_index_offset);
927 	writel(reg_data, pcie_data_offset);
928 	readl(pcie_data_offset);
929 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
930 }
931 
932 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
933 				     u64 reg_addr, u32 reg_data)
934 {
935 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
936 	void __iomem *pcie_index_offset;
937 	void __iomem *pcie_index_hi_offset;
938 	void __iomem *pcie_data_offset;
939 
940 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
941 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
942 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
943 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
944 	else
945 		pcie_index_hi = 0;
946 
947 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
948 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
949 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
950 	if (pcie_index_hi != 0)
951 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
952 				pcie_index_hi * 4;
953 
954 	writel(reg_addr, pcie_index_offset);
955 	readl(pcie_index_offset);
956 	if (pcie_index_hi != 0) {
957 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
958 		readl(pcie_index_hi_offset);
959 	}
960 	writel(reg_data, pcie_data_offset);
961 	readl(pcie_data_offset);
962 
963 	/* clear the high bits */
964 	if (pcie_index_hi != 0) {
965 		writel(0, pcie_index_hi_offset);
966 		readl(pcie_index_hi_offset);
967 	}
968 
969 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
970 }
971 
972 /**
973  * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register
974  *
975  * @adev: amdgpu_device pointer
976  * @reg_addr: indirect register offset
977  * @reg_data: indirect register data
978  *
979  */
980 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
981 				   u32 reg_addr, u64 reg_data)
982 {
983 	unsigned long flags, pcie_index, pcie_data;
984 	void __iomem *pcie_index_offset;
985 	void __iomem *pcie_data_offset;
986 
987 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
988 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
989 
990 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
991 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
992 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
993 
994 	/* write low 32 bits */
995 	writel(reg_addr, pcie_index_offset);
996 	readl(pcie_index_offset);
997 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
998 	readl(pcie_data_offset);
999 	/* write high 32 bits */
1000 	writel(reg_addr + 4, pcie_index_offset);
1001 	readl(pcie_index_offset);
1002 	writel((u32)(reg_data >> 32), pcie_data_offset);
1003 	readl(pcie_data_offset);
1004 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1005 }
1006 
1007 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1008 				   u64 reg_addr, u64 reg_data)
1009 {
1010 	unsigned long flags, pcie_index, pcie_data;
1011 	unsigned long pcie_index_hi = 0;
1012 	void __iomem *pcie_index_offset;
1013 	void __iomem *pcie_index_hi_offset;
1014 	void __iomem *pcie_data_offset;
1015 
1016 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1017 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1018 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1019 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1020 
1021 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1022 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1023 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1024 	if (pcie_index_hi != 0)
1025 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1026 				pcie_index_hi * 4;
1027 
1028 	/* write low 32 bits */
1029 	writel(reg_addr, pcie_index_offset);
1030 	readl(pcie_index_offset);
1031 	if (pcie_index_hi != 0) {
1032 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1033 		readl(pcie_index_hi_offset);
1034 	}
1035 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1036 	readl(pcie_data_offset);
1037 	/* write high 32 bits */
1038 	writel(reg_addr + 4, pcie_index_offset);
1039 	readl(pcie_index_offset);
1040 	if (pcie_index_hi != 0) {
1041 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1042 		readl(pcie_index_hi_offset);
1043 	}
1044 	writel((u32)(reg_data >> 32), pcie_data_offset);
1045 	readl(pcie_data_offset);
1046 
1047 	/* clear the high bits */
1048 	if (pcie_index_hi != 0) {
1049 		writel(0, pcie_index_hi_offset);
1050 		readl(pcie_index_hi_offset);
1051 	}
1052 
1053 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1054 }
1055 
1056 /**
1057  * amdgpu_device_get_rev_id - query device rev_id
1058  *
1059  * @adev: amdgpu_device pointer
1060  *
1061  * Returns the device rev_id.
1062  */
1063 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1064 {
1065 	return adev->nbio.funcs->get_rev_id(adev);
1066 }
1067 
1068 /**
1069  * amdgpu_invalid_rreg - dummy reg read function
1070  *
1071  * @adev: amdgpu_device pointer
1072  * @reg: offset of register
1073  *
1074  * Dummy register read function.  Used for register blocks
1075  * that certain asics don't have (all asics).
1076  * Returns the value in the register.
1077  */
1078 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1079 {
1080 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1081 	BUG();
1082 	return 0;
1083 }
1084 
1085 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1086 {
1087 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1088 	BUG();
1089 	return 0;
1090 }
1091 
1092 /**
1093  * amdgpu_invalid_wreg - dummy reg write function
1094  *
1095  * @adev: amdgpu_device pointer
1096  * @reg: offset of register
1097  * @v: value to write to the register
1098  *
1099  * Dummy register write function.  Used for register blocks
1100  * that certain asics don't have (all asics).
1101  */
1102 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1103 {
1104 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1105 		  reg, v);
1106 	BUG();
1107 }
1108 
1109 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1110 {
1111 	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1112 		  reg, v);
1113 	BUG();
1114 }
1115 
1116 /**
1117  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1118  *
1119  * @adev: amdgpu_device pointer
1120  * @reg: offset of register
1121  *
1122  * Dummy register read function.  Used for register blocks
1123  * that certain asics don't have (all asics).
1124  * Returns the value in the register.
1125  */
1126 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1127 {
1128 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1129 	BUG();
1130 	return 0;
1131 }
1132 
1133 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1134 {
1135 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1136 	BUG();
1137 	return 0;
1138 }
1139 
1140 /**
1141  * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
1142  *
1143  * @adev: amdgpu_device pointer
1144  * @reg: offset of register
1145  * @v: value to write to the register
1146  *
1147  * Dummy register write function.  Used for register blocks
1148  * that certain asics don't have (all asics).
1149  */
1150 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1151 {
1152 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1153 		  reg, v);
1154 	BUG();
1155 }
1156 
1157 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1158 {
1159 	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1160 		  reg, v);
1161 	BUG();
1162 }
1163 
1164 /**
1165  * amdgpu_block_invalid_rreg - dummy reg read function
1166  *
1167  * @adev: amdgpu_device pointer
1168  * @block: offset of instance
1169  * @reg: offset of register
1170  *
1171  * Dummy register read function.  Used for register blocks
1172  * that certain asics don't have (all asics).
1173  * Returns the value in the register.
1174  */
1175 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1176 					  uint32_t block, uint32_t reg)
1177 {
1178 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1179 		  reg, block);
1180 	BUG();
1181 	return 0;
1182 }
1183 
1184 /**
1185  * amdgpu_block_invalid_wreg - dummy reg write function
1186  *
1187  * @adev: amdgpu_device pointer
1188  * @block: offset of instance
1189  * @reg: offset of register
1190  * @v: value to write to the register
1191  *
1192  * Dummy register write function.  Used for register blocks
1193  * that certain asics don't have (all asics).
1194  */
1195 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1196 				      uint32_t block,
1197 				      uint32_t reg, uint32_t v)
1198 {
1199 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1200 		  reg, block, v);
1201 	BUG();
1202 }
1203 
1204 /**
1205  * amdgpu_device_asic_init - Wrapper for atom asic_init
1206  *
1207  * @adev: amdgpu_device pointer
1208  *
1209  * Does any asic specific work and then calls atom asic init.
1210  */
1211 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1212 {
1213 	int ret;
1214 
1215 	amdgpu_asic_pre_asic_init(adev);
1216 
1217 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1218 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1219 		amdgpu_psp_wait_for_bootloader(adev);
1220 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1221 		/* TODO: check the return val and stop device initialization if boot fails */
1222 		amdgpu_psp_query_boot_status(adev);
1223 		return ret;
1224 	} else {
1225 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 /**
1232  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1233  *
1234  * @adev: amdgpu_device pointer
1235  *
1236  * Allocates a scratch page of VRAM for use by various things in the
1237  * driver.
1238  */
1239 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1240 {
1241 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1242 				       AMDGPU_GEM_DOMAIN_VRAM |
1243 				       AMDGPU_GEM_DOMAIN_GTT,
1244 				       &adev->mem_scratch.robj,
1245 				       &adev->mem_scratch.gpu_addr,
1246 				       (void **)&adev->mem_scratch.ptr);
1247 }
1248 
1249 /**
1250  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1251  *
1252  * @adev: amdgpu_device pointer
1253  *
1254  * Frees the VRAM scratch page.
1255  */
1256 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1257 {
1258 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1259 }
1260 
1261 /**
1262  * amdgpu_device_program_register_sequence - program an array of registers.
1263  *
1264  * @adev: amdgpu_device pointer
1265  * @registers: pointer to the register array
1266  * @array_size: size of the register array
1267  *
1268  * Programs an array of registers with and/or masks.
1269  * This is a helper for setting golden registers.
1270  */
1271 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1272 					     const u32 *registers,
1273 					     const u32 array_size)
1274 {
1275 	u32 tmp, reg, and_mask, or_mask;
1276 	int i;
1277 
1278 	if (array_size % 3)
1279 		return;
1280 
1281 	for (i = 0; i < array_size; i += 3) {
1282 		reg = registers[i + 0];
1283 		and_mask = registers[i + 1];
1284 		or_mask = registers[i + 2];
1285 
1286 		if (and_mask == 0xffffffff) {
1287 			tmp = or_mask;
1288 		} else {
1289 			tmp = RREG32(reg);
1290 			tmp &= ~and_mask;
1291 			if (adev->family >= AMDGPU_FAMILY_AI)
1292 				tmp |= (or_mask & and_mask);
1293 			else
1294 				tmp |= or_mask;
1295 		}
1296 		WREG32(reg, tmp);
1297 	}
1298 }
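/*
 * Usage sketch (hypothetical table name, illustrative values): the
 * register array is laid out as {offset, and_mask, or_mask} triplets,
 * so a golden settings table and its application look roughly like:
 *
 *   static const u32 golden_settings_example[] = {
 *           mmMM_INDEX, 0xffffffff, 0x00000000,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *                                           ARRAY_SIZE(golden_settings_example));
 *
 * An and_mask of 0xffffffff simply replaces the register with or_mask.
 */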
1299 
1300 /**
1301  * amdgpu_device_pci_config_reset - reset the GPU
1302  *
1303  * @adev: amdgpu_device pointer
1304  *
1305  * Resets the GPU using the pci config reset sequence.
1306  * Only applicable to asics prior to vega10.
1307  */
1308 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1309 {
1310 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1311 }
1312 
1313 /**
1314  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1315  *
1316  * @adev: amdgpu_device pointer
1317  *
1318  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1319  */
1320 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1321 {
1322 	return pci_reset_function(adev->pdev);
1323 }
1324 
1325 /*
1326  * amdgpu_device_wb_*()
1327  * Writeback is the method by which the GPU updates special pages in memory
1328  * with the status of certain GPU events (fences, ring pointers, etc.).
1329  */
1330 
1331 /**
1332  * amdgpu_device_wb_fini - Disable Writeback and free memory
1333  *
1334  * @adev: amdgpu_device pointer
1335  *
1336  * Disables Writeback and frees the Writeback memory (all asics).
1337  * Used at driver shutdown.
1338  */
1339 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1340 {
1341 	if (adev->wb.wb_obj) {
1342 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1343 				      &adev->wb.gpu_addr,
1344 				      (void **)&adev->wb.wb);
1345 		adev->wb.wb_obj = NULL;
1346 	}
1347 }
1348 
1349 /**
1350  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1351  *
1352  * @adev: amdgpu_device pointer
1353  *
1354  * Initializes writeback and allocates writeback memory (all asics).
1355  * Used at driver startup.
1356  * Returns 0 on success or a negative error code on failure.
1357  */
1358 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1359 {
1360 	int r;
1361 
1362 	if (adev->wb.wb_obj == NULL) {
1363 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1364 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1365 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1366 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1367 					    (void **)&adev->wb.wb);
1368 		if (r) {
1369 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1370 			return r;
1371 		}
1372 
1373 		adev->wb.num_wb = AMDGPU_MAX_WB;
1374 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1375 
1376 		/* clear wb memory */
1377 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1378 	}
1379 
1380 	return 0;
1381 }
1382 
1383 /**
1384  * amdgpu_device_wb_get - Allocate a wb entry
1385  *
1386  * @adev: amdgpu_device pointer
1387  * @wb: wb index
1388  *
1389  * Allocate a wb slot for use by the driver (all asics).
1390  * Returns 0 on success or -EINVAL on failure.
1391  */
1392 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1393 {
1394 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1395 
1396 	if (offset < adev->wb.num_wb) {
1397 		__set_bit(offset, adev->wb.used);
1398 		*wb = offset << 3; /* convert to dw offset */
1399 		return 0;
1400 	} else {
1401 		return -EINVAL;
1402 	}
1403 }
1404 
1405 /**
1406  * amdgpu_device_wb_free - Free a wb entry
1407  *
1408  * @adev: amdgpu_device pointer
1409  * @wb: wb index
1410  *
1411  * Free a wb slot allocated for use by the driver (all asics)
1412  */
1413 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1414 {
1415 	wb >>= 3;
1416 	if (wb < adev->wb.num_wb)
1417 		__clear_bit(wb, adev->wb.used);
1418 }
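/*
 * Usage sketch: a ring or IP block typically reserves a writeback slot
 * at init time and releases it on teardown, roughly:
 *
 *   u32 wb;
 *   int r;
 *
 *   r = amdgpu_device_wb_get(adev, &wb);
 *   if (r)
 *           return r;
 *   // the slot's CPU view is &adev->wb.wb[wb], its GPU address is
 *   // adev->wb.gpu_addr + wb * 4
 *   amdgpu_device_wb_free(adev, wb);
 */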
1419 
1420 /**
1421  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1422  *
1423  * @adev: amdgpu_device pointer
1424  *
1425  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1426  * to fail, but if any of the BARs is not accessible after the resize we abort
1427  * driver loading by returning -ENODEV.
1428  */
1429 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1430 {
1431 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1432 	struct pci_bus *root;
1433 	struct resource *res;
1434 	unsigned int i;
1435 	u16 cmd;
1436 	int r;
1437 
1438 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1439 		return 0;
1440 
1441 	/* Bypass for VF */
1442 	if (amdgpu_sriov_vf(adev))
1443 		return 0;
1444 
1445 	/* skip if the bios has already enabled large BAR */
1446 	if (adev->gmc.real_vram_size &&
1447 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1448 		return 0;
1449 
1450 	/* Check if the root BUS has 64bit memory resources */
1451 	root = adev->pdev->bus;
1452 	while (root->parent)
1453 		root = root->parent;
1454 
1455 	pci_bus_for_each_resource(root, res, i) {
1456 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1457 		    res->start > 0x100000000ull)
1458 			break;
1459 	}
1460 
1461 	/* Trying to resize is pointless without a root hub window above 4GB */
1462 	if (!res)
1463 		return 0;
1464 
1465 	/* Limit the BAR size to what is available */
1466 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1467 			rbar_size);
1468 
1469 	/* Disable memory decoding while we change the BAR addresses and size */
1470 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1471 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1472 			      cmd & ~PCI_COMMAND_MEMORY);
1473 
1474 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1475 	amdgpu_doorbell_fini(adev);
1476 	if (adev->asic_type >= CHIP_BONAIRE)
1477 		pci_release_resource(adev->pdev, 2);
1478 
1479 	pci_release_resource(adev->pdev, 0);
1480 
1481 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1482 	if (r == -ENOSPC)
1483 		DRM_INFO("Not enough PCI address space for a large BAR.");
1484 	else if (r && r != -ENOTSUPP)
1485 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1486 
1487 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1488 
1489 	/* When the doorbell or fb BAR isn't available we have no chance of
1490 	 * using the device.
1491 	 */
1492 	r = amdgpu_doorbell_init(adev);
1493 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1494 		return -ENODEV;
1495 
1496 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1497 
1498 	return 0;
1499 }
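/*
 * Note on the size encoding used above (assuming the standard PCI
 * resizable BAR encoding that pci_rebar_bytes_to_size() follows): sizes
 * are powers of two with 1 MB as size 0, i.e. bytes = 1 MB << size.
 * Worked example: 8 GB of VRAM corresponds to rbar_size 13, because
 * 1 MB << 13 = 8 GB.
 */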
1500 
1501 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1502 {
1503 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1504 		return false;
1505 
1506 	return true;
1507 }
1508 
1509 /*
1510  * GPU helpers function.
1511  */
1512 /**
1513  * amdgpu_device_need_post - check if the hw needs post or not
1514  *
1515  * @adev: amdgpu_device pointer
1516  *
1517  * Check if the asic has been initialized (all asics) at driver startup,
1518  * or if post is needed because a hw reset was performed.
1519  * Returns true if need or false if not.
1520  */
1521 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1522 {
1523 	uint32_t reg;
1524 
1525 	if (amdgpu_sriov_vf(adev))
1526 		return false;
1527 
1528 	if (!amdgpu_device_read_bios(adev))
1529 		return false;
1530 
1531 	if (amdgpu_passthrough(adev)) {
1532 		/* for FIJI: in the whole GPU pass-through virtualization case, after a VM reboot
1533 		 * some old SMC firmware still needs the driver to do vPost, otherwise the GPU hangs.
1534 		 * SMC firmware versions above 22.15 don't have this flaw, so we force
1535 		 * vPost for SMC versions below 22.15
1536 		 */
1537 		if (adev->asic_type == CHIP_FIJI) {
1538 			int err;
1539 			uint32_t fw_ver;
1540 
1541 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1542 			/* force vPost if an error occurred */
1543 			if (err)
1544 				return true;
1545 
1546 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1547 			if (fw_ver < 0x00160e00)
1548 				return true;
1549 		}
1550 	}
1551 
1552 	/* Don't post if we need to reset whole hive on init */
1553 	if (adev->gmc.xgmi.pending_reset)
1554 		return false;
1555 
1556 	if (adev->has_hw_reset) {
1557 		adev->has_hw_reset = false;
1558 		return true;
1559 	}
1560 
1561 	/* bios scratch used on CIK+ */
1562 	if (adev->asic_type >= CHIP_BONAIRE)
1563 		return amdgpu_atombios_scratch_need_asic_init(adev);
1564 
1565 	/* check MEM_SIZE for older asics */
1566 	reg = amdgpu_asic_get_config_memsize(adev);
1567 
1568 	if ((reg != 0) && (reg != 0xffffffff))
1569 		return false;
1570 
1571 	return true;
1572 }
1573 
1574 /*
1575  * Check whether seamless boot is supported.
1576  *
1577  * So far we only support seamless boot on DCE 3.0 or later.
1578  * If users report that it works on older ASICs as well, we may
1579  * loosen this.
1580  */
1581 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1582 {
1583 	switch (amdgpu_seamless) {
1584 	case -1:
1585 		break;
1586 	case 1:
1587 		return true;
1588 	case 0:
1589 		return false;
1590 	default:
1591 		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1592 			  amdgpu_seamless);
1593 		return false;
1594 	}
1595 
1596 	if (!(adev->flags & AMD_IS_APU))
1597 		return false;
1598 
1599 	if (adev->mman.keep_stolen_vga_memory)
1600 		return false;
1601 
1602 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1603 }
1604 
1605 /*
1606  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1607  * don't support dynamic speed switching. Until we have confirmation from Intel
1608  * that a specific host supports it, it's safer to keep it disabled for all.
1609  *
1610  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1611  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1612  */
1613 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1614 {
1615 #if IS_ENABLED(CONFIG_X86)
1616 	struct cpuinfo_x86 *c = &cpu_data(0);
1617 
1618 	/* eGPUs change speeds based on USB4 fabric conditions */
1619 	if (dev_is_removable(adev->dev))
1620 		return true;
1621 
1622 	if (c->x86_vendor == X86_VENDOR_INTEL)
1623 		return false;
1624 #endif
1625 	return true;
1626 }
1627 
1628 /**
1629  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1630  *
1631  * @adev: amdgpu_device pointer
1632  *
1633  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1634  * be set for this device.
1635  *
1636  * Returns true if it should be used or false if not.
1637  */
1638 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1639 {
1640 	switch (amdgpu_aspm) {
1641 	case -1:
1642 		break;
1643 	case 0:
1644 		return false;
1645 	case 1:
1646 		return true;
1647 	default:
1648 		return false;
1649 	}
1650 	if (adev->flags & AMD_IS_APU)
1651 		return false;
1652 	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1653 		return false;
1654 	return pcie_aspm_enabled(adev->pdev);
1655 }
1656 
1657 /* if we get transitioned to only one device, take VGA back */
1658 /**
1659  * amdgpu_device_vga_set_decode - enable/disable vga decode
1660  *
1661  * @pdev: PCI device pointer
1662  * @state: enable/disable vga decode
1663  *
1664  * Enable/disable vga decode (all asics).
1665  * Returns VGA resource flags.
1666  */
1667 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1668 		bool state)
1669 {
1670 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1671 
1672 	amdgpu_asic_set_vga_state(adev, state);
1673 	if (state)
1674 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1675 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1676 	else
1677 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1678 }
1679 
1680 /**
1681  * amdgpu_device_check_block_size - validate the vm block size
1682  *
1683  * @adev: amdgpu_device pointer
1684  *
1685  * Validates the vm block size specified via module parameter.
1686  * The vm block size defines the number of bits in the page table versus the page directory,
1687  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1688  * page table and the remaining bits are in the page directory.
1689  */
1690 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1691 {
1692 	/* defines number of bits in page table versus page directory,
1693 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1694 	 * page table and the remaining bits are in the page directory
1695 	 */
1696 	if (amdgpu_vm_block_size == -1)
1697 		return;
1698 
1699 	if (amdgpu_vm_block_size < 9) {
1700 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1701 			 amdgpu_vm_block_size);
1702 		amdgpu_vm_block_size = -1;
1703 	}
1704 }
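/*
 * Worked example: with 4 KB pages the low 12 bits address bytes within a
 * page, and a block size of 9 means one page table maps 2^9 = 512 pages,
 * i.e. 2 MB of address space per page table; the remaining VM bits are
 * resolved through the page directory.
 */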
1705 
1706 /**
1707  * amdgpu_device_check_vm_size - validate the vm size
1708  *
1709  * @adev: amdgpu_device pointer
1710  *
1711  * Validates the vm size in GB specified via module parameter.
1712  * The VM size is the size of the GPU virtual memory space in GB.
1713  */
1714 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1715 {
1716 	/* no need to check the default value */
1717 	if (amdgpu_vm_size == -1)
1718 		return;
1719 
1720 	if (amdgpu_vm_size < 1) {
1721 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1722 			 amdgpu_vm_size);
1723 		amdgpu_vm_size = -1;
1724 	}
1725 }
1726 
1727 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1728 {
1729 	struct sysinfo si;
1730 	bool is_os_64 = (sizeof(void *) == 8);
1731 	uint64_t total_memory;
1732 	uint64_t dram_size_seven_GB = 0x1B8000000;
1733 	uint64_t dram_size_three_GB = 0xB8000000;
1734 
1735 	if (amdgpu_smu_memory_pool_size == 0)
1736 		return;
1737 
1738 	if (!is_os_64) {
1739 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1740 		goto def_value;
1741 	}
1742 	si_meminfo(&si);
1743 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1744 
1745 	if ((amdgpu_smu_memory_pool_size == 1) ||
1746 		(amdgpu_smu_memory_pool_size == 2)) {
1747 		if (total_memory < dram_size_three_GB)
1748 			goto def_value1;
1749 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1750 		(amdgpu_smu_memory_pool_size == 8)) {
1751 		if (total_memory < dram_size_seven_GB)
1752 			goto def_value1;
1753 	} else {
1754 		DRM_WARN("Smu memory pool size not supported\n");
1755 		goto def_value;
1756 	}
1757 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1758 
1759 	return;
1760 
1761 def_value1:
1762 	DRM_WARN("Not enough system memory\n");
1763 def_value:
1764 	adev->pm.smu_prv_buffer_size = 0;
1765 }
1766 
1767 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1768 {
1769 	if (!(adev->flags & AMD_IS_APU) ||
1770 	    adev->asic_type < CHIP_RAVEN)
1771 		return 0;
1772 
1773 	switch (adev->asic_type) {
1774 	case CHIP_RAVEN:
1775 		if (adev->pdev->device == 0x15dd)
1776 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1777 		if (adev->pdev->device == 0x15d8)
1778 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1779 		break;
1780 	case CHIP_RENOIR:
1781 		if ((adev->pdev->device == 0x1636) ||
1782 		    (adev->pdev->device == 0x164c))
1783 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1784 		else
1785 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1786 		break;
1787 	case CHIP_VANGOGH:
1788 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1789 		break;
1790 	case CHIP_YELLOW_CARP:
1791 		break;
1792 	case CHIP_CYAN_SKILLFISH:
1793 		if ((adev->pdev->device == 0x13FE) ||
1794 		    (adev->pdev->device == 0x143F))
1795 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1796 		break;
1797 	default:
1798 		break;
1799 	}
1800 
1801 	return 0;
1802 }
1803 
1804 /**
1805  * amdgpu_device_check_arguments - validate module params
1806  *
1807  * @adev: amdgpu_device pointer
1808  *
1809  * Validates certain module parameters and updates
1810  * the associated values used by the driver (all asics).
1811  */
1812 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1813 {
1814 	if (amdgpu_sched_jobs < 4) {
1815 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1816 			 amdgpu_sched_jobs);
1817 		amdgpu_sched_jobs = 4;
1818 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1819 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1820 			 amdgpu_sched_jobs);
1821 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1822 	}
1823 
1824 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1825 		/* gart size must be greater than or equal to 32M */
1826 		dev_warn(adev->dev, "gart size (%d) too small\n",
1827 			 amdgpu_gart_size);
1828 		amdgpu_gart_size = -1;
1829 	}
1830 
1831 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1832 		/* gtt size must be greater than or equal to 32M */
1833 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1834 				 amdgpu_gtt_size);
1835 		amdgpu_gtt_size = -1;
1836 	}
1837 
1838 	/* valid range is between 4 and 9 inclusive */
1839 	if (amdgpu_vm_fragment_size != -1 &&
1840 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1841 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1842 		amdgpu_vm_fragment_size = -1;
1843 	}
1844 
1845 	if (amdgpu_sched_hw_submission < 2) {
1846 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1847 			 amdgpu_sched_hw_submission);
1848 		amdgpu_sched_hw_submission = 2;
1849 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1850 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1851 			 amdgpu_sched_hw_submission);
1852 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1853 	}
1854 
1855 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1856 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1857 		amdgpu_reset_method = -1;
1858 	}
1859 
1860 	amdgpu_device_check_smu_prv_buffer_size(adev);
1861 
1862 	amdgpu_device_check_vm_size(adev);
1863 
1864 	amdgpu_device_check_block_size(adev);
1865 
1866 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1867 
1868 	return 0;
1869 }
1870 
1871 /**
1872  * amdgpu_switcheroo_set_state - set switcheroo state
1873  *
1874  * @pdev: pci dev pointer
1875  * @state: vga_switcheroo state
1876  *
1877  * Callback for the switcheroo driver.  Suspends or resumes the asic
1878  * before it is powered down or after it is powered up using ACPI methods.
1879  */
1880 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1881 					enum vga_switcheroo_state state)
1882 {
1883 	struct drm_device *dev = pci_get_drvdata(pdev);
1884 	int r;
1885 
1886 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1887 		return;
1888 
1889 	if (state == VGA_SWITCHEROO_ON) {
1890 		pr_info("switched on\n");
1891 		/* don't suspend or resume card normally */
1892 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1893 
1894 		pci_set_power_state(pdev, PCI_D0);
1895 		amdgpu_device_load_pci_state(pdev);
1896 		r = pci_enable_device(pdev);
1897 		if (r)
1898 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1899 		amdgpu_device_resume(dev, true);
1900 
1901 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1902 	} else {
1903 		pr_info("switched off\n");
1904 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1905 		amdgpu_device_prepare(dev);
1906 		amdgpu_device_suspend(dev, true);
1907 		amdgpu_device_cache_pci_state(pdev);
1908 		/* Shut down the device */
1909 		pci_disable_device(pdev);
1910 		pci_set_power_state(pdev, PCI_D3cold);
1911 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1912 	}
1913 }
1914 
1915 /**
1916  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1917  *
1918  * @pdev: pci dev pointer
1919  *
1920  * Callback for the switcheroo driver.  Checks if the switcheroo
1921  * state can be changed.
1922  * Returns true if the state can be changed, false if not.
1923  */
1924 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1925 {
1926 	struct drm_device *dev = pci_get_drvdata(pdev);
1927 
1928 	/*
1929 	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1930 	 * locking inversion with the driver load path. And the access here is
1931 	 * completely racy anyway. So don't bother with locking for now.
1932 	 */
1933 	return atomic_read(&dev->open_count) == 0;
1934 }
1935 
1936 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1937 	.set_gpu_state = amdgpu_switcheroo_set_state,
1938 	.reprobe = NULL,
1939 	.can_switch = amdgpu_switcheroo_can_switch,
1940 };
1941 
1942 /**
1943  * amdgpu_device_ip_set_clockgating_state - set the CG state
1944  *
1945  * @dev: amdgpu_device pointer
1946  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1947  * @state: clockgating state (gate or ungate)
1948  *
1949  * Sets the requested clockgating state for all instances of
1950  * the hardware IP specified.
1951  * Returns the error code from the last instance.
1952  */
1953 int amdgpu_device_ip_set_clockgating_state(void *dev,
1954 					   enum amd_ip_block_type block_type,
1955 					   enum amd_clockgating_state state)
1956 {
1957 	struct amdgpu_device *adev = dev;
1958 	int i, r = 0;
1959 
1960 	for (i = 0; i < adev->num_ip_blocks; i++) {
1961 		if (!adev->ip_blocks[i].status.valid)
1962 			continue;
1963 		if (adev->ip_blocks[i].version->type != block_type)
1964 			continue;
1965 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1966 			continue;
1967 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1968 			(void *)adev, state);
1969 		if (r)
1970 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1971 				  adev->ip_blocks[i].version->funcs->name, r);
1972 	}
1973 	return r;
1974 }
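
/*
 * Example (illustrative): gate clocks on all GFX IP instances:
 *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					     AMD_CG_STATE_GATE);
 */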
1975 
1976 /**
1977  * amdgpu_device_ip_set_powergating_state - set the PG state
1978  *
1979  * @dev: amdgpu_device pointer
1980  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1981  * @state: powergating state (gate or ungate)
1982  *
1983  * Sets the requested powergating state for all instances of
1984  * the hardware IP specified.
1985  * Returns the error code from the last instance.
1986  */
1987 int amdgpu_device_ip_set_powergating_state(void *dev,
1988 					   enum amd_ip_block_type block_type,
1989 					   enum amd_powergating_state state)
1990 {
1991 	struct amdgpu_device *adev = dev;
1992 	int i, r = 0;
1993 
1994 	for (i = 0; i < adev->num_ip_blocks; i++) {
1995 		if (!adev->ip_blocks[i].status.valid)
1996 			continue;
1997 		if (adev->ip_blocks[i].version->type != block_type)
1998 			continue;
1999 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2000 			continue;
2001 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2002 			(void *)adev, state);
2003 		if (r)
2004 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2005 				  adev->ip_blocks[i].version->funcs->name, r);
2006 	}
2007 	return r;
2008 }
2009 
2010 /**
2011  * amdgpu_device_ip_get_clockgating_state - get the CG state
2012  *
2013  * @adev: amdgpu_device pointer
2014  * @flags: clockgating feature flags
2015  *
2016  * Walks the list of IPs on the device and updates the clockgating
2017  * flags for each IP.
2018  * Updates @flags with the feature flags for each hardware IP where
2019  * clockgating is enabled.
2020  */
2021 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2022 					    u64 *flags)
2023 {
2024 	int i;
2025 
2026 	for (i = 0; i < adev->num_ip_blocks; i++) {
2027 		if (!adev->ip_blocks[i].status.valid)
2028 			continue;
2029 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2030 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2031 	}
2032 }
2033 
2034 /**
2035  * amdgpu_device_ip_wait_for_idle - wait for idle
2036  *
2037  * @adev: amdgpu_device pointer
2038  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2039  *
2040  * Waits for the requested hardware IP to be idle.
2041  * Returns 0 for success or a negative error code on failure.
2042  */
2043 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2044 				   enum amd_ip_block_type block_type)
2045 {
2046 	int i, r;
2047 
2048 	for (i = 0; i < adev->num_ip_blocks; i++) {
2049 		if (!adev->ip_blocks[i].status.valid)
2050 			continue;
2051 		if (adev->ip_blocks[i].version->type == block_type) {
2052 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2053 			if (r)
2054 				return r;
2055 			break;
2056 		}
2057 	}
2058 	return 0;
2059 
2060 }
2061 
2062 /**
2063  * amdgpu_device_ip_is_idle - is the hardware IP idle
2064  *
2065  * @adev: amdgpu_device pointer
2066  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2067  *
2068  * Checks if the hardware IP is idle or not.
2069  * Returns true if the IP is idle, false if not.
2070  */
2071 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2072 			      enum amd_ip_block_type block_type)
2073 {
2074 	int i;
2075 
2076 	for (i = 0; i < adev->num_ip_blocks; i++) {
2077 		if (!adev->ip_blocks[i].status.valid)
2078 			continue;
2079 		if (adev->ip_blocks[i].version->type == block_type)
2080 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2081 	}
2082 	return true;
2083 
2084 }
2085 
2086 /**
2087  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2088  *
2089  * @adev: amdgpu_device pointer
2090  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2091  *
2092  * Returns a pointer to the hardware IP block structure
2093  * if it exists for the asic, otherwise NULL.
2094  */
2095 struct amdgpu_ip_block *
2096 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2097 			      enum amd_ip_block_type type)
2098 {
2099 	int i;
2100 
2101 	for (i = 0; i < adev->num_ip_blocks; i++)
2102 		if (adev->ip_blocks[i].version->type == type)
2103 			return &adev->ip_blocks[i];
2104 
2105 	return NULL;
2106 }
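
/*
 * Example (illustrative): look up the GMC IP block for this asic:
 *   struct amdgpu_ip_block *gmc =
 *	amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
 */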
2107 
2108 /**
2109  * amdgpu_device_ip_block_version_cmp - compare IP block version
2110  *
2111  * @adev: amdgpu_device pointer
2112  * @type: enum amd_ip_block_type
2113  * @major: major version
2114  * @minor: minor version
2115  *
2116  * Returns 0 if the IP block version is equal to or greater than @major.@minor,
2117  * 1 if it is smaller or the ip_block doesn't exist.
2118  */
2119 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2120 				       enum amd_ip_block_type type,
2121 				       u32 major, u32 minor)
2122 {
2123 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2124 
2125 	if (ip_block && ((ip_block->version->major > major) ||
2126 			((ip_block->version->major == major) &&
2127 			(ip_block->version->minor >= minor))))
2128 		return 0;
2129 
2130 	return 1;
2131 }
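
/*
 * Example (illustrative): returns 0 when the GFX IP block is at least v8.1,
 * and 1 when it is older or missing:
 *   amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 1);
 */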
2132 
2133 /**
2134  * amdgpu_device_ip_block_add - add an IP block to the device
2135  *
2136  * @adev: amdgpu_device pointer
2137  * @ip_block_version: pointer to the IP to add
2138  *
2139  * Adds the IP block driver information to the collection of IPs
2140  * on the asic.
2141  */
2142 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2143 			       const struct amdgpu_ip_block_version *ip_block_version)
2144 {
2145 	if (!ip_block_version)
2146 		return -EINVAL;
2147 
2148 	switch (ip_block_version->type) {
2149 	case AMD_IP_BLOCK_TYPE_VCN:
2150 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2151 			return 0;
2152 		break;
2153 	case AMD_IP_BLOCK_TYPE_JPEG:
2154 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2155 			return 0;
2156 		break;
2157 	default:
2158 		break;
2159 	}
2160 
2161 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2162 		  ip_block_version->funcs->name);
2163 
2164 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2165 
2166 	return 0;
2167 }
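
/*
 * Example (illustrative): SOC-specific setup code registers its IP blocks in
 * order, e.g. amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 */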
2168 
2169 /**
2170  * amdgpu_device_enable_virtual_display - enable virtual display feature
2171  *
2172  * @adev: amdgpu_device pointer
2173  *
2174  * Enables the virtual display feature if the user has enabled it via
2175  * the module parameter virtual_display.  This feature provides virtual
2176  * display hardware on headless boards or in virtualized environments.
2177  * This function parses and validates the configuration string specified by
2178  * the user and configures the virtual display configuration (number of
2179  * virtual connectors, crtcs, etc.) specified.
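 *
 * The configuration string has the form "<pci bus id>,<number of crtcs>;...",
 * with "all" accepted in place of a bus id; an illustrative (made-up) example
 * is virtual_display=0000:26:00.0,2.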
2180  */
2181 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2182 {
2183 	adev->enable_virtual_display = false;
2184 
2185 	if (amdgpu_virtual_display) {
2186 		const char *pci_address_name = pci_name(adev->pdev);
2187 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2188 
2189 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2190 		pciaddstr_tmp = pciaddstr;
2191 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2192 			pciaddname = strsep(&pciaddname_tmp, ",");
2193 			if (!strcmp("all", pciaddname)
2194 			    || !strcmp(pci_address_name, pciaddname)) {
2195 				long num_crtc;
2196 				int res = -1;
2197 
2198 				adev->enable_virtual_display = true;
2199 
2200 				if (pciaddname_tmp)
2201 					res = kstrtol(pciaddname_tmp, 10,
2202 						      &num_crtc);
2203 
2204 				if (!res) {
2205 					if (num_crtc < 1)
2206 						num_crtc = 1;
2207 					if (num_crtc > 6)
2208 						num_crtc = 6;
2209 					adev->mode_info.num_crtc = num_crtc;
2210 				} else {
2211 					adev->mode_info.num_crtc = 1;
2212 				}
2213 				break;
2214 			}
2215 		}
2216 
2217 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2218 			 amdgpu_virtual_display, pci_address_name,
2219 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2220 
2221 		kfree(pciaddstr);
2222 	}
2223 }
2224 
2225 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2226 {
2227 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2228 		adev->mode_info.num_crtc = 1;
2229 		adev->enable_virtual_display = true;
2230 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2231 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2232 	}
2233 }
2234 
2235 /**
2236  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2237  *
2238  * @adev: amdgpu_device pointer
2239  *
2240  * Parses the asic configuration parameters specified in the gpu info
2241  * firmware and makes them available to the driver for use in configuring
2242  * the asic.
2243  * Returns 0 on success, -EINVAL on failure.
2244  */
2245 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2246 {
2247 	const char *chip_name;
2248 	char fw_name[40];
2249 	int err;
2250 	const struct gpu_info_firmware_header_v1_0 *hdr;
2251 
2252 	adev->firmware.gpu_info_fw = NULL;
2253 
2254 	if (adev->mman.discovery_bin)
2255 		return 0;
2256 
2257 	switch (adev->asic_type) {
2258 	default:
2259 		return 0;
2260 	case CHIP_VEGA10:
2261 		chip_name = "vega10";
2262 		break;
2263 	case CHIP_VEGA12:
2264 		chip_name = "vega12";
2265 		break;
2266 	case CHIP_RAVEN:
2267 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2268 			chip_name = "raven2";
2269 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2270 			chip_name = "picasso";
2271 		else
2272 			chip_name = "raven";
2273 		break;
2274 	case CHIP_ARCTURUS:
2275 		chip_name = "arcturus";
2276 		break;
2277 	case CHIP_NAVI12:
2278 		chip_name = "navi12";
2279 		break;
2280 	}
2281 
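	/* e.g. "amdgpu/vega10_gpu_info.bin" when chip_name is "vega10" */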
2282 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2283 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2284 	if (err) {
2285 		dev_err(adev->dev,
2286 			"Failed to get gpu_info firmware \"%s\"\n",
2287 			fw_name);
2288 		goto out;
2289 	}
2290 
2291 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2292 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2293 
2294 	switch (hdr->version_major) {
2295 	case 1:
2296 	{
2297 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2298 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2299 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2300 
2301 		/*
2302 		 * Should be dropped when DAL no longer needs it.
2303 		 */
2304 		if (adev->asic_type == CHIP_NAVI12)
2305 			goto parse_soc_bounding_box;
2306 
2307 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2308 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2309 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2310 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2311 		adev->gfx.config.max_texture_channel_caches =
2312 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2313 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2314 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2315 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2316 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2317 		adev->gfx.config.double_offchip_lds_buf =
2318 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2319 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2320 		adev->gfx.cu_info.max_waves_per_simd =
2321 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2322 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2323 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2324 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2325 		if (hdr->version_minor >= 1) {
2326 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2327 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2328 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2329 			adev->gfx.config.num_sc_per_sh =
2330 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2331 			adev->gfx.config.num_packer_per_sc =
2332 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2333 		}
2334 
2335 parse_soc_bounding_box:
2336 		/*
2337 		 * soc bounding box info is not integrated in the discovery table,
2338 		 * we always need to parse it from gpu info firmware if needed.
2339 		 */
2340 		if (hdr->version_minor == 2) {
2341 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2342 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2343 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2344 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2345 		}
2346 		break;
2347 	}
2348 	default:
2349 		dev_err(adev->dev,
2350 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2351 		err = -EINVAL;
2352 		goto out;
2353 	}
2354 out:
2355 	return err;
2356 }
2357 
2358 /**
2359  * amdgpu_device_ip_early_init - run early init for hardware IPs
2360  *
2361  * @adev: amdgpu_device pointer
2362  *
2363  * Early initialization pass for hardware IPs.  The hardware IPs that make
2364  * up each asic are discovered and each IP's early_init callback is run.  This
2365  * is the first stage in initializing the asic.
2366  * Returns 0 on success, negative error code on failure.
2367  */
2368 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2369 {
2370 	struct pci_dev *parent;
2371 	int i, r;
2372 	bool total;
2373 
2374 	amdgpu_device_enable_virtual_display(adev);
2375 
2376 	if (amdgpu_sriov_vf(adev)) {
2377 		r = amdgpu_virt_request_full_gpu(adev, true);
2378 		if (r)
2379 			return r;
2380 	}
2381 
2382 	switch (adev->asic_type) {
2383 #ifdef CONFIG_DRM_AMDGPU_SI
2384 	case CHIP_VERDE:
2385 	case CHIP_TAHITI:
2386 	case CHIP_PITCAIRN:
2387 	case CHIP_OLAND:
2388 	case CHIP_HAINAN:
2389 		adev->family = AMDGPU_FAMILY_SI;
2390 		r = si_set_ip_blocks(adev);
2391 		if (r)
2392 			return r;
2393 		break;
2394 #endif
2395 #ifdef CONFIG_DRM_AMDGPU_CIK
2396 	case CHIP_BONAIRE:
2397 	case CHIP_HAWAII:
2398 	case CHIP_KAVERI:
2399 	case CHIP_KABINI:
2400 	case CHIP_MULLINS:
2401 		if (adev->flags & AMD_IS_APU)
2402 			adev->family = AMDGPU_FAMILY_KV;
2403 		else
2404 			adev->family = AMDGPU_FAMILY_CI;
2405 
2406 		r = cik_set_ip_blocks(adev);
2407 		if (r)
2408 			return r;
2409 		break;
2410 #endif
2411 	case CHIP_TOPAZ:
2412 	case CHIP_TONGA:
2413 	case CHIP_FIJI:
2414 	case CHIP_POLARIS10:
2415 	case CHIP_POLARIS11:
2416 	case CHIP_POLARIS12:
2417 	case CHIP_VEGAM:
2418 	case CHIP_CARRIZO:
2419 	case CHIP_STONEY:
2420 		if (adev->flags & AMD_IS_APU)
2421 			adev->family = AMDGPU_FAMILY_CZ;
2422 		else
2423 			adev->family = AMDGPU_FAMILY_VI;
2424 
2425 		r = vi_set_ip_blocks(adev);
2426 		if (r)
2427 			return r;
2428 		break;
2429 	default:
2430 		r = amdgpu_discovery_set_ip_blocks(adev);
2431 		if (r)
2432 			return r;
2433 		break;
2434 	}
2435 
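	/* dGPUs with ATPX power control (hybrid graphics / PowerXpress designs)
	 * are flagged AMD_IS_PX; APUs and removable (e.g. eGPU) devices are not.
	 */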
2436 	if (amdgpu_has_atpx() &&
2437 	    (amdgpu_is_atpx_hybrid() ||
2438 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2439 	    ((adev->flags & AMD_IS_APU) == 0) &&
2440 	    !dev_is_removable(&adev->pdev->dev))
2441 		adev->flags |= AMD_IS_PX;
2442 
2443 	if (!(adev->flags & AMD_IS_APU)) {
2444 		parent = pcie_find_root_port(adev->pdev);
2445 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2446 	}
2447 
2448 
2449 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2450 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2451 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2452 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2453 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2454 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2455 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2456 
2457 	total = true;
2458 	for (i = 0; i < adev->num_ip_blocks; i++) {
2459 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2460 			DRM_WARN("disabled ip block: %d <%s>\n",
2461 				  i, adev->ip_blocks[i].version->funcs->name);
2462 			adev->ip_blocks[i].status.valid = false;
2463 		} else {
2464 			if (adev->ip_blocks[i].version->funcs->early_init) {
2465 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2466 				if (r == -ENOENT) {
2467 					adev->ip_blocks[i].status.valid = false;
2468 				} else if (r) {
2469 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2470 						  adev->ip_blocks[i].version->funcs->name, r);
2471 					total = false;
2472 				} else {
2473 					adev->ip_blocks[i].status.valid = true;
2474 				}
2475 			} else {
2476 				adev->ip_blocks[i].status.valid = true;
2477 			}
2478 		}
2479 		/* get the vbios after the asic_funcs are set up */
2480 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2481 			r = amdgpu_device_parse_gpu_info_fw(adev);
2482 			if (r)
2483 				return r;
2484 
2485 			/* Read BIOS */
2486 			if (amdgpu_device_read_bios(adev)) {
2487 				if (!amdgpu_get_bios(adev))
2488 					return -EINVAL;
2489 
2490 				r = amdgpu_atombios_init(adev);
2491 				if (r) {
2492 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2493 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2494 					return r;
2495 				}
2496 			}
2497 
2498 			/* get pf2vf msg info at its earliest time */
2499 			if (amdgpu_sriov_vf(adev))
2500 				amdgpu_virt_init_data_exchange(adev);
2501 
2502 		}
2503 	}
2504 	if (!total)
2505 		return -ENODEV;
2506 
2507 	amdgpu_amdkfd_device_probe(adev);
2508 	adev->cg_flags &= amdgpu_cg_mask;
2509 	adev->pg_flags &= amdgpu_pg_mask;
2510 
2511 	return 0;
2512 }
2513 
2514 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2515 {
2516 	int i, r;
2517 
2518 	for (i = 0; i < adev->num_ip_blocks; i++) {
2519 		if (!adev->ip_blocks[i].status.sw)
2520 			continue;
2521 		if (adev->ip_blocks[i].status.hw)
2522 			continue;
2523 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2524 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2525 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2526 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2527 			if (r) {
2528 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2529 					  adev->ip_blocks[i].version->funcs->name, r);
2530 				return r;
2531 			}
2532 			adev->ip_blocks[i].status.hw = true;
2533 		}
2534 	}
2535 
2536 	return 0;
2537 }
2538 
2539 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2540 {
2541 	int i, r;
2542 
2543 	for (i = 0; i < adev->num_ip_blocks; i++) {
2544 		if (!adev->ip_blocks[i].status.sw)
2545 			continue;
2546 		if (adev->ip_blocks[i].status.hw)
2547 			continue;
2548 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2549 		if (r) {
2550 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2551 				  adev->ip_blocks[i].version->funcs->name, r);
2552 			return r;
2553 		}
2554 		adev->ip_blocks[i].status.hw = true;
2555 	}
2556 
2557 	return 0;
2558 }
2559 
2560 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2561 {
2562 	int r = 0;
2563 	int i;
2564 	uint32_t smu_version;
2565 
2566 	if (adev->asic_type >= CHIP_VEGA10) {
2567 		for (i = 0; i < adev->num_ip_blocks; i++) {
2568 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2569 				continue;
2570 
2571 			if (!adev->ip_blocks[i].status.sw)
2572 				continue;
2573 
2574 			/* no need to do the fw loading again if already done */
2575 			if (adev->ip_blocks[i].status.hw == true)
2576 				break;
2577 
2578 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2579 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2580 				if (r) {
2581 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2582 							  adev->ip_blocks[i].version->funcs->name, r);
2583 					return r;
2584 				}
2585 			} else {
2586 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2587 				if (r) {
2588 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2589 							  adev->ip_blocks[i].version->funcs->name, r);
2590 					return r;
2591 				}
2592 			}
2593 
2594 			adev->ip_blocks[i].status.hw = true;
2595 			break;
2596 		}
2597 	}
2598 
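	/* SMU firmware loading is normally owned by the host under SR-IOV;
	 * Tonga is the exception and still loads it from the guest.
	 */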
2599 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2600 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2601 
2602 	return r;
2603 }
2604 
2605 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2606 {
2607 	long timeout;
2608 	int r, i;
2609 
2610 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2611 		struct amdgpu_ring *ring = adev->rings[i];
2612 
2613 		/* No need to set up the GPU scheduler for rings that don't need it */
2614 		if (!ring || ring->no_scheduler)
2615 			continue;
2616 
2617 		switch (ring->funcs->type) {
2618 		case AMDGPU_RING_TYPE_GFX:
2619 			timeout = adev->gfx_timeout;
2620 			break;
2621 		case AMDGPU_RING_TYPE_COMPUTE:
2622 			timeout = adev->compute_timeout;
2623 			break;
2624 		case AMDGPU_RING_TYPE_SDMA:
2625 			timeout = adev->sdma_timeout;
2626 			break;
2627 		default:
2628 			timeout = adev->video_timeout;
2629 			break;
2630 		}
2631 
2632 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2633 				   DRM_SCHED_PRIORITY_COUNT,
2634 				   ring->num_hw_submission, 0,
2635 				   timeout, adev->reset_domain->wq,
2636 				   ring->sched_score, ring->name,
2637 				   adev->dev);
2638 		if (r) {
2639 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2640 				  ring->name);
2641 			return r;
2642 		}
2643 		r = amdgpu_uvd_entity_init(adev, ring);
2644 		if (r) {
2645 			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2646 				  ring->name);
2647 			return r;
2648 		}
2649 		r = amdgpu_vce_entity_init(adev, ring);
2650 		if (r) {
2651 			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2652 				  ring->name);
2653 			return r;
2654 		}
2655 	}
2656 
2657 	amdgpu_xcp_update_partition_sched_list(adev);
2658 
2659 	return 0;
2660 }
2661 
2662 
2663 /**
2664  * amdgpu_device_ip_init - run init for hardware IPs
2665  *
2666  * @adev: amdgpu_device pointer
2667  *
2668  * Main initialization pass for hardware IPs.  The list of all the hardware
2669  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2670  * are run.  sw_init initializes the software state associated with each IP
2671  * and hw_init initializes the hardware associated with each IP.
2672  * Returns 0 on success, negative error code on failure.
2673  */
2674 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2675 {
2676 	int i, r;
2677 
2678 	r = amdgpu_ras_init(adev);
2679 	if (r)
2680 		return r;
2681 
2682 	for (i = 0; i < adev->num_ip_blocks; i++) {
2683 		if (!adev->ip_blocks[i].status.valid)
2684 			continue;
2685 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2686 		if (r) {
2687 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2688 				  adev->ip_blocks[i].version->funcs->name, r);
2689 			goto init_failed;
2690 		}
2691 		adev->ip_blocks[i].status.sw = true;
2692 
2693 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2694 			/* need to do common hw init early so everything is set up for gmc */
2695 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2696 			if (r) {
2697 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2698 				goto init_failed;
2699 			}
2700 			adev->ip_blocks[i].status.hw = true;
2701 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2702 			/* need to do gmc hw init early so we can allocate gpu mem */
2703 			/* Try to reserve bad pages early */
2704 			if (amdgpu_sriov_vf(adev))
2705 				amdgpu_virt_exchange_data(adev);
2706 
2707 			r = amdgpu_device_mem_scratch_init(adev);
2708 			if (r) {
2709 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2710 				goto init_failed;
2711 			}
2712 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2713 			if (r) {
2714 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2715 				goto init_failed;
2716 			}
2717 			r = amdgpu_device_wb_init(adev);
2718 			if (r) {
2719 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2720 				goto init_failed;
2721 			}
2722 			adev->ip_blocks[i].status.hw = true;
2723 
2724 			/* right after GMC hw init, we create CSA */
2725 			if (adev->gfx.mcbp) {
2726 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2727 							       AMDGPU_GEM_DOMAIN_VRAM |
2728 							       AMDGPU_GEM_DOMAIN_GTT,
2729 							       AMDGPU_CSA_SIZE);
2730 				if (r) {
2731 					DRM_ERROR("allocate CSA failed %d\n", r);
2732 					goto init_failed;
2733 				}
2734 			}
2735 
2736 			r = amdgpu_seq64_init(adev);
2737 			if (r) {
2738 				DRM_ERROR("allocate seq64 failed %d\n", r);
2739 				goto init_failed;
2740 			}
2741 		}
2742 	}
2743 
2744 	if (amdgpu_sriov_vf(adev))
2745 		amdgpu_virt_init_data_exchange(adev);
2746 
2747 	r = amdgpu_ib_pool_init(adev);
2748 	if (r) {
2749 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2750 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2751 		goto init_failed;
2752 	}
2753 
2754 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2755 	if (r)
2756 		goto init_failed;
2757 
2758 	r = amdgpu_device_ip_hw_init_phase1(adev);
2759 	if (r)
2760 		goto init_failed;
2761 
2762 	r = amdgpu_device_fw_loading(adev);
2763 	if (r)
2764 		goto init_failed;
2765 
2766 	r = amdgpu_device_ip_hw_init_phase2(adev);
2767 	if (r)
2768 		goto init_failed;
2769 
2770 	/*
2771 	 * Retired pages will be loaded from eeprom and reserved here;
2772 	 * this should be called after amdgpu_device_ip_hw_init_phase2 since
2773 	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2774 	 * functional for I2C communication, which is only true at this point.
2775 	 *
2776 	 * amdgpu_ras_recovery_init may fail, but the upper level only cares
2777 	 * about failures caused by a bad gpu and stops the amdgpu init process
2778 	 * accordingly.  For other failures, it still releases all the
2779 	 * resources and prints an error message rather than returning a
2780 	 * negative value to the upper level.
2781 	 *
2782 	 * Note: theoretically, this should be called before all vram allocations
2783 	 * to protect retired pages from abuse.
2784 	 */
2785 	r = amdgpu_ras_recovery_init(adev);
2786 	if (r)
2787 		goto init_failed;
2788 
2789 	/*
2790 	 * In case of XGMI, grab an extra reference to the reset domain for this device
2791 	 */
2792 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2793 		if (amdgpu_xgmi_add_device(adev) == 0) {
2794 			if (!amdgpu_sriov_vf(adev)) {
2795 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2796 
2797 				if (WARN_ON(!hive)) {
2798 					r = -ENOENT;
2799 					goto init_failed;
2800 				}
2801 
2802 				if (!hive->reset_domain ||
2803 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2804 					r = -ENOENT;
2805 					amdgpu_put_xgmi_hive(hive);
2806 					goto init_failed;
2807 				}
2808 
2809 				/* Drop the early temporary reset domain we created for device */
2810 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2811 				adev->reset_domain = hive->reset_domain;
2812 				amdgpu_put_xgmi_hive(hive);
2813 			}
2814 		}
2815 	}
2816 
2817 	r = amdgpu_device_init_schedulers(adev);
2818 	if (r)
2819 		goto init_failed;
2820 
2821 	if (adev->mman.buffer_funcs_ring->sched.ready)
2822 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2823 
2824 	/* Don't init kfd if the whole hive needs to be reset during init */
2825 	if (!adev->gmc.xgmi.pending_reset) {
2826 		kgd2kfd_init_zone_device(adev);
2827 		amdgpu_amdkfd_device_init(adev);
2828 	}
2829 
2830 	amdgpu_fru_get_product_info(adev);
2831 
2832 init_failed:
2833 
2834 	return r;
2835 }
2836 
2837 /**
2838  * amdgpu_device_fill_reset_magic - save reset magic from the gart pointer
2839  *
2840  * @adev: amdgpu_device pointer
2841  *
2842  * Saves a reset magic value from the gart pointer in VRAM.  The driver calls
2843  * this function before a GPU reset.  If the value is retained after a
2844  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2845  */
2846 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2847 {
2848 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2849 }
2850 
2851 /**
2852  * amdgpu_device_check_vram_lost - check if vram is valid
2853  *
2854  * @adev: amdgpu_device pointer
2855  *
2856  * Compares the contents at the gart pointer in VRAM with the saved reset magic.
2857  * The driver calls this after a GPU reset to see if the contents of
2858  * VRAM are lost or not.
2859  * returns true if vram is lost, false if not.
2860  */
2861 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2862 {
2863 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2864 			AMDGPU_RESET_MAGIC_NUM))
2865 		return true;
2866 
2867 	if (!amdgpu_in_reset(adev))
2868 		return false;
2869 
2870 	/*
2871 	 * For all ASICs with baco/mode1 reset, the VRAM is
2872 	 * always assumed to be lost.
2873 	 */
2874 	switch (amdgpu_asic_reset_method(adev)) {
2875 	case AMD_RESET_METHOD_BACO:
2876 	case AMD_RESET_METHOD_MODE1:
2877 		return true;
2878 	default:
2879 		return false;
2880 	}
2881 }
2882 
2883 /**
2884  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2885  *
2886  * @adev: amdgpu_device pointer
2887  * @state: clockgating state (gate or ungate)
2888  *
2889  * The list of all the hardware IPs that make up the asic is walked and the
2890  * set_clockgating_state callbacks are run.
2891  * During late init, this pass enables clockgating for hardware IPs;
2892  * during fini or suspend, it disables clockgating for hardware IPs.
2893  * Returns 0 on success, negative error code on failure.
2894  */
2895 
2896 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2897 			       enum amd_clockgating_state state)
2898 {
2899 	int i, j, r;
2900 
2901 	if (amdgpu_emu_mode == 1)
2902 		return 0;
2903 
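	/* Gating walks the IP list front to back, ungating walks it in reverse */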
2904 	for (j = 0; j < adev->num_ip_blocks; j++) {
2905 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2906 		if (!adev->ip_blocks[i].status.late_initialized)
2907 			continue;
2908 		/* skip CG for GFX, SDMA on S0ix */
2909 		if (adev->in_s0ix &&
2910 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2911 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2912 			continue;
2913 		/* skip CG for VCE/UVD, it's handled specially */
2914 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2915 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2916 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2917 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2918 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2919 			/* enable clockgating to save power */
2920 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2921 										     state);
2922 			if (r) {
2923 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2924 					  adev->ip_blocks[i].version->funcs->name, r);
2925 				return r;
2926 			}
2927 		}
2928 	}
2929 
2930 	return 0;
2931 }
2932 
2933 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2934 			       enum amd_powergating_state state)
2935 {
2936 	int i, j, r;
2937 
2938 	if (amdgpu_emu_mode == 1)
2939 		return 0;
2940 
2941 	for (j = 0; j < adev->num_ip_blocks; j++) {
2942 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2943 		if (!adev->ip_blocks[i].status.late_initialized)
2944 			continue;
2945 		/* skip PG for GFX, SDMA on S0ix */
2946 		if (adev->in_s0ix &&
2947 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2948 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2949 			continue;
2950 		/* skip PG for VCE/UVD, it's handled specially */
2951 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2952 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2953 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2954 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2955 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2956 			/* enable powergating to save power */
2957 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2958 											state);
2959 			if (r) {
2960 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2961 					  adev->ip_blocks[i].version->funcs->name, r);
2962 				return r;
2963 			}
2964 		}
2965 	}
2966 	return 0;
2967 }
2968 
2969 static int amdgpu_device_enable_mgpu_fan_boost(void)
2970 {
2971 	struct amdgpu_gpu_instance *gpu_ins;
2972 	struct amdgpu_device *adev;
2973 	int i, ret = 0;
2974 
2975 	mutex_lock(&mgpu_info.mutex);
2976 
2977 	/*
2978 	 * MGPU fan boost feature should be enabled
2979 	 * only when there are two or more dGPUs in
2980 	 * the system
2981 	 */
2982 	if (mgpu_info.num_dgpu < 2)
2983 		goto out;
2984 
2985 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2986 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2987 		adev = gpu_ins->adev;
2988 		if (!(adev->flags & AMD_IS_APU) &&
2989 		    !gpu_ins->mgpu_fan_enabled) {
2990 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2991 			if (ret)
2992 				break;
2993 
2994 			gpu_ins->mgpu_fan_enabled = 1;
2995 		}
2996 	}
2997 
2998 out:
2999 	mutex_unlock(&mgpu_info.mutex);
3000 
3001 	return ret;
3002 }
3003 
3004 /**
3005  * amdgpu_device_ip_late_init - run late init for hardware IPs
3006  *
3007  * @adev: amdgpu_device pointer
3008  *
3009  * Late initialization pass for hardware IPs.  The list of all the hardware
3010  * IPs that make up the asic is walked and the late_init callbacks are run.
3011  * late_init covers any special initialization that an IP requires
3012  * after all of the IPs have been initialized or something that needs to happen
3013  * late in the init process.
3014  * Returns 0 on success, negative error code on failure.
3015  */
3016 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3017 {
3018 	struct amdgpu_gpu_instance *gpu_instance;
3019 	int i = 0, r;
3020 
3021 	for (i = 0; i < adev->num_ip_blocks; i++) {
3022 		if (!adev->ip_blocks[i].status.hw)
3023 			continue;
3024 		if (adev->ip_blocks[i].version->funcs->late_init) {
3025 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3026 			if (r) {
3027 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3028 					  adev->ip_blocks[i].version->funcs->name, r);
3029 				return r;
3030 			}
3031 		}
3032 		adev->ip_blocks[i].status.late_initialized = true;
3033 	}
3034 
3035 	r = amdgpu_ras_late_init(adev);
3036 	if (r) {
3037 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3038 		return r;
3039 	}
3040 
3041 	amdgpu_ras_set_error_query_ready(adev, true);
3042 
3043 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3044 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3045 
3046 	amdgpu_device_fill_reset_magic(adev);
3047 
3048 	r = amdgpu_device_enable_mgpu_fan_boost();
3049 	if (r)
3050 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3051 
3052 	/* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
3053 	if (amdgpu_passthrough(adev) &&
3054 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3055 	     adev->asic_type == CHIP_ALDEBARAN))
3056 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3057 
3058 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3059 		mutex_lock(&mgpu_info.mutex);
3060 
3061 		/*
3062 		 * Reset device p-state to low as this was booted with high.
3063 		 *
3064 		 * This should be performed only after all devices from the same
3065 		 * hive get initialized.
3066 		 *
3067 		 * However, the number of devices in the hive is not known in
3068 		 * advance; it is counted one by one as devices initialize.
3069 		 *
3070 		 * So, we wait until all XGMI interlinked devices are initialized.
3071 		 * This may bring some delays as those devices may come from
3072 		 * different hives. But that should be OK.
3073 		 */
3074 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3075 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3076 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3077 				if (gpu_instance->adev->flags & AMD_IS_APU)
3078 					continue;
3079 
3080 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3081 						AMDGPU_XGMI_PSTATE_MIN);
3082 				if (r) {
3083 					DRM_ERROR("pstate setting failed (%d).\n", r);
3084 					break;
3085 				}
3086 			}
3087 		}
3088 
3089 		mutex_unlock(&mgpu_info.mutex);
3090 	}
3091 
3092 	return 0;
3093 }
3094 
3095 /**
3096  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3097  *
3098  * @adev: amdgpu_device pointer
3099  *
3100  * For ASICs that need to disable the SMC first
3101  */
3102 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3103 {
3104 	int i, r;
3105 
3106 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3107 		return;
3108 
3109 	for (i = 0; i < adev->num_ip_blocks; i++) {
3110 		if (!adev->ip_blocks[i].status.hw)
3111 			continue;
3112 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3113 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3114 			/* XXX handle errors */
3115 			if (r) {
3116 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3117 					  adev->ip_blocks[i].version->funcs->name, r);
3118 			}
3119 			adev->ip_blocks[i].status.hw = false;
3120 			break;
3121 		}
3122 	}
3123 }
3124 
3125 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3126 {
3127 	int i, r;
3128 
3129 	for (i = 0; i < adev->num_ip_blocks; i++) {
3130 		if (!adev->ip_blocks[i].version->funcs->early_fini)
3131 			continue;
3132 
3133 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3134 		if (r) {
3135 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3136 				  adev->ip_blocks[i].version->funcs->name, r);
3137 		}
3138 	}
3139 
3140 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3141 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3142 
3143 	amdgpu_amdkfd_suspend(adev, false);
3144 
3145 	/* Workaround for ASICs that need to disable SMC first */
3146 	amdgpu_device_smu_fini_early(adev);
3147 
3148 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3149 		if (!adev->ip_blocks[i].status.hw)
3150 			continue;
3151 
3152 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3153 		/* XXX handle errors */
3154 		if (r) {
3155 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3156 				  adev->ip_blocks[i].version->funcs->name, r);
3157 		}
3158 
3159 		adev->ip_blocks[i].status.hw = false;
3160 	}
3161 
3162 	if (amdgpu_sriov_vf(adev)) {
3163 		if (amdgpu_virt_release_full_gpu(adev, false))
3164 			DRM_ERROR("failed to release exclusive mode on fini\n");
3165 	}
3166 
3167 	return 0;
3168 }
3169 
3170 /**
3171  * amdgpu_device_ip_fini - run fini for hardware IPs
3172  *
3173  * @adev: amdgpu_device pointer
3174  *
3175  * Main teardown pass for hardware IPs.  The list of all the hardware
3176  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3177  * are run.  hw_fini tears down the hardware associated with each IP
3178  * and sw_fini tears down any software state associated with each IP.
3179  * Returns 0 on success, negative error code on failure.
3180  */
3181 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3182 {
3183 	int i, r;
3184 
3185 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3186 		amdgpu_virt_release_ras_err_handler_data(adev);
3187 
3188 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3189 		amdgpu_xgmi_remove_device(adev);
3190 
3191 	amdgpu_amdkfd_device_fini_sw(adev);
3192 
3193 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3194 		if (!adev->ip_blocks[i].status.sw)
3195 			continue;
3196 
3197 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3198 			amdgpu_ucode_free_bo(adev);
3199 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3200 			amdgpu_device_wb_fini(adev);
3201 			amdgpu_device_mem_scratch_fini(adev);
3202 			amdgpu_ib_pool_fini(adev);
3203 			amdgpu_seq64_fini(adev);
3204 		}
3205 
3206 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3207 		/* XXX handle errors */
3208 		if (r) {
3209 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3210 				  adev->ip_blocks[i].version->funcs->name, r);
3211 		}
3212 		adev->ip_blocks[i].status.sw = false;
3213 		adev->ip_blocks[i].status.valid = false;
3214 	}
3215 
3216 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3217 		if (!adev->ip_blocks[i].status.late_initialized)
3218 			continue;
3219 		if (adev->ip_blocks[i].version->funcs->late_fini)
3220 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3221 		adev->ip_blocks[i].status.late_initialized = false;
3222 	}
3223 
3224 	amdgpu_ras_fini(adev);
3225 
3226 	return 0;
3227 }
3228 
3229 /**
3230  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3231  *
3232  * @work: work_struct.
3233  */
3234 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3235 {
3236 	struct amdgpu_device *adev =
3237 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3238 	int r;
3239 
3240 	r = amdgpu_ib_ring_tests(adev);
3241 	if (r)
3242 		DRM_ERROR("ib ring test failed (%d).\n", r);
3243 }
3244 
3245 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3246 {
3247 	struct amdgpu_device *adev =
3248 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3249 
3250 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3251 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3252 
3253 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3254 		adev->gfx.gfx_off_state = true;
3255 }
3256 
3257 /**
3258  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3259  *
3260  * @adev: amdgpu_device pointer
3261  *
3262  * Main suspend function for hardware IPs.  The list of all the hardware
3263  * IPs that make up the asic is walked, clockgating is disabled and the
3264  * suspend callbacks are run.  suspend puts the hardware and software state
3265  * in each IP into a state suitable for suspend.
3266  * Returns 0 on success, negative error code on failure.
3267  */
3268 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3269 {
3270 	int i, r;
3271 
3272 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3273 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3274 
3275 	/*
3276 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3277 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3278 	 * scenario. Add the missing df cstate disablement here.
3279 	 */
3280 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3281 		dev_warn(adev->dev, "Failed to disallow df cstate");
3282 
3283 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3284 		if (!adev->ip_blocks[i].status.valid)
3285 			continue;
3286 
3287 		/* displays are handled separately */
3288 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3289 			continue;
3290 
3291 		/* XXX handle errors */
3292 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3293 		/* XXX handle errors */
3294 		if (r) {
3295 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3296 				  adev->ip_blocks[i].version->funcs->name, r);
3297 			return r;
3298 		}
3299 
3300 		adev->ip_blocks[i].status.hw = false;
3301 	}
3302 
3303 	return 0;
3304 }
3305 
3306 /**
3307  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3308  *
3309  * @adev: amdgpu_device pointer
3310  *
3311  * Main suspend function for hardware IPs.  The list of all the hardware
3312  * IPs that make up the asic is walked, clockgating is disabled and the
3313  * suspend callbacks are run.  suspend puts the hardware and software state
3314  * in each IP into a state suitable for suspend.
3315  * Returns 0 on success, negative error code on failure.
3316  */
3317 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3318 {
3319 	int i, r;
3320 
3321 	if (adev->in_s0ix)
3322 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3323 
3324 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3325 		if (!adev->ip_blocks[i].status.valid)
3326 			continue;
3327 		/* displays are handled in phase1 */
3328 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3329 			continue;
3330 		/* PSP lost connection when err_event_athub occurs */
3331 		if (amdgpu_ras_intr_triggered() &&
3332 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3333 			adev->ip_blocks[i].status.hw = false;
3334 			continue;
3335 		}
3336 
3337 		/* skip unnecessary suspend if we have not initialized them yet */
3338 		if (adev->gmc.xgmi.pending_reset &&
3339 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3340 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3341 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3342 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3343 			adev->ip_blocks[i].status.hw = false;
3344 			continue;
3345 		}
3346 
3347 		/* skip suspend of gfx/mes and psp for S0ix
3348 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3349 		 * like at runtime. PSP is also part of the always on hardware
3350 		 * so no need to suspend it.
3351 		 */
3352 		if (adev->in_s0ix &&
3353 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3354 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3355 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3356 			continue;
3357 
3358 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3359 		if (adev->in_s0ix &&
3360 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3361 		     IP_VERSION(5, 0, 0)) &&
3362 		    (adev->ip_blocks[i].version->type ==
3363 		     AMD_IP_BLOCK_TYPE_SDMA))
3364 			continue;
3365 
3366 		/* Once swPSP provides the IMU and RLC FW binaries to TOS during cold-boot,
3367 		 * these are in TMR and hence are expected to be reused by PSP-TOS to reload
3368 		 * from this location; RLC Autoload automatically also gets loaded
3369 		 * from here based on the PMFW -> PSP message during the re-init sequence.
3370 		 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3371 		 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3372 		 */
3373 		if (amdgpu_in_reset(adev) &&
3374 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3375 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3376 			continue;
3377 
3378 		/* XXX handle errors */
3379 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3380 		/* XXX handle errors */
3381 		if (r) {
3382 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3383 				  adev->ip_blocks[i].version->funcs->name, r);
3384 		}
3385 		adev->ip_blocks[i].status.hw = false;
3386 		/* handle putting the SMC in the appropriate state */
3387 		if (!amdgpu_sriov_vf(adev)) {
3388 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3389 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3390 				if (r) {
3391 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3392 							adev->mp1_state, r);
3393 					return r;
3394 				}
3395 			}
3396 		}
3397 	}
3398 
3399 	return 0;
3400 }
3401 
3402 /**
3403  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3404  *
3405  * @adev: amdgpu_device pointer
3406  *
3407  * Main suspend function for hardware IPs.  The list of all the hardware
3408  * IPs that make up the asic is walked, clockgating is disabled and the
3409  * suspend callbacks are run.  suspend puts the hardware and software state
3410  * in each IP into a state suitable for suspend.
3411  * Returns 0 on success, negative error code on failure.
3412  */
3413 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3414 {
3415 	int r;
3416 
3417 	if (amdgpu_sriov_vf(adev)) {
3418 		amdgpu_virt_fini_data_exchange(adev);
3419 		amdgpu_virt_request_full_gpu(adev, false);
3420 	}
3421 
3422 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3423 
3424 	r = amdgpu_device_ip_suspend_phase1(adev);
3425 	if (r)
3426 		return r;
3427 	r = amdgpu_device_ip_suspend_phase2(adev);
3428 
3429 	if (amdgpu_sriov_vf(adev))
3430 		amdgpu_virt_release_full_gpu(adev, false);
3431 
3432 	return r;
3433 }
3434 
3435 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3436 {
3437 	int i, r;
3438 
3439 	static enum amd_ip_block_type ip_order[] = {
3440 		AMD_IP_BLOCK_TYPE_COMMON,
3441 		AMD_IP_BLOCK_TYPE_GMC,
3442 		AMD_IP_BLOCK_TYPE_PSP,
3443 		AMD_IP_BLOCK_TYPE_IH,
3444 	};
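	/* These blocks are re-initialized first, enough to restore basic register,
	 * memory (GMC), firmware (PSP) and interrupt (IH) access after an SR-IOV reset.
	 */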
3445 
3446 	for (i = 0; i < adev->num_ip_blocks; i++) {
3447 		int j;
3448 		struct amdgpu_ip_block *block;
3449 
3450 		block = &adev->ip_blocks[i];
3451 		block->status.hw = false;
3452 
3453 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3454 
3455 			if (block->version->type != ip_order[j] ||
3456 				!block->status.valid)
3457 				continue;
3458 
3459 			r = block->version->funcs->hw_init(adev);
3460 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3461 			if (r)
3462 				return r;
3463 			block->status.hw = true;
3464 		}
3465 	}
3466 
3467 	return 0;
3468 }
3469 
3470 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3471 {
3472 	int i, r;
3473 
3474 	static enum amd_ip_block_type ip_order[] = {
3475 		AMD_IP_BLOCK_TYPE_SMC,
3476 		AMD_IP_BLOCK_TYPE_DCE,
3477 		AMD_IP_BLOCK_TYPE_GFX,
3478 		AMD_IP_BLOCK_TYPE_SDMA,
3479 		AMD_IP_BLOCK_TYPE_MES,
3480 		AMD_IP_BLOCK_TYPE_UVD,
3481 		AMD_IP_BLOCK_TYPE_VCE,
3482 		AMD_IP_BLOCK_TYPE_VCN,
3483 		AMD_IP_BLOCK_TYPE_JPEG
3484 	};
3485 
3486 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3487 		int j;
3488 		struct amdgpu_ip_block *block;
3489 
3490 		for (j = 0; j < adev->num_ip_blocks; j++) {
3491 			block = &adev->ip_blocks[j];
3492 
3493 			if (block->version->type != ip_order[i] ||
3494 				!block->status.valid ||
3495 				block->status.hw)
3496 				continue;
3497 
3498 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3499 				r = block->version->funcs->resume(adev);
3500 			else
3501 				r = block->version->funcs->hw_init(adev);
3502 
3503 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3504 			if (r)
3505 				return r;
3506 			block->status.hw = true;
3507 		}
3508 	}
3509 
3510 	return 0;
3511 }
3512 
3513 /**
3514  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3515  *
3516  * @adev: amdgpu_device pointer
3517  *
3518  * First resume function for hardware IPs.  The list of all the hardware
3519  * IPs that make up the asic is walked and the resume callbacks are run for
3520  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3521  * after a suspend and updates the software state as necessary.  This
3522  * function is also used for restoring the GPU after a GPU reset.
3523  * Returns 0 on success, negative error code on failure.
3524  */
3525 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3526 {
3527 	int i, r;
3528 
3529 	for (i = 0; i < adev->num_ip_blocks; i++) {
3530 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3531 			continue;
3532 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3533 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3534 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3535 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3536 
3537 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3538 			if (r) {
3539 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3540 					  adev->ip_blocks[i].version->funcs->name, r);
3541 				return r;
3542 			}
3543 			adev->ip_blocks[i].status.hw = true;
3544 		}
3545 	}
3546 
3547 	return 0;
3548 }
3549 
3550 /**
3551  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3552  *
3553  * @adev: amdgpu_device pointer
3554  *
3555  * Second resume function for hardware IPs.  The list of all the hardware
3556  * IPs that make up the asic is walked and the resume callbacks are run for
3557  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3558  * functional state after a suspend and updates the software state as
3559  * necessary.  This function is also used for restoring the GPU after a GPU
3560  * reset.
3561  * Returns 0 on success, negative error code on failure.
3562  */
3563 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3564 {
3565 	int i, r;
3566 
3567 	for (i = 0; i < adev->num_ip_blocks; i++) {
3568 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3569 			continue;
3570 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3571 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3572 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3573 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3574 			continue;
3575 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3576 		if (r) {
3577 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3578 				  adev->ip_blocks[i].version->funcs->name, r);
3579 			return r;
3580 		}
3581 		adev->ip_blocks[i].status.hw = true;
3582 	}
3583 
3584 	return 0;
3585 }
3586 
3587 /**
3588  * amdgpu_device_ip_resume - run resume for hardware IPs
3589  *
3590  * @adev: amdgpu_device pointer
3591  *
3592  * Main resume function for hardware IPs.  The hardware IPs
3593  * are split into two resume functions because they are
3594  * also used in recovering from a GPU reset and some additional
3595  * steps need to be take between them.  In this case (S3/S4) they are
3596  * steps need to be taken between them.  In this case (S3/S4) they are
3597  * Returns 0 on success, negative error code on failure.
3598  */
3599 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3600 {
3601 	int r;
3602 
3603 	r = amdgpu_device_ip_resume_phase1(adev);
3604 	if (r)
3605 		return r;
3606 
3607 	r = amdgpu_device_fw_loading(adev);
3608 	if (r)
3609 		return r;
3610 
3611 	r = amdgpu_device_ip_resume_phase2(adev);
3612 
3613 	if (adev->mman.buffer_funcs_ring->sched.ready)
3614 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3615 
3616 	return r;
3617 }
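
/*
 * Illustrative sketch only (error handling omitted): during a GPU reset the
 * two resume phases are not called back to back.  amdgpu_do_asic_reset()
 * below roughly interleaves them as
 *
 *	amdgpu_device_ip_resume_phase1(tmp_adev);
 *	vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
 *	amdgpu_coredump(tmp_adev, vram_lost, reset_context);
 *	amdgpu_device_fw_loading(tmp_adev);
 *	amdgpu_device_ip_resume_phase2(tmp_adev);
 *
 * which is why the two phases are kept as separate helpers.
 */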
3618 
3619 /**
3620  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3621  *
3622  * @adev: amdgpu_device pointer
3623  *
3624  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3625  */
3626 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3627 {
3628 	if (amdgpu_sriov_vf(adev)) {
3629 		if (adev->is_atom_fw) {
3630 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3631 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3632 		} else {
3633 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3634 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3635 		}
3636 
3637 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3638 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3639 	}
3640 }
3641 
3642 /**
3643  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3644  *
3645  * @asic_type: AMD asic type
3646  *
3647  * Check if there is DC (new modesetting infrastructure) support for an asic.
3648  * Returns true if DC has support, false if not.
3649  */
3650 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3651 {
3652 	switch (asic_type) {
3653 #ifdef CONFIG_DRM_AMDGPU_SI
3654 	case CHIP_HAINAN:
3655 #endif
3656 	case CHIP_TOPAZ:
3657 		/* chips with no display hardware */
3658 		return false;
3659 #if defined(CONFIG_DRM_AMD_DC)
3660 	case CHIP_TAHITI:
3661 	case CHIP_PITCAIRN:
3662 	case CHIP_VERDE:
3663 	case CHIP_OLAND:
3664 		/*
3665 		 * We have systems in the wild with these ASICs that require
3666 		 * LVDS and VGA support which is not supported with DC.
3667 		 *
3668 		 * Fall back to the non-DC driver here by default so as not to
3669 		 * cause regressions.
3670 		 */
3671 #if defined(CONFIG_DRM_AMD_DC_SI)
3672 		return amdgpu_dc > 0;
3673 #else
3674 		return false;
3675 #endif
3676 	case CHIP_BONAIRE:
3677 	case CHIP_KAVERI:
3678 	case CHIP_KABINI:
3679 	case CHIP_MULLINS:
3680 		/*
3681 		 * We have systems in the wild with these ASICs that require
3682 		 * VGA support which is not supported with DC.
3683 		 *
3684 		 * Fall back to the non-DC driver here by default so as not to
3685 		 * cause regressions.
3686 		 */
3687 		return amdgpu_dc > 0;
3688 	default:
3689 		return amdgpu_dc != 0;
3690 #else
3691 	default:
3692 		if (amdgpu_dc > 0)
3693 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3694 		return false;
3695 #endif
3696 	}
3697 }
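
/*
 * For illustration, the amdgpu.dc module parameter maps onto the checks above
 * roughly as follows (assuming CONFIG_DRM_AMD_DC is built in):
 *
 *	amdgpu.dc=0      -> DC disabled everywhere (amdgpu_dc != 0 is false)
 *	amdgpu.dc=1      -> DC forced on, including the legacy SI/CIK parts
 *	                    listed above (SI additionally needs
 *	                    CONFIG_DRM_AMD_DC_SI)
 *	default ("auto") -> DC enabled on modern ASICs; the legacy SI/CIK
 *	                    parts above keep the non-DC path
 */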
3698 
3699 /**
3700  * amdgpu_device_has_dc_support - check if dc is supported
3701  *
3702  * @adev: amdgpu_device pointer
3703  *
3704  * Returns true for supported, false for not supported
3705  */
3706 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3707 {
3708 	if (adev->enable_virtual_display ||
3709 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3710 		return false;
3711 
3712 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3713 }
3714 
3715 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3716 {
3717 	struct amdgpu_device *adev =
3718 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3719 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3720 
3721 	/* It's a bug to not have a hive within this function */
3722 	if (WARN_ON(!hive))
3723 		return;
3724 
3725 	/*
3726 	 * Use task barrier to synchronize all xgmi reset works across the
3727 	 * hive. task_barrier_enter and task_barrier_exit will block
3728 	 * until all the threads running the xgmi reset works reach
3729 	 * those points. task_barrier_full will do both blocks.
3730 	 */
3731 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3732 
3733 		task_barrier_enter(&hive->tb);
3734 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3735 
3736 		if (adev->asic_reset_res)
3737 			goto fail;
3738 
3739 		task_barrier_exit(&hive->tb);
3740 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3741 
3742 		if (adev->asic_reset_res)
3743 			goto fail;
3744 
3745 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3746 	} else {
3747 
3748 		task_barrier_full(&hive->tb);
3749 		adev->asic_reset_res = amdgpu_asic_reset(adev);
3750 	}
3751 
3752 fail:
3753 	if (adev->asic_reset_res)
3754 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3755 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3756 	amdgpu_put_xgmi_hive(hive);
3757 }
3758 
3759 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3760 {
3761 	char *input = amdgpu_lockup_timeout;
3762 	char *timeout_setting = NULL;
3763 	int index = 0;
3764 	long timeout;
3765 	int ret = 0;
3766 
3767 	/*
3768 	 * By default the timeout for non-compute jobs is 10000
3769 	 * and 60000 for compute jobs.
3770 	 * In SR-IOV or passthrough mode, the timeout for compute
3771 	 * jobs is 60000 by default.
3772 	 */
3773 	adev->gfx_timeout = msecs_to_jiffies(10000);
3774 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3775 	if (amdgpu_sriov_vf(adev))
3776 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3777 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3778 	else
3779 		adev->compute_timeout = msecs_to_jiffies(60000);
3780 
3781 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3782 		while ((timeout_setting = strsep(&input, ",")) &&
3783 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3784 			ret = kstrtol(timeout_setting, 0, &timeout);
3785 			if (ret)
3786 				return ret;
3787 
3788 			if (timeout == 0) {
3789 				index++;
3790 				continue;
3791 			} else if (timeout < 0) {
3792 				timeout = MAX_SCHEDULE_TIMEOUT;
3793 				dev_warn(adev->dev, "lockup timeout disabled");
3794 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3795 			} else {
3796 				timeout = msecs_to_jiffies(timeout);
3797 			}
3798 
3799 			switch (index++) {
3800 			case 0:
3801 				adev->gfx_timeout = timeout;
3802 				break;
3803 			case 1:
3804 				adev->compute_timeout = timeout;
3805 				break;
3806 			case 2:
3807 				adev->sdma_timeout = timeout;
3808 				break;
3809 			case 3:
3810 				adev->video_timeout = timeout;
3811 				break;
3812 			default:
3813 				break;
3814 			}
3815 		}
3816 		/*
3817 		 * There is only one value specified and
3818 		 * it should apply to all non-compute jobs.
3819 		 */
3820 		if (index == 1) {
3821 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3822 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3823 				adev->compute_timeout = adev->gfx_timeout;
3824 		}
3825 	}
3826 
3827 	return ret;
3828 }
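
/*
 * For illustration, the amdgpu.lockup_timeout string parsed above is a comma
 * separated list whose entries map to gfx, compute, SDMA and video in that
 * order; 0 keeps the default for that engine and a negative value disables
 * the timeout.  Hypothetical examples (not the only valid forms):
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000	(explicit defaults, in ms)
 *	amdgpu.lockup_timeout=-1			(disable the timeout for
 *							 all non-compute jobs)
 *
 * As handled at the end of the function, a single value applies to all
 * non-compute jobs, and also to compute jobs under SR-IOV or passthrough.
 */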
3829 
3830 /**
3831  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3832  *
3833  * @adev: amdgpu_device pointer
3834  *
3835  * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3836  */
3837 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3838 {
3839 	struct iommu_domain *domain;
3840 
3841 	domain = iommu_get_domain_for_dev(adev->dev);
3842 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3843 		adev->ram_is_direct_mapped = true;
3844 }
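
/*
 * For example (illustrative, platform dependent): booting with the IOMMU
 * disabled, or in passthrough mode (e.g. iommu=pt), typically leaves the
 * device with no translation domain or an IOMMU_DOMAIN_IDENTITY domain, so
 * ram_is_direct_mapped is set to true above.
 */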
3845 
3846 static const struct attribute *amdgpu_dev_attributes[] = {
3847 	&dev_attr_pcie_replay_count.attr,
3848 	NULL
3849 };
3850 
3851 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3852 {
3853 	if (amdgpu_mcbp == 1)
3854 		adev->gfx.mcbp = true;
3855 	else if (amdgpu_mcbp == 0)
3856 		adev->gfx.mcbp = false;
3857 
3858 	if (amdgpu_sriov_vf(adev))
3859 		adev->gfx.mcbp = true;
3860 
3861 	if (adev->gfx.mcbp)
3862 		DRM_INFO("MCBP is enabled\n");
3863 }
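
/*
 * For illustration, the amdgpu.mcbp module parameter handled above behaves
 * roughly as:
 *
 *	amdgpu.mcbp=1 -> mid command buffer preemption forced on
 *	amdgpu.mcbp=0 -> mid command buffer preemption forced off
 *	default       -> adev->gfx.mcbp is left as previously configured
 *
 * Under SR-IOV, MCBP is enabled regardless of the parameter value.
 */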
3864 
3865 /**
3866  * amdgpu_device_init - initialize the driver
3867  *
3868  * @adev: amdgpu_device pointer
3869  * @flags: driver flags
3870  *
3871  * Initializes the driver info and hw (all asics).
3872  * Returns 0 for success or an error on failure.
3873  * Called at driver startup.
3874  */
3875 int amdgpu_device_init(struct amdgpu_device *adev,
3876 		       uint32_t flags)
3877 {
3878 	struct drm_device *ddev = adev_to_drm(adev);
3879 	struct pci_dev *pdev = adev->pdev;
3880 	int r, i;
3881 	bool px = false;
3882 	u32 max_MBps;
3883 	int tmp;
3884 
3885 	adev->shutdown = false;
3886 	adev->flags = flags;
3887 
3888 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3889 		adev->asic_type = amdgpu_force_asic_type;
3890 	else
3891 		adev->asic_type = flags & AMD_ASIC_MASK;
3892 
3893 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3894 	if (amdgpu_emu_mode == 1)
3895 		adev->usec_timeout *= 10;
3896 	adev->gmc.gart_size = 512 * 1024 * 1024;
3897 	adev->accel_working = false;
3898 	adev->num_rings = 0;
3899 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3900 	adev->mman.buffer_funcs = NULL;
3901 	adev->mman.buffer_funcs_ring = NULL;
3902 	adev->vm_manager.vm_pte_funcs = NULL;
3903 	adev->vm_manager.vm_pte_num_scheds = 0;
3904 	adev->gmc.gmc_funcs = NULL;
3905 	adev->harvest_ip_mask = 0x0;
3906 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3907 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3908 
3909 	adev->smc_rreg = &amdgpu_invalid_rreg;
3910 	adev->smc_wreg = &amdgpu_invalid_wreg;
3911 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3912 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3913 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3914 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3915 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3916 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3917 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3918 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3919 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3920 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
3921 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3922 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3923 	adev->didt_rreg = &amdgpu_invalid_rreg;
3924 	adev->didt_wreg = &amdgpu_invalid_wreg;
3925 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3926 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3927 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3928 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3929 
3930 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3931 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3932 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3933 
3934 	/* mutex initialization is all done here so we
3935 	 * can call these functions again without locking issues
3936 	 */
3937 	mutex_init(&adev->firmware.mutex);
3938 	mutex_init(&adev->pm.mutex);
3939 	mutex_init(&adev->gfx.gpu_clock_mutex);
3940 	mutex_init(&adev->srbm_mutex);
3941 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3942 	mutex_init(&adev->gfx.gfx_off_mutex);
3943 	mutex_init(&adev->gfx.partition_mutex);
3944 	mutex_init(&adev->grbm_idx_mutex);
3945 	mutex_init(&adev->mn_lock);
3946 	mutex_init(&adev->virt.vf_errors.lock);
3947 	hash_init(adev->mn_hash);
3948 	mutex_init(&adev->psp.mutex);
3949 	mutex_init(&adev->notifier_lock);
3950 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3951 	mutex_init(&adev->benchmark_mutex);
3952 
3953 	amdgpu_device_init_apu_flags(adev);
3954 
3955 	r = amdgpu_device_check_arguments(adev);
3956 	if (r)
3957 		return r;
3958 
3959 	spin_lock_init(&adev->mmio_idx_lock);
3960 	spin_lock_init(&adev->smc_idx_lock);
3961 	spin_lock_init(&adev->pcie_idx_lock);
3962 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3963 	spin_lock_init(&adev->didt_idx_lock);
3964 	spin_lock_init(&adev->gc_cac_idx_lock);
3965 	spin_lock_init(&adev->se_cac_idx_lock);
3966 	spin_lock_init(&adev->audio_endpt_idx_lock);
3967 	spin_lock_init(&adev->mm_stats.lock);
3968 
3969 	INIT_LIST_HEAD(&adev->shadow_list);
3970 	mutex_init(&adev->shadow_list_lock);
3971 
3972 	INIT_LIST_HEAD(&adev->reset_list);
3973 
3974 	INIT_LIST_HEAD(&adev->ras_list);
3975 
3976 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3977 
3978 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3979 			  amdgpu_device_delayed_init_work_handler);
3980 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3981 			  amdgpu_device_delay_enable_gfx_off);
3982 
3983 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3984 
3985 	adev->gfx.gfx_off_req_count = 1;
3986 	adev->gfx.gfx_off_residency = 0;
3987 	adev->gfx.gfx_off_entrycount = 0;
3988 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3989 
3990 	atomic_set(&adev->throttling_logging_enabled, 1);
3991 	/*
3992 	 * If throttling continues, logging will be performed every minute
3993 	 * to avoid log flooding. "-1" is subtracted since the thermal
3994 	 * throttling interrupt comes every second. Thus, the total logging
3995 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3996 	 * for throttling interrupt) = 60 seconds.
3997 	 */
3998 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3999 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4000 
4001 	/* Registers mapping */
4002 	/* TODO: block userspace mapping of io register */
4003 	if (adev->asic_type >= CHIP_BONAIRE) {
4004 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4005 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4006 	} else {
4007 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4008 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4009 	}
4010 
4011 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4012 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4013 
4014 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4015 	if (!adev->rmmio)
4016 		return -ENOMEM;
4017 
4018 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4019 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4020 
4021 	/*
4022 	 * Reset domain needs to be present early, before the XGMI hive is
4023 	 * discovered (if any) and initialized, so that the reset semaphore
4024 	 * and in_gpu_reset flag can be used early during init, before RREG32.
4025 	 */
4026 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4027 	if (!adev->reset_domain)
4028 		return -ENOMEM;
4029 
4030 	/* detect hw virtualization here */
4031 	amdgpu_detect_virtualization(adev);
4032 
4033 	amdgpu_device_get_pcie_info(adev);
4034 
4035 	r = amdgpu_device_get_job_timeout_settings(adev);
4036 	if (r) {
4037 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4038 		return r;
4039 	}
4040 
4041 	/* early init functions */
4042 	r = amdgpu_device_ip_early_init(adev);
4043 	if (r)
4044 		return r;
4045 
4046 	amdgpu_device_set_mcbp(adev);
4047 
4048 	/* Get rid of things like offb */
4049 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4050 	if (r)
4051 		return r;
4052 
4053 	/* Enable TMZ based on IP_VERSION */
4054 	amdgpu_gmc_tmz_set(adev);
4055 
4056 	amdgpu_gmc_noretry_set(adev);
4057 	/* Need to get xgmi info early to decide the reset behavior */
4058 	if (adev->gmc.xgmi.supported) {
4059 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4060 		if (r)
4061 			return r;
4062 	}
4063 
4064 	/* enable PCIE atomic ops */
4065 	if (amdgpu_sriov_vf(adev)) {
4066 		if (adev->virt.fw_reserve.p_pf2vf)
4067 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4068 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4069 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4070 	/* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an
4071 	 * internal path natively supports atomics, so set have_atomics_support to true.
4072 	 */
4073 	} else if ((adev->flags & AMD_IS_APU) &&
4074 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4075 		    IP_VERSION(9, 0, 0))) {
4076 		adev->have_atomics_support = true;
4077 	} else {
4078 		adev->have_atomics_support =
4079 			!pci_enable_atomic_ops_to_root(adev->pdev,
4080 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4081 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4082 	}
4083 
4084 	if (!adev->have_atomics_support)
4085 		dev_info(adev->dev, "PCIE atomic ops are not supported\n");
4086 
4087 	/* doorbell bar mapping and doorbell index init */
4088 	amdgpu_doorbell_init(adev);
4089 
4090 	if (amdgpu_emu_mode == 1) {
4091 		/* post the asic on emulation mode */
4092 		emu_soc_asic_init(adev);
4093 		goto fence_driver_init;
4094 	}
4095 
4096 	amdgpu_reset_init(adev);
4097 
4098 	/* detect if we are running with an SR-IOV vBIOS */
4099 	if (adev->bios)
4100 		amdgpu_device_detect_sriov_bios(adev);
4101 
4102 	/* check if we need to reset the asic
4103 	 *  E.g., driver was not cleanly unloaded previously, etc.
4104 	 */
4105 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4106 		if (adev->gmc.xgmi.num_physical_nodes) {
4107 			dev_info(adev->dev, "Pending hive reset.\n");
4108 			adev->gmc.xgmi.pending_reset = true;
4109 			/* Only need to init the blocks necessary for SMU to handle the reset */
4110 			for (i = 0; i < adev->num_ip_blocks; i++) {
4111 				if (!adev->ip_blocks[i].status.valid)
4112 					continue;
4113 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4114 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4115 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4116 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4117 					DRM_DEBUG("IP %s disabled for hw_init.\n",
4118 						adev->ip_blocks[i].version->funcs->name);
4119 					adev->ip_blocks[i].status.hw = true;
4120 				}
4121 			}
4122 		} else {
4123 			switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
4124 			case IP_VERSION(13, 0, 0):
4125 			case IP_VERSION(13, 0, 7):
4126 			case IP_VERSION(13, 0, 10):
4127 				r = psp_gpu_reset(adev);
4128 				break;
4129 			default:
4130 				tmp = amdgpu_reset_method;
4131 				/* It should do a default reset when loading or reloading the driver,
4132 				 * regardless of the module parameter reset_method.
4133 				 */
4134 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4135 				r = amdgpu_asic_reset(adev);
4136 				amdgpu_reset_method = tmp;
4137 				break;
4138 			}
4139 
4140 			if (r) {
4141 				dev_err(adev->dev, "asic reset on init failed\n");
4142 				goto failed;
4143 			}
4144 		}
4145 	}
4146 
4147 	/* Post card if necessary */
4148 	if (amdgpu_device_need_post(adev)) {
4149 		if (!adev->bios) {
4150 			dev_err(adev->dev, "no vBIOS found\n");
4151 			r = -EINVAL;
4152 			goto failed;
4153 		}
4154 		DRM_INFO("GPU posting now...\n");
4155 		r = amdgpu_device_asic_init(adev);
4156 		if (r) {
4157 			dev_err(adev->dev, "gpu post error!\n");
4158 			goto failed;
4159 		}
4160 	}
4161 
4162 	if (adev->bios) {
4163 		if (adev->is_atom_fw) {
4164 			/* Initialize clocks */
4165 			r = amdgpu_atomfirmware_get_clock_info(adev);
4166 			if (r) {
4167 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4168 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4169 				goto failed;
4170 			}
4171 		} else {
4172 			/* Initialize clocks */
4173 			r = amdgpu_atombios_get_clock_info(adev);
4174 			if (r) {
4175 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4176 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4177 				goto failed;
4178 			}
4179 			/* init i2c buses */
4180 			if (!amdgpu_device_has_dc_support(adev))
4181 				amdgpu_atombios_i2c_init(adev);
4182 		}
4183 	}
4184 
4185 fence_driver_init:
4186 	/* Fence driver */
4187 	r = amdgpu_fence_driver_sw_init(adev);
4188 	if (r) {
4189 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4190 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4191 		goto failed;
4192 	}
4193 
4194 	/* init the mode config */
4195 	drm_mode_config_init(adev_to_drm(adev));
4196 
4197 	r = amdgpu_device_ip_init(adev);
4198 	if (r) {
4199 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4200 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4201 		goto release_ras_con;
4202 	}
4203 
4204 	amdgpu_fence_driver_hw_init(adev);
4205 
4206 	dev_info(adev->dev,
4207 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4208 			adev->gfx.config.max_shader_engines,
4209 			adev->gfx.config.max_sh_per_se,
4210 			adev->gfx.config.max_cu_per_sh,
4211 			adev->gfx.cu_info.number);
4212 
4213 	adev->accel_working = true;
4214 
4215 	amdgpu_vm_check_compute_bug(adev);
4216 
4217 	/* Initialize the buffer migration limit. */
4218 	if (amdgpu_moverate >= 0)
4219 		max_MBps = amdgpu_moverate;
4220 	else
4221 		max_MBps = 8; /* Allow 8 MB/s. */
4222 	/* Get a log2 for easy divisions. */
4223 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4224 
4225 	/*
4226 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4227 	 * Otherwise the mgpu fan boost feature will be skipped because the
4228 	 * gpu instance count would be too low.
4229 	 */
4230 	amdgpu_register_gpu_instance(adev);
4231 
4232 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4233 	 * explicit gating rather than handling it automatically.
4234 	 */
4235 	if (!adev->gmc.xgmi.pending_reset) {
4236 		r = amdgpu_device_ip_late_init(adev);
4237 		if (r) {
4238 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4239 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4240 			goto release_ras_con;
4241 		}
4242 		/* must succeed. */
4243 		amdgpu_ras_resume(adev);
4244 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4245 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4246 	}
4247 
4248 	if (amdgpu_sriov_vf(adev)) {
4249 		amdgpu_virt_release_full_gpu(adev, true);
4250 		flush_delayed_work(&adev->delayed_init_work);
4251 	}
4252 
4253 	/*
4254 	 * Register these sysfs interfaces after `late_init`, since some of the
4255 	 * operations performed in `late_init` might affect the creation of
4256 	 * those sysfs interfaces.
4257 	 */
4258 	r = amdgpu_atombios_sysfs_init(adev);
4259 	if (r)
4260 		drm_err(&adev->ddev,
4261 			"registering atombios sysfs failed (%d).\n", r);
4262 
4263 	r = amdgpu_pm_sysfs_init(adev);
4264 	if (r)
4265 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4266 
4267 	r = amdgpu_ucode_sysfs_init(adev);
4268 	if (r) {
4269 		adev->ucode_sysfs_en = false;
4270 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4271 	} else
4272 		adev->ucode_sysfs_en = true;
4273 
4274 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4275 	if (r)
4276 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4277 
4278 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4279 	if (r)
4280 		dev_err(adev->dev,
4281 			"Could not create amdgpu board attributes\n");
4282 
4283 	amdgpu_fru_sysfs_init(adev);
4284 	amdgpu_reg_state_sysfs_init(adev);
4285 
4286 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4287 		r = amdgpu_pmu_init(adev);
4288 	if (r)
4289 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4290 
4291 	/* Keep the stored PCI config space at hand for restore on sudden PCI error */
4292 	if (amdgpu_device_cache_pci_state(adev->pdev))
4293 		pci_restore_state(pdev);
4294 
4295 	/* if we have more than one VGA card, then disable the amdgpu VGA resources */
4296 	/* this will fail for cards that aren't VGA class devices, just
4297 	 * ignore it
4298 	 */
4299 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4300 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4301 
4302 	px = amdgpu_device_supports_px(ddev);
4303 
4304 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4305 				apple_gmux_detect(NULL, NULL)))
4306 		vga_switcheroo_register_client(adev->pdev,
4307 					       &amdgpu_switcheroo_ops, px);
4308 
4309 	if (px)
4310 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4311 
4312 	if (adev->gmc.xgmi.pending_reset)
4313 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4314 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4315 
4316 	amdgpu_device_check_iommu_direct_map(adev);
4317 
4318 	return 0;
4319 
4320 release_ras_con:
4321 	if (amdgpu_sriov_vf(adev))
4322 		amdgpu_virt_release_full_gpu(adev, true);
4323 
4324 	/* failed in exclusive mode due to timeout */
4325 	if (amdgpu_sriov_vf(adev) &&
4326 		!amdgpu_sriov_runtime(adev) &&
4327 		amdgpu_virt_mmio_blocked(adev) &&
4328 		!amdgpu_virt_wait_reset(adev)) {
4329 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4330 		/* Don't send request since VF is inactive. */
4331 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4332 		adev->virt.ops = NULL;
4333 		r = -EAGAIN;
4334 	}
4335 	amdgpu_release_ras_context(adev);
4336 
4337 failed:
4338 	amdgpu_vf_error_trans_all(adev);
4339 
4340 	return r;
4341 }
4342 
4343 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4344 {
4345 
4346 	/* Clear all CPU mappings pointing to this device */
4347 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4348 
4349 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4350 	amdgpu_doorbell_fini(adev);
4351 
4352 	iounmap(adev->rmmio);
4353 	adev->rmmio = NULL;
4354 	if (adev->mman.aper_base_kaddr)
4355 		iounmap(adev->mman.aper_base_kaddr);
4356 	adev->mman.aper_base_kaddr = NULL;
4357 
4358 	/* Memory manager related */
4359 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4360 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4361 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4362 	}
4363 }
4364 
4365 /**
4366  * amdgpu_device_fini_hw - tear down the driver
4367  *
4368  * @adev: amdgpu_device pointer
4369  *
4370  * Tear down the driver info (all asics).
4371  * Called at driver shutdown.
4372  */
4373 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4374 {
4375 	dev_info(adev->dev, "amdgpu: finishing device.\n");
4376 	flush_delayed_work(&adev->delayed_init_work);
4377 	adev->shutdown = true;
4378 
4379 	/* make sure the IB tests have finished before entering exclusive mode
4380 	 * to avoid preemption during the IB tests
4381 	 */
4382 	if (amdgpu_sriov_vf(adev)) {
4383 		amdgpu_virt_request_full_gpu(adev, false);
4384 		amdgpu_virt_fini_data_exchange(adev);
4385 	}
4386 
4387 	/* disable all interrupts */
4388 	amdgpu_irq_disable_all(adev);
4389 	if (adev->mode_info.mode_config_initialized) {
4390 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4391 			drm_helper_force_disable_all(adev_to_drm(adev));
4392 		else
4393 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4394 	}
4395 	amdgpu_fence_driver_hw_fini(adev);
4396 
4397 	if (adev->mman.initialized)
4398 		drain_workqueue(adev->mman.bdev.wq);
4399 
4400 	if (adev->pm.sysfs_initialized)
4401 		amdgpu_pm_sysfs_fini(adev);
4402 	if (adev->ucode_sysfs_en)
4403 		amdgpu_ucode_sysfs_fini(adev);
4404 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4405 	amdgpu_fru_sysfs_fini(adev);
4406 
4407 	amdgpu_reg_state_sysfs_fini(adev);
4408 
4409 	/* RAS features must be disabled before hw fini */
4410 	amdgpu_ras_pre_fini(adev);
4411 
4412 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4413 
4414 	amdgpu_device_ip_fini_early(adev);
4415 
4416 	amdgpu_irq_fini_hw(adev);
4417 
4418 	if (adev->mman.initialized)
4419 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4420 
4421 	amdgpu_gart_dummy_page_fini(adev);
4422 
4423 	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4424 		amdgpu_device_unmap_mmio(adev);
4425 
4426 }
4427 
4428 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4429 {
4430 	int idx;
4431 	bool px;
4432 
4433 	amdgpu_fence_driver_sw_fini(adev);
4434 	amdgpu_device_ip_fini(adev);
4435 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4436 	adev->accel_working = false;
4437 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4438 
4439 	amdgpu_reset_fini(adev);
4440 
4441 	/* free i2c buses */
4442 	if (!amdgpu_device_has_dc_support(adev))
4443 		amdgpu_i2c_fini(adev);
4444 
4445 	if (amdgpu_emu_mode != 1)
4446 		amdgpu_atombios_fini(adev);
4447 
4448 	kfree(adev->bios);
4449 	adev->bios = NULL;
4450 
4451 	kfree(adev->fru_info);
4452 	adev->fru_info = NULL;
4453 
4454 	px = amdgpu_device_supports_px(adev_to_drm(adev));
4455 
4456 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4457 				apple_gmux_detect(NULL, NULL)))
4458 		vga_switcheroo_unregister_client(adev->pdev);
4459 
4460 	if (px)
4461 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4462 
4463 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4464 		vga_client_unregister(adev->pdev);
4465 
4466 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4467 
4468 		iounmap(adev->rmmio);
4469 		adev->rmmio = NULL;
4470 		amdgpu_doorbell_fini(adev);
4471 		drm_dev_exit(idx);
4472 	}
4473 
4474 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4475 		amdgpu_pmu_fini(adev);
4476 	if (adev->mman.discovery_bin)
4477 		amdgpu_discovery_fini(adev);
4478 
4479 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4480 	adev->reset_domain = NULL;
4481 
4482 	kfree(adev->pci_state);
4483 
4484 }
4485 
4486 /**
4487  * amdgpu_device_evict_resources - evict device resources
4488  * @adev: amdgpu device object
4489  *
4490  * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4491  * of the vram memory type. Mainly used for evicting device resources
4492  * at suspend time.
4493  *
4494  */
4495 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4496 {
4497 	int ret;
4498 
4499 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4500 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4501 		return 0;
4502 
4503 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4504 	if (ret)
4505 		DRM_WARN("evicting device resources failed\n");
4506 	return ret;
4507 }
4508 
4509 /*
4510  * Suspend & resume.
4511  */
4512 /**
4513  * amdgpu_device_prepare - prepare for device suspend
4514  *
4515  * @dev: drm dev pointer
4516  *
4517  * Prepare to put the hw in the suspend state (all asics).
4518  * Returns 0 for success or an error on failure.
4519  * Called at driver suspend.
4520  */
4521 int amdgpu_device_prepare(struct drm_device *dev)
4522 {
4523 	struct amdgpu_device *adev = drm_to_adev(dev);
4524 	int i, r;
4525 
4526 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4527 		return 0;
4528 
4529 	/* Evict the majority of BOs before starting suspend sequence */
4530 	r = amdgpu_device_evict_resources(adev);
4531 	if (r)
4532 		return r;
4533 
4534 	for (i = 0; i < adev->num_ip_blocks; i++) {
4535 		if (!adev->ip_blocks[i].status.valid)
4536 			continue;
4537 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4538 			continue;
4539 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4540 		if (r)
4541 			return r;
4542 	}
4543 
4544 	return 0;
4545 }
4546 
4547 /**
4548  * amdgpu_device_suspend - initiate device suspend
4549  *
4550  * @dev: drm dev pointer
4551  * @fbcon: notify the fbdev of suspend
4552  *
4553  * Puts the hw in the suspend state (all asics).
4554  * Returns 0 for success or an error on failure.
4555  * Called at driver suspend.
4556  */
4557 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4558 {
4559 	struct amdgpu_device *adev = drm_to_adev(dev);
4560 	int r = 0;
4561 
4562 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4563 		return 0;
4564 
4565 	adev->in_suspend = true;
4566 
4567 	if (amdgpu_sriov_vf(adev)) {
4568 		amdgpu_virt_fini_data_exchange(adev);
4569 		r = amdgpu_virt_request_full_gpu(adev, false);
4570 		if (r)
4571 			return r;
4572 	}
4573 
4574 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4575 		DRM_WARN("smart shift update failed\n");
4576 
4577 	if (fbcon)
4578 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4579 
4580 	cancel_delayed_work_sync(&adev->delayed_init_work);
4581 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4582 
4583 	amdgpu_ras_suspend(adev);
4584 
4585 	amdgpu_device_ip_suspend_phase1(adev);
4586 
4587 	if (!adev->in_s0ix)
4588 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4589 
4590 	r = amdgpu_device_evict_resources(adev);
4591 	if (r)
4592 		return r;
4593 
4594 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4595 
4596 	amdgpu_fence_driver_hw_fini(adev);
4597 
4598 	amdgpu_device_ip_suspend_phase2(adev);
4599 
4600 	if (amdgpu_sriov_vf(adev))
4601 		amdgpu_virt_release_full_gpu(adev, false);
4602 
4603 	r = amdgpu_dpm_notify_rlc_state(adev, false);
4604 	if (r)
4605 		return r;
4606 
4607 	return 0;
4608 }
4609 
4610 /**
4611  * amdgpu_device_resume - initiate device resume
4612  *
4613  * @dev: drm dev pointer
4614  * @fbcon: notify the fbdev of resume
4615  *
4616  * Bring the hw back to operating state (all asics).
4617  * Returns 0 for success or an error on failure.
4618  * Called at driver resume.
4619  */
4620 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4621 {
4622 	struct amdgpu_device *adev = drm_to_adev(dev);
4623 	int r = 0;
4624 
4625 	if (amdgpu_sriov_vf(adev)) {
4626 		r = amdgpu_virt_request_full_gpu(adev, true);
4627 		if (r)
4628 			return r;
4629 	}
4630 
4631 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4632 		return 0;
4633 
4634 	if (adev->in_s0ix)
4635 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4636 
4637 	/* post card */
4638 	if (amdgpu_device_need_post(adev)) {
4639 		r = amdgpu_device_asic_init(adev);
4640 		if (r)
4641 			dev_err(adev->dev, "amdgpu asic init failed\n");
4642 	}
4643 
4644 	r = amdgpu_device_ip_resume(adev);
4645 
4646 	if (r) {
4647 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4648 		goto exit;
4649 	}
4650 	amdgpu_fence_driver_hw_init(adev);
4651 
4652 	if (!adev->in_s0ix) {
4653 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4654 		if (r)
4655 			goto exit;
4656 	}
4657 
4658 	r = amdgpu_device_ip_late_init(adev);
4659 	if (r)
4660 		goto exit;
4661 
4662 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4663 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4664 exit:
4665 	if (amdgpu_sriov_vf(adev)) {
4666 		amdgpu_virt_init_data_exchange(adev);
4667 		amdgpu_virt_release_full_gpu(adev, true);
4668 	}
4669 
4670 	if (r)
4671 		return r;
4672 
4673 	/* Make sure IB tests flushed */
4674 	flush_delayed_work(&adev->delayed_init_work);
4675 
4676 	if (fbcon)
4677 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4678 
4679 	amdgpu_ras_resume(adev);
4680 
4681 	if (adev->mode_info.num_crtc) {
4682 		/*
4683 		 * Most of the connector probing functions try to acquire runtime pm
4684 		 * refs to ensure that the GPU is powered on when connector polling is
4685 		 * performed. Since we're calling this from a runtime PM callback,
4686 		 * trying to acquire rpm refs will cause us to deadlock.
4687 		 *
4688 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4689 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4690 		 */
4691 #ifdef CONFIG_PM
4692 		dev->dev->power.disable_depth++;
4693 #endif
4694 		if (!adev->dc_enabled)
4695 			drm_helper_hpd_irq_event(dev);
4696 		else
4697 			drm_kms_helper_hotplug_event(dev);
4698 #ifdef CONFIG_PM
4699 		dev->dev->power.disable_depth--;
4700 #endif
4701 	}
4702 	adev->in_suspend = false;
4703 
4704 	if (adev->enable_mes)
4705 		amdgpu_mes_self_test(adev);
4706 
4707 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4708 		DRM_WARN("smart shift update failed\n");
4709 
4710 	return 0;
4711 }
4712 
4713 /**
4714  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4715  *
4716  * @adev: amdgpu_device pointer
4717  *
4718  * The list of all the hardware IPs that make up the asic is walked and
4719  * the check_soft_reset callbacks are run.  check_soft_reset determines
4720  * if the asic is still hung or not.
4721  * Returns true if any of the IPs are still in a hung state, false if not.
4722  */
4723 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4724 {
4725 	int i;
4726 	bool asic_hang = false;
4727 
4728 	if (amdgpu_sriov_vf(adev))
4729 		return true;
4730 
4731 	if (amdgpu_asic_need_full_reset(adev))
4732 		return true;
4733 
4734 	for (i = 0; i < adev->num_ip_blocks; i++) {
4735 		if (!adev->ip_blocks[i].status.valid)
4736 			continue;
4737 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4738 			adev->ip_blocks[i].status.hang =
4739 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4740 		if (adev->ip_blocks[i].status.hang) {
4741 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4742 			asic_hang = true;
4743 		}
4744 	}
4745 	return asic_hang;
4746 }
4747 
4748 /**
4749  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4750  *
4751  * @adev: amdgpu_device pointer
4752  *
4753  * The list of all the hardware IPs that make up the asic is walked and the
4754  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4755  * handles any IP specific hardware or software state changes that are
4756  * necessary for a soft reset to succeed.
4757  * Returns 0 on success, negative error code on failure.
4758  */
4759 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4760 {
4761 	int i, r = 0;
4762 
4763 	for (i = 0; i < adev->num_ip_blocks; i++) {
4764 		if (!adev->ip_blocks[i].status.valid)
4765 			continue;
4766 		if (adev->ip_blocks[i].status.hang &&
4767 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4768 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4769 			if (r)
4770 				return r;
4771 		}
4772 	}
4773 
4774 	return 0;
4775 }
4776 
4777 /**
4778  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4779  *
4780  * @adev: amdgpu_device pointer
4781  *
4782  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4783  * reset is necessary to recover.
4784  * Returns true if a full asic reset is required, false if not.
4785  */
4786 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4787 {
4788 	int i;
4789 
4790 	if (amdgpu_asic_need_full_reset(adev))
4791 		return true;
4792 
4793 	for (i = 0; i < adev->num_ip_blocks; i++) {
4794 		if (!adev->ip_blocks[i].status.valid)
4795 			continue;
4796 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4797 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4798 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4799 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4800 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4801 			if (adev->ip_blocks[i].status.hang) {
4802 				dev_info(adev->dev, "Some blocks need a full reset!\n");
4803 				return true;
4804 			}
4805 		}
4806 	}
4807 	return false;
4808 }
4809 
4810 /**
4811  * amdgpu_device_ip_soft_reset - do a soft reset
4812  *
4813  * @adev: amdgpu_device pointer
4814  *
4815  * The list of all the hardware IPs that make up the asic is walked and the
4816  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4817  * IP specific hardware or software state changes that are necessary to soft
4818  * reset the IP.
4819  * Returns 0 on success, negative error code on failure.
4820  */
4821 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4822 {
4823 	int i, r = 0;
4824 
4825 	for (i = 0; i < adev->num_ip_blocks; i++) {
4826 		if (!adev->ip_blocks[i].status.valid)
4827 			continue;
4828 		if (adev->ip_blocks[i].status.hang &&
4829 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4830 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4831 			if (r)
4832 				return r;
4833 		}
4834 	}
4835 
4836 	return 0;
4837 }
4838 
4839 /**
4840  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4841  *
4842  * @adev: amdgpu_device pointer
4843  *
4844  * The list of all the hardware IPs that make up the asic is walked and the
4845  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4846  * handles any IP specific hardware or software state changes that are
4847  * necessary after the IP has been soft reset.
4848  * Returns 0 on success, negative error code on failure.
4849  */
4850 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4851 {
4852 	int i, r = 0;
4853 
4854 	for (i = 0; i < adev->num_ip_blocks; i++) {
4855 		if (!adev->ip_blocks[i].status.valid)
4856 			continue;
4857 		if (adev->ip_blocks[i].status.hang &&
4858 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4859 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4860 		if (r)
4861 			return r;
4862 	}
4863 
4864 	return 0;
4865 }
4866 
4867 /**
4868  * amdgpu_device_recover_vram - Recover some VRAM contents
4869  *
4870  * @adev: amdgpu_device pointer
4871  *
4872  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4873  * restore things like GPUVM page tables after a GPU reset where
4874  * the contents of VRAM might be lost.
4875  *
4876  * Returns:
4877  * 0 on success, negative error code on failure.
4878  */
4879 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4880 {
4881 	struct dma_fence *fence = NULL, *next = NULL;
4882 	struct amdgpu_bo *shadow;
4883 	struct amdgpu_bo_vm *vmbo;
4884 	long r = 1, tmo;
4885 
4886 	if (amdgpu_sriov_runtime(adev))
4887 		tmo = msecs_to_jiffies(8000);
4888 	else
4889 		tmo = msecs_to_jiffies(100);
4890 
4891 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4892 	mutex_lock(&adev->shadow_list_lock);
4893 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4894 		/* If vm is compute context or adev is APU, shadow will be NULL */
4895 		if (!vmbo->shadow)
4896 			continue;
4897 		shadow = vmbo->shadow;
4898 
4899 		/* No need to recover an evicted BO */
4900 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4901 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4902 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4903 			continue;
4904 
4905 		r = amdgpu_bo_restore_shadow(shadow, &next);
4906 		if (r)
4907 			break;
4908 
4909 		if (fence) {
4910 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4911 			dma_fence_put(fence);
4912 			fence = next;
4913 			if (tmo == 0) {
4914 				r = -ETIMEDOUT;
4915 				break;
4916 			} else if (tmo < 0) {
4917 				r = tmo;
4918 				break;
4919 			}
4920 		} else {
4921 			fence = next;
4922 		}
4923 	}
4924 	mutex_unlock(&adev->shadow_list_lock);
4925 
4926 	if (fence)
4927 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4928 	dma_fence_put(fence);
4929 
4930 	if (r < 0 || tmo <= 0) {
4931 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4932 		return -EIO;
4933 	}
4934 
4935 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4936 	return 0;
4937 }
4938 
4939 
4940 /**
4941  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4942  *
4943  * @adev: amdgpu_device pointer
4944  * @from_hypervisor: request from hypervisor
4945  *
4946  * Do a VF FLR and reinitialize the ASIC.
4947  * Returns 0 on success, an error code otherwise.
4948  */
4949 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4950 				     bool from_hypervisor)
4951 {
4952 	int r;
4953 	struct amdgpu_hive_info *hive = NULL;
4954 	int retry_limit = 0;
4955 
4956 retry:
4957 	amdgpu_amdkfd_pre_reset(adev);
4958 
4959 	if (from_hypervisor)
4960 		r = amdgpu_virt_request_full_gpu(adev, true);
4961 	else
4962 		r = amdgpu_virt_reset_gpu(adev);
4963 	if (r)
4964 		return r;
4965 	amdgpu_irq_gpu_reset_resume_helper(adev);
4966 
4967 	/* some SW cleanup the VF needs to do before recovery */
4968 	amdgpu_virt_post_reset(adev);
4969 
4970 	/* Resume IP prior to SMC */
4971 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4972 	if (r)
4973 		goto error;
4974 
4975 	amdgpu_virt_init_data_exchange(adev);
4976 
4977 	r = amdgpu_device_fw_loading(adev);
4978 	if (r)
4979 		return r;
4980 
4981 	/* now we are okay to resume SMC/CP/SDMA */
4982 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4983 	if (r)
4984 		goto error;
4985 
4986 	hive = amdgpu_get_xgmi_hive(adev);
4987 	/* Update PSP FW topology after reset */
4988 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4989 		r = amdgpu_xgmi_update_topology(hive, adev);
4990 
4991 	if (hive)
4992 		amdgpu_put_xgmi_hive(hive);
4993 
4994 	if (!r) {
4995 		r = amdgpu_ib_ring_tests(adev);
4996 
4997 		amdgpu_amdkfd_post_reset(adev);
4998 	}
4999 
5000 error:
5001 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5002 		amdgpu_inc_vram_lost(adev);
5003 		r = amdgpu_device_recover_vram(adev);
5004 	}
5005 	amdgpu_virt_release_full_gpu(adev, true);
5006 
5007 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5008 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5009 			retry_limit++;
5010 			goto retry;
5011 		} else
5012 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5013 	}
5014 
5015 	return r;
5016 }
5017 
5018 /**
5019  * amdgpu_device_has_job_running - check if there is any job in the pending list
5020  *
5021  * @adev: amdgpu_device pointer
5022  *
5023  * check if there is any job in the pending list
5024  */
5025 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5026 {
5027 	int i;
5028 	struct drm_sched_job *job;
5029 
5030 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5031 		struct amdgpu_ring *ring = adev->rings[i];
5032 
5033 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5034 			continue;
5035 
5036 		spin_lock(&ring->sched.job_list_lock);
5037 		job = list_first_entry_or_null(&ring->sched.pending_list,
5038 					       struct drm_sched_job, list);
5039 		spin_unlock(&ring->sched.job_list_lock);
5040 		if (job)
5041 			return true;
5042 	}
5043 	return false;
5044 }
5045 
5046 /**
5047  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5048  *
5049  * @adev: amdgpu_device pointer
5050  *
5051  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5052  * a hung GPU.
5053  */
5054 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5055 {
5056 
5057 	if (amdgpu_gpu_recovery == 0)
5058 		goto disabled;
5059 
5060 	/* Skip soft reset check in fatal error mode */
5061 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5062 		return true;
5063 
5064 	if (amdgpu_sriov_vf(adev))
5065 		return true;
5066 
5067 	if (amdgpu_gpu_recovery == -1) {
5068 		switch (adev->asic_type) {
5069 #ifdef CONFIG_DRM_AMDGPU_SI
5070 		case CHIP_VERDE:
5071 		case CHIP_TAHITI:
5072 		case CHIP_PITCAIRN:
5073 		case CHIP_OLAND:
5074 		case CHIP_HAINAN:
5075 #endif
5076 #ifdef CONFIG_DRM_AMDGPU_CIK
5077 		case CHIP_KAVERI:
5078 		case CHIP_KABINI:
5079 		case CHIP_MULLINS:
5080 #endif
5081 		case CHIP_CARRIZO:
5082 		case CHIP_STONEY:
5083 		case CHIP_CYAN_SKILLFISH:
5084 			goto disabled;
5085 		default:
5086 			break;
5087 		}
5088 	}
5089 
5090 	return true;
5091 
5092 disabled:
5093 	dev_info(adev->dev, "GPU recovery disabled.\n");
5094 	return false;
5095 }
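
/*
 * For illustration, the amdgpu.gpu_recovery module parameter is interpreted
 * above roughly as:
 *
 *	amdgpu.gpu_recovery=0  -> recovery disabled
 *	amdgpu.gpu_recovery=1  -> recovery enabled
 *	amdgpu.gpu_recovery=-1 -> auto: enabled, except on the legacy ASICs
 *	                          listed in the switch above
 *
 * With any non-zero setting, SR-IOV VFs and devices without RAS poison mode
 * support skip the per-ASIC check and always allow recovery.
 */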
5096 
5097 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5098 {
5099 	u32 i;
5100 	int ret = 0;
5101 
5102 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5103 
5104 	dev_info(adev->dev, "GPU mode1 reset\n");
5105 
5106 	/* disable BM */
5107 	pci_clear_master(adev->pdev);
5108 
5109 	amdgpu_device_cache_pci_state(adev->pdev);
5110 
5111 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5112 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5113 		ret = amdgpu_dpm_mode1_reset(adev);
5114 	} else {
5115 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5116 		ret = psp_gpu_reset(adev);
5117 	}
5118 
5119 	if (ret)
5120 		goto mode1_reset_failed;
5121 
5122 	amdgpu_device_load_pci_state(adev->pdev);
5123 	ret = amdgpu_psp_wait_for_bootloader(adev);
5124 	if (ret)
5125 		goto mode1_reset_failed;
5126 
5127 	/* wait for asic to come out of reset */
5128 	for (i = 0; i < adev->usec_timeout; i++) {
5129 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5130 
5131 		if (memsize != 0xffffffff)
5132 			break;
5133 		udelay(1);
5134 	}
5135 
5136 	if (i >= adev->usec_timeout) {
5137 		ret = -ETIMEDOUT;
5138 		goto mode1_reset_failed;
5139 	}
5140 
5141 	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5142 
5143 	return 0;
5144 
5145 mode1_reset_failed:
5146 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5147 	return ret;
5148 }
5149 
5150 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5151 				 struct amdgpu_reset_context *reset_context)
5152 {
5153 	int i, r = 0;
5154 	struct amdgpu_job *job = NULL;
5155 	bool need_full_reset =
5156 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5157 
5158 	if (reset_context->reset_req_dev == adev)
5159 		job = reset_context->job;
5160 
5161 	if (amdgpu_sriov_vf(adev)) {
5162 		/* stop the data exchange thread */
5163 		amdgpu_virt_fini_data_exchange(adev);
5164 	}
5165 
5166 	amdgpu_fence_driver_isr_toggle(adev, true);
5167 
5168 	/* block all schedulers and reset given job's ring */
5169 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5170 		struct amdgpu_ring *ring = adev->rings[i];
5171 
5172 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5173 			continue;
5174 
5175 		/* Clear job fences from the fence driver to avoid force_completion
5176 		 * leaving NULL and vm flush fences in the fence driver
5177 		 */
5178 		amdgpu_fence_driver_clear_job_fences(ring);
5179 
5180 		/* after all hw jobs are reset, the hw fence is meaningless, so force_completion */
5181 		amdgpu_fence_driver_force_completion(ring);
5182 	}
5183 
5184 	amdgpu_fence_driver_isr_toggle(adev, false);
5185 
5186 	if (job && job->vm)
5187 		drm_sched_increase_karma(&job->base);
5188 
5189 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5190 	/* If reset handler not implemented, continue; otherwise return */
5191 	if (r == -EOPNOTSUPP)
5192 		r = 0;
5193 	else
5194 		return r;
5195 
5196 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5197 	if (!amdgpu_sriov_vf(adev)) {
5198 
5199 		if (!need_full_reset)
5200 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5201 
5202 		if (!need_full_reset && amdgpu_gpu_recovery &&
5203 		    amdgpu_device_ip_check_soft_reset(adev)) {
5204 			amdgpu_device_ip_pre_soft_reset(adev);
5205 			r = amdgpu_device_ip_soft_reset(adev);
5206 			amdgpu_device_ip_post_soft_reset(adev);
5207 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5208 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5209 				need_full_reset = true;
5210 			}
5211 		}
5212 
5213 		if (need_full_reset)
5214 			r = amdgpu_device_ip_suspend(adev);
5215 		if (need_full_reset)
5216 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5217 		else
5218 			clear_bit(AMDGPU_NEED_FULL_RESET,
5219 				  &reset_context->flags);
5220 	}
5221 
5222 	return r;
5223 }
5224 
5225 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5226 {
5227 	int i;
5228 
5229 	lockdep_assert_held(&adev->reset_domain->sem);
5230 
5231 	for (i = 0; i < adev->reset_info.num_regs; i++) {
5232 		adev->reset_info.reset_dump_reg_value[i] =
5233 			RREG32(adev->reset_info.reset_dump_reg_list[i]);
5234 
5235 		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5236 					     adev->reset_info.reset_dump_reg_value[i]);
5237 	}
5238 
5239 	return 0;
5240 }
5241 
5242 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5243 			 struct amdgpu_reset_context *reset_context)
5244 {
5245 	struct amdgpu_device *tmp_adev = NULL;
5246 	bool need_full_reset, skip_hw_reset, vram_lost = false;
5247 	int r = 0;
5248 	bool gpu_reset_for_dev_remove = false;
5249 
5250 	/* Try reset handler method first */
5251 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5252 				    reset_list);
5253 	amdgpu_reset_reg_dumps(tmp_adev);
5254 
5255 	reset_context->reset_device_list = device_list_handle;
5256 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5257 	/* If reset handler not implemented, continue; otherwise return */
5258 	if (r == -EOPNOTSUPP)
5259 		r = 0;
5260 	else
5261 		return r;
5262 
5263 	/* Reset handler not implemented, use the default method */
5264 	need_full_reset =
5265 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5266 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5267 
5268 	gpu_reset_for_dev_remove =
5269 		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5270 			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5271 
5272 	/*
5273 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5274 	 * to allow proper link negotiation in FW (within 1 sec)
5275 	 */
5276 	if (!skip_hw_reset && need_full_reset) {
5277 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5278 			/* For XGMI run all resets in parallel to speed up the process */
5279 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5280 				tmp_adev->gmc.xgmi.pending_reset = false;
5281 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5282 					r = -EALREADY;
5283 			} else
5284 				r = amdgpu_asic_reset(tmp_adev);
5285 
5286 			if (r) {
5287 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
5288 					 r, adev_to_drm(tmp_adev)->unique);
5289 				goto out;
5290 			}
5291 		}
5292 
5293 		/* For XGMI wait for all resets to complete before proceed */
5294 		if (!r) {
5295 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5296 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5297 					flush_work(&tmp_adev->xgmi_reset_work);
5298 					r = tmp_adev->asic_reset_res;
5299 					if (r)
5300 						break;
5301 				}
5302 			}
5303 		}
5304 	}
5305 
5306 	if (!r && amdgpu_ras_intr_triggered()) {
5307 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5308 			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5309 		}
5310 
5311 		amdgpu_ras_intr_cleared();
5312 	}
5313 
5314 	/* Since the mode1 reset affects base ip blocks, the
5315 	 * phase1 ip blocks need to be resumed. Otherwise there
5316 	 * will be a BIOS signature error and the psp bootloader
5317 	 * can't load kdb the next time the amdgpu driver is loaded.
5318 	 */
5319 	if (gpu_reset_for_dev_remove) {
5320 		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5321 			amdgpu_device_ip_resume_phase1(tmp_adev);
5322 
5323 		goto end;
5324 	}
5325 
5326 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5327 		if (need_full_reset) {
5328 			/* post card */
5329 			r = amdgpu_device_asic_init(tmp_adev);
5330 			if (r) {
5331 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5332 			} else {
5333 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5334 
5335 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5336 				if (r)
5337 					goto out;
5338 
5339 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5340 
5341 				amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5342 
5343 				if (vram_lost) {
5344 					DRM_INFO("VRAM is lost due to GPU reset!\n");
5345 					amdgpu_inc_vram_lost(tmp_adev);
5346 				}
5347 
5348 				r = amdgpu_device_fw_loading(tmp_adev);
5349 				if (r)
5350 					return r;
5351 
5352 				r = amdgpu_xcp_restore_partition_mode(
5353 					tmp_adev->xcp_mgr);
5354 				if (r)
5355 					goto out;
5356 
5357 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5358 				if (r)
5359 					goto out;
5360 
5361 				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5362 					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5363 
5364 				if (vram_lost)
5365 					amdgpu_device_fill_reset_magic(tmp_adev);
5366 
5367 				/*
5368 				 * Add this ASIC back as tracked since the reset
5369 				 * has already completed successfully.
5370 				 */
5371 				amdgpu_register_gpu_instance(tmp_adev);
5372 
5373 				if (!reset_context->hive &&
5374 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5375 					amdgpu_xgmi_add_device(tmp_adev);
5376 
5377 				r = amdgpu_device_ip_late_init(tmp_adev);
5378 				if (r)
5379 					goto out;
5380 
5381 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5382 
5383 				/*
5384 				 * The GPU enters a bad state once the number of
5385 				 * faulty pages reported by ECC reaches the
5386 				 * threshold, and RAS recovery is scheduled next.
5387 				 * So add one check here to break recovery if the
5388 				 * bad page threshold has indeed been exceeded, and
5389 				 * remind the user to retire this GPU or set a
5390 				 * bigger bad_page_threshold value to work around
5391 				 * this when probing the driver again.
5392 				 */
5393 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5394 					/* must succeed. */
5395 					amdgpu_ras_resume(tmp_adev);
5396 				} else {
5397 					r = -EINVAL;
5398 					goto out;
5399 				}
5400 
5401 				/* Update PSP FW topology after reset */
5402 				if (reset_context->hive &&
5403 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5404 					r = amdgpu_xgmi_update_topology(
5405 						reset_context->hive, tmp_adev);
5406 			}
5407 		}
5408 
5409 out:
5410 		if (!r) {
5411 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5412 			r = amdgpu_ib_ring_tests(tmp_adev);
5413 			if (r) {
5414 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5415 				need_full_reset = true;
5416 				r = -EAGAIN;
5417 				goto end;
5418 			}
5419 		}
5420 
5421 		if (!r)
5422 			r = amdgpu_device_recover_vram(tmp_adev);
5423 		else
5424 			tmp_adev->asic_reset_res = r;
5425 	}
5426 
5427 end:
5428 	if (need_full_reset)
5429 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5430 	else
5431 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5432 	return r;
5433 }
5434 
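/*
 * Record the MP1 (SMU) state matching the chosen reset method so the power
 * management code can adapt its behaviour while the reset is in progress;
 * cleared again by amdgpu_device_unset_mp1_state().
 */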
5435 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5436 {
5437 
5438 	switch (amdgpu_asic_reset_method(adev)) {
5439 	case AMD_RESET_METHOD_MODE1:
5440 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5441 		break;
5442 	case AMD_RESET_METHOD_MODE2:
5443 		adev->mp1_state = PP_MP1_STATE_RESET;
5444 		break;
5445 	default:
5446 		adev->mp1_state = PP_MP1_STATE_NONE;
5447 		break;
5448 	}
5449 }
5450 
5451 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5452 {
5453 	amdgpu_vf_error_trans_all(adev);
5454 	adev->mp1_state = PP_MP1_STATE_NONE;
5455 }
5456 
5457 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5458 {
5459 	struct pci_dev *p = NULL;
5460 
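	/* the HDMI/DP audio controller is function 1 of the GPU's PCI device */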
5461 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5462 			adev->pdev->bus->number, 1);
5463 	if (p) {
5464 		pm_runtime_enable(&(p->dev));
5465 		pm_runtime_resume(&(p->dev));
5466 	}
5467 
5468 	pci_dev_put(p);
5469 }
5470 
5471 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5472 {
5473 	enum amd_reset_method reset_method;
5474 	struct pci_dev *p = NULL;
5475 	u64 expires;
5476 
5477 	/*
5478 	 * For now, only BACO and mode1 reset are confirmed to
5479 	 * suffer from the audio issue if audio is not properly suspended.
5480 	 */
5481 	reset_method = amdgpu_asic_reset_method(adev);
5482 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5483 	     (reset_method != AMD_RESET_METHOD_MODE1))
5484 		return -EINVAL;
5485 
5486 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5487 			adev->pdev->bus->number, 1);
5488 	if (!p)
5489 		return -ENODEV;
5490 
5491 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5492 	if (!expires)
5493 		/*
5494 		 * If we cannot get the audio device autosuspend delay,
5495 		 * a fixed 4s interval will be used. Since 3s is the
5496 		 * audio controller's default autosuspend delay setting,
5497 		 * the 4s used here is guaranteed to cover it.
5498 		 */
5499 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5500 
5501 	while (!pm_runtime_status_suspended(&(p->dev))) {
5502 		if (!pm_runtime_suspend(&(p->dev)))
5503 			break;
5504 
5505 		if (expires < ktime_get_mono_fast_ns()) {
5506 			dev_warn(adev->dev, "failed to suspend display audio\n");
5507 			pci_dev_put(p);
5508 			/* TODO: abort the succeeding gpu reset? */
5509 			return -ETIMEDOUT;
5510 		}
5511 	}
5512 
5513 	pm_runtime_disable(&(p->dev));
5514 
5515 	pci_dev_put(p);
5516 	return 0;
5517 }
5518 
5519 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5520 {
5521 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5522 
5523 #if defined(CONFIG_DEBUG_FS)
5524 	if (!amdgpu_sriov_vf(adev))
5525 		cancel_work(&adev->reset_work);
5526 #endif
5527 
5528 	if (adev->kfd.dev)
5529 		cancel_work(&adev->kfd.reset_work);
5530 
5531 	if (amdgpu_sriov_vf(adev))
5532 		cancel_work(&adev->virt.flr_work);
5533 
5534 	if (con && adev->ras_enabled)
5535 		cancel_work(&con->recovery_work);
5536 
5537 }
5538 
5539 /**
5540  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5541  *
5542  * @adev: amdgpu_device pointer
5543  * @job: which job triggered the hang
5544  * @reset_context: amdgpu reset context pointer
5545  *
5546  * Attempt to reset the GPU if it has hung (all asics).
5547  * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
5548  * Returns 0 for success or an error on failure.
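 *
 * A typical caller (e.g. a job timeout handler) fills in a reset context and
 * hands it in, roughly like the following illustrative sketch (not copied
 * verbatim from any particular caller):
 *
 *   struct amdgpu_reset_context reset_context;
 *
 *   memset(&reset_context, 0, sizeof(reset_context));
 *   reset_context.method = AMD_RESET_METHOD_NONE;
 *   reset_context.reset_req_dev = adev;
 *   clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *   r = amdgpu_device_gpu_recover(adev, job, &reset_context);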
5549  */
5550 
5551 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5552 			      struct amdgpu_job *job,
5553 			      struct amdgpu_reset_context *reset_context)
5554 {
5555 	struct list_head device_list, *device_list_handle =  NULL;
5556 	bool job_signaled = false;
5557 	struct amdgpu_hive_info *hive = NULL;
5558 	struct amdgpu_device *tmp_adev = NULL;
5559 	int i, r = 0;
5560 	bool need_emergency_restart = false;
5561 	bool audio_suspended = false;
5562 	bool gpu_reset_for_dev_remove = false;
5563 
5564 	gpu_reset_for_dev_remove =
5565 			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5566 				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5567 
5568 	/*
5569 	 * Special case: RAS triggered and full reset isn't supported
5570 	 */
5571 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5572 
5573 	/*
5574 	 * Flush RAM to disk so that after reboot
5575 	 * the user can read the log and see why the system rebooted.
5576 	 */
5577 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5578 		amdgpu_ras_get_context(adev)->reboot) {
5579 		DRM_WARN("Emergency reboot.");
5580 
5581 		ksys_sync_helper();
5582 		emergency_restart();
5583 	}
5584 
5585 	dev_info(adev->dev, "GPU %s begin!\n",
5586 		need_emergency_restart ? "jobs stop":"reset");
5587 
5588 	if (!amdgpu_sriov_vf(adev))
5589 		hive = amdgpu_get_xgmi_hive(adev);
5590 	if (hive)
5591 		mutex_lock(&hive->hive_lock);
5592 
5593 	reset_context->job = job;
5594 	reset_context->hive = hive;
5595 	/*
5596 	 * Build list of devices to reset.
5597 	 * In case we are in XGMI hive mode, re-sort the device list
5598 	 * to put adev in the first position.
5599 	 */
5600 	INIT_LIST_HEAD(&device_list);
5601 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5602 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5603 			list_add_tail(&tmp_adev->reset_list, &device_list);
5604 			if (gpu_reset_for_dev_remove && adev->shutdown)
5605 				tmp_adev->shutdown = true;
5606 		}
5607 		if (!list_is_first(&adev->reset_list, &device_list))
5608 			list_rotate_to_front(&adev->reset_list, &device_list);
5609 		device_list_handle = &device_list;
5610 	} else {
5611 		list_add_tail(&adev->reset_list, &device_list);
5612 		device_list_handle = &device_list;
5613 	}
5614 
5615 	/* We need to lock reset domain only once both for XGMI and single device */
5616 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5617 				    reset_list);
5618 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5619 
5620 	/* block all schedulers and reset given job's ring */
5621 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5622 
5623 		amdgpu_device_set_mp1_state(tmp_adev);
5624 
5625 		/*
5626 		 * Try to put the audio codec into the suspend state
5627 		 * before the gpu reset starts.
5628 		 *
5629 		 * The power domain of the graphics device is shared
5630 		 * with the AZ power domain. Without this, we may
5631 		 * change the audio hardware from behind the audio
5632 		 * driver's back, which will trigger audio codec
5633 		 * errors.
5634 		 */
5635 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5636 			audio_suspended = true;
5637 
5638 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5639 
5640 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5641 
5642 		if (!amdgpu_sriov_vf(tmp_adev))
5643 			amdgpu_amdkfd_pre_reset(tmp_adev);
5644 
5645 		/*
5646 		 * Mark these ASICs to be reset as untracked first
5647 		 * and add them back after the reset has completed.
5648 		 */
5649 		amdgpu_unregister_gpu_instance(tmp_adev);
5650 
5651 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5652 
5653 		/* disable ras on ALL IPs */
5654 		if (!need_emergency_restart &&
5655 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5656 			amdgpu_ras_suspend(tmp_adev);
5657 
5658 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5659 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5660 
5661 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5662 				continue;
5663 
5664 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5665 
5666 			if (need_emergency_restart)
5667 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5668 		}
5669 		atomic_inc(&tmp_adev->gpu_reset_counter);
5670 	}
5671 
5672 	if (need_emergency_restart)
5673 		goto skip_sched_resume;
5674 
5675 	/*
5676 	 * Must check guilty signal here since after this point all old
5677 	 * HW fences are force signaled.
5678 	 *
5679 	 * job->base holds a reference to the parent fence
5680 	 */
5681 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5682 		job_signaled = true;
5683 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5684 		goto skip_hw_reset;
5685 	}
5686 
5687 retry:	/* Pre-ASIC-reset the rest of the adevs from the XGMI hive. */
5688 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5689 		if (gpu_reset_for_dev_remove) {
5690 			/* Workaround for ASICs that need to disable SMC first */
5691 			amdgpu_device_smu_fini_early(tmp_adev);
5692 		}
5693 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5694 		/* TODO: Should we stop? */
5695 		if (r) {
5696 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5697 				  r, adev_to_drm(tmp_adev)->unique);
5698 			tmp_adev->asic_reset_res = r;
5699 		}
5700 
5701 		/*
5702 		 * Drop all pending non-scheduler resets. Scheduler resets
5703 		 * were already dropped during drm_sched_stop.
5704 		 */
5705 		amdgpu_device_stop_pending_resets(tmp_adev);
5706 	}
5707 
5708 	/* Actual ASIC resets if needed.*/
5709 	/* Host driver will handle XGMI hive reset for SRIOV */
5710 	if (amdgpu_sriov_vf(adev)) {
5711 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
5712 		if (r)
5713 			adev->asic_reset_res = r;
5714 
5715 		/* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
5716 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5717 			    IP_VERSION(9, 4, 2) ||
5718 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5719 			amdgpu_ras_resume(adev);
5720 	} else {
5721 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5722 		if (r && r == -EAGAIN)
5723 			goto retry;
5724 
5725 		if (!r && gpu_reset_for_dev_remove)
5726 			goto recover_end;
5727 	}
5728 
5729 skip_hw_reset:
5730 
5731 	/* Post ASIC reset for all devs. */
5732 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5733 
5734 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5735 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5736 
5737 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5738 				continue;
5739 
5740 			drm_sched_start(&ring->sched, true);
5741 		}
5742 
5743 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5744 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5745 
5746 		if (tmp_adev->asic_reset_res)
5747 			r = tmp_adev->asic_reset_res;
5748 
5749 		tmp_adev->asic_reset_res = 0;
5750 
5751 		if (r) {
5752 			/* bad news, how do we tell it to userspace? */
5753 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5754 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5755 		} else {
5756 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5757 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5758 				DRM_WARN("smart shift update failed\n");
5759 		}
5760 	}
5761 
5762 skip_sched_resume:
5763 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5764 		/* unlock kfd: SRIOV would do it separately */
5765 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5766 			amdgpu_amdkfd_post_reset(tmp_adev);
5767 
5768 		/* kfd_post_reset will do nothing if the kfd device is not initialized,
5769 		 * so we need to bring up kfd here if it was not initialized before
5770 		 */
5771 		if (!adev->kfd.init_complete)
5772 			amdgpu_amdkfd_device_init(adev);
5773 
5774 		if (audio_suspended)
5775 			amdgpu_device_resume_display_audio(tmp_adev);
5776 
5777 		amdgpu_device_unset_mp1_state(tmp_adev);
5778 
5779 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5780 	}
5781 
5782 recover_end:
5783 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5784 					    reset_list);
5785 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5786 
5787 	if (hive) {
5788 		mutex_unlock(&hive->hive_lock);
5789 		amdgpu_put_xgmi_hive(hive);
5790 	}
5791 
5792 	if (r)
5793 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5794 
5795 	atomic_set(&adev->reset_domain->reset_res, r);
5796 	return r;
5797 }
5798 
5799 /**
5800  * amdgpu_device_partner_bandwidth - find the bandwidth of the appropriate partner
5801  *
5802  * @adev: amdgpu_device pointer
5803  * @speed: pointer to the speed of the link
5804  * @width: pointer to the width of the link
5805  *
5806  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5807  * first physical partner to an AMD dGPU.
5808  * This will exclude any virtual switches and links.
5809  */
5810 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5811 					    enum pci_bus_speed *speed,
5812 					    enum pcie_link_width *width)
5813 {
5814 	struct pci_dev *parent = adev->pdev;
5815 
5816 	if (!speed || !width)
5817 		return;
5818 
5819 	*speed = PCI_SPEED_UNKNOWN;
5820 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5821 
5822 	while ((parent = pci_upstream_bridge(parent))) {
5823 		/* skip upstream/downstream switches internal to the dGPU */
5824 		if (parent->vendor == PCI_VENDOR_ID_ATI)
5825 			continue;
5826 		*speed = pcie_get_speed_cap(parent);
5827 		*width = pcie_get_width_cap(parent);
5828 		break;
5829 	}
5830 }
5831 
5832 /**
5833  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5834  *
5835  * @adev: amdgpu_device pointer
5836  *
5837  * Fetches and stores in the driver the PCIE capabilities (gen speed
5838  * and lanes) of the slot the device is in. Handles APUs and
5839  * virtualized environments where PCIE config space may not be available.
5840  */
5841 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5842 {
5843 	struct pci_dev *pdev;
5844 	enum pci_bus_speed speed_cap, platform_speed_cap;
5845 	enum pcie_link_width platform_link_width;
5846 
5847 	if (amdgpu_pcie_gen_cap)
5848 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5849 
5850 	if (amdgpu_pcie_lane_cap)
5851 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5852 
5853 	/* covers APUs as well */
5854 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5855 		if (adev->pm.pcie_gen_mask == 0)
5856 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5857 		if (adev->pm.pcie_mlw_mask == 0)
5858 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5859 		return;
5860 	}
5861 
5862 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5863 		return;
5864 
5865 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5866 					&platform_link_width);
5867 
5868 	if (adev->pm.pcie_gen_mask == 0) {
5869 		/* asic caps */
5870 		pdev = adev->pdev;
5871 		speed_cap = pcie_get_speed_cap(pdev);
5872 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5873 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5874 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5875 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5876 		} else {
5877 			if (speed_cap == PCIE_SPEED_32_0GT)
5878 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5879 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5880 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5881 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5882 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5883 			else if (speed_cap == PCIE_SPEED_16_0GT)
5884 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5885 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5886 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5887 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5888 			else if (speed_cap == PCIE_SPEED_8_0GT)
5889 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5890 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5891 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5892 			else if (speed_cap == PCIE_SPEED_5_0GT)
5893 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5894 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5895 			else
5896 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5897 		}
5898 		/* platform caps */
5899 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5900 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5901 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5902 		} else {
5903 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5904 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5905 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5906 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5907 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5908 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5909 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5910 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5911 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5912 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5913 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5914 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5915 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5916 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5917 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5918 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5919 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5920 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5921 			else
5922 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5923 
5924 		}
5925 	}
5926 	if (adev->pm.pcie_mlw_mask == 0) {
5927 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5928 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5929 		} else {
5930 			switch (platform_link_width) {
5931 			case PCIE_LNK_X32:
5932 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5933 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5934 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5935 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5936 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5937 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5938 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5939 				break;
5940 			case PCIE_LNK_X16:
5941 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5942 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5943 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5944 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5945 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5946 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5947 				break;
5948 			case PCIE_LNK_X12:
5949 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5950 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5951 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5952 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5953 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5954 				break;
5955 			case PCIE_LNK_X8:
5956 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5957 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5958 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5959 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5960 				break;
5961 			case PCIE_LNK_X4:
5962 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5963 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5964 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5965 				break;
5966 			case PCIE_LNK_X2:
5967 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5968 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5969 				break;
5970 			case PCIE_LNK_X1:
5971 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5972 				break;
5973 			default:
5974 				break;
5975 			}
5976 		}
5977 	}
5978 }
5979 
5980 /**
5981  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5982  *
5983  * @adev: amdgpu_device pointer
5984  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5985  *
5986  * Return true if @peer_adev can access (DMA) @adev through the PCIe
5987  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5988  * @peer_adev.
5989  */
5990 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5991 				      struct amdgpu_device *peer_adev)
5992 {
5993 #ifdef CONFIG_HSA_AMD_P2P
5994 	uint64_t address_mask = peer_adev->dev->dma_mask ?
5995 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5996 	resource_size_t aper_limit =
5997 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
5998 	bool p2p_access =
5999 		!adev->gmc.xgmi.connected_to_cpu &&
6000 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6001 
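	/*
	 * Peer access additionally requires a "large BAR" (all of VRAM is
	 * CPU-visible) and the aperture must fall within the peer's DMA
	 * addressing range.
	 */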
6002 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6003 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6004 		!(adev->gmc.aper_base & address_mask ||
6005 		  aper_limit & address_mask));
6006 #else
6007 	return false;
6008 #endif
6009 }
6010 
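/**
 * amdgpu_device_baco_enter - enter the BACO (Bus Active, Chip Off) state
 *
 * @dev: drm_device pointer
 *
 * Disables the RAS doorbell interrupt when applicable and asks the SMU to put
 * the ASIC into BACO. Paired with amdgpu_device_baco_exit().
 * Returns 0 on success or a negative error code on failure.
 */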
6011 int amdgpu_device_baco_enter(struct drm_device *dev)
6012 {
6013 	struct amdgpu_device *adev = drm_to_adev(dev);
6014 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6015 
6016 	if (!amdgpu_device_supports_baco(dev))
6017 		return -ENOTSUPP;
6018 
6019 	if (ras && adev->ras_enabled &&
6020 	    adev->nbio.funcs->enable_doorbell_interrupt)
6021 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6022 
6023 	return amdgpu_dpm_baco_enter(adev);
6024 }
6025 
6026 int amdgpu_device_baco_exit(struct drm_device *dev)
6027 {
6028 	struct amdgpu_device *adev = drm_to_adev(dev);
6029 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6030 	int ret = 0;
6031 
6032 	if (!amdgpu_device_supports_baco(dev))
6033 		return -ENOTSUPP;
6034 
6035 	ret = amdgpu_dpm_baco_exit(adev);
6036 	if (ret)
6037 		return ret;
6038 
6039 	if (ras && adev->ras_enabled &&
6040 	    adev->nbio.funcs->enable_doorbell_interrupt)
6041 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6042 
6043 	if (amdgpu_passthrough(adev) &&
6044 	    adev->nbio.funcs->clear_doorbell_interrupt)
6045 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6046 
6047 	return 0;
6048 }
6049 
6050 /**
6051  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6052  * @pdev: PCI device struct
6053  * @state: PCI channel state
6054  *
6055  * Description: Called when a PCI error is detected.
6056  *
6057  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6058  */
6059 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6060 {
6061 	struct drm_device *dev = pci_get_drvdata(pdev);
6062 	struct amdgpu_device *adev = drm_to_adev(dev);
6063 	int i;
6064 
6065 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6066 
6067 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6068 		DRM_WARN("No support for XGMI hive yet...");
6069 		return PCI_ERS_RESULT_DISCONNECT;
6070 	}
6071 
6072 	adev->pci_channel_state = state;
6073 
6074 	switch (state) {
6075 	case pci_channel_io_normal:
6076 		return PCI_ERS_RESULT_CAN_RECOVER;
6077 	/* Fatal error, prepare for slot reset */
6078 	case pci_channel_io_frozen:
6079 		/*
6080 		 * Locking adev->reset_domain->sem will prevent any external access
6081 		 * to GPU during PCI error recovery
6082 		 */
6083 		amdgpu_device_lock_reset_domain(adev->reset_domain);
6084 		amdgpu_device_set_mp1_state(adev);
6085 
6086 		/*
6087 		 * Block any work scheduling as we do for regular GPU reset
6088 		 * for the duration of the recovery
6089 		 */
6090 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6091 			struct amdgpu_ring *ring = adev->rings[i];
6092 
6093 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
6094 				continue;
6095 
6096 			drm_sched_stop(&ring->sched, NULL);
6097 		}
6098 		atomic_inc(&adev->gpu_reset_counter);
6099 		return PCI_ERS_RESULT_NEED_RESET;
6100 	case pci_channel_io_perm_failure:
6101 		/* Permanent error, prepare for device removal */
6102 		return PCI_ERS_RESULT_DISCONNECT;
6103 	}
6104 
6105 	return PCI_ERS_RESULT_NEED_RESET;
6106 }
6107 
6108 /**
6109  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6110  * @pdev: pointer to PCI device
6111  */
6112 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6113 {
6114 
6115 	DRM_INFO("PCI error: mmio enabled callback!!\n");
6116 
6117 	/* TODO - dump whatever for debugging purposes */
6118 
6119 	/* This is called only if amdgpu_pci_error_detected returns
6120 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6121 	 * works, no need to reset slot.
6122 	 */
6123 
6124 	return PCI_ERS_RESULT_RECOVERED;
6125 }
6126 
6127 /**
6128  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6129  * @pdev: PCI device struct
6130  *
6131  * Description: This routine is called by the pci error recovery
6132  * code after the PCI slot has been reset, just before we
6133  * should resume normal operations.
6134  */
6135 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6136 {
6137 	struct drm_device *dev = pci_get_drvdata(pdev);
6138 	struct amdgpu_device *adev = drm_to_adev(dev);
6139 	int r, i;
6140 	struct amdgpu_reset_context reset_context;
6141 	u32 memsize;
6142 	struct list_head device_list;
6143 
6144 	DRM_INFO("PCI error: slot reset callback!!\n");
6145 
6146 	memset(&reset_context, 0, sizeof(reset_context));
6147 
6148 	INIT_LIST_HEAD(&device_list);
6149 	list_add_tail(&adev->reset_list, &device_list);
6150 
6151 	/* wait for asic to come out of reset */
6152 	msleep(500);
6153 
6154 	/* Restore PCI config space */
6155 	amdgpu_device_load_pci_state(pdev);
6156 
6157 	/* confirm ASIC came out of reset */
6158 	for (i = 0; i < adev->usec_timeout; i++) {
6159 		memsize = amdgpu_asic_get_config_memsize(adev);
6160 
6161 		if (memsize != 0xffffffff)
6162 			break;
6163 		udelay(1);
6164 	}
6165 	if (memsize == 0xffffffff) {
6166 		r = -ETIME;
6167 		goto out;
6168 	}
6169 
6170 	reset_context.method = AMD_RESET_METHOD_NONE;
6171 	reset_context.reset_req_dev = adev;
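	/* the PCI core has already reset the slot, so skip the HW reset but do a full re-init */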
6172 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6173 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6174 
6175 	adev->no_hw_access = true;
6176 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6177 	adev->no_hw_access = false;
6178 	if (r)
6179 		goto out;
6180 
6181 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6182 
6183 out:
6184 	if (!r) {
6185 		if (amdgpu_device_cache_pci_state(adev->pdev))
6186 			pci_restore_state(adev->pdev);
6187 
6188 		DRM_INFO("PCIe error recovery succeeded\n");
6189 	} else {
6190 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
6191 		amdgpu_device_unset_mp1_state(adev);
6192 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6193 	}
6194 
6195 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6196 }
6197 
6198 /**
6199  * amdgpu_pci_resume() - resume normal ops after PCI reset
6200  * @pdev: pointer to PCI device
6201  *
6202  * Called when the error recovery driver tells us that it's
6203  * OK to resume normal operation.
6204  */
6205 void amdgpu_pci_resume(struct pci_dev *pdev)
6206 {
6207 	struct drm_device *dev = pci_get_drvdata(pdev);
6208 	struct amdgpu_device *adev = drm_to_adev(dev);
6209 	int i;
6210 
6211 
6212 	DRM_INFO("PCI error: resume callback!!\n");
6213 
6214 	/* Only continue execution for the case of pci_channel_io_frozen */
6215 	if (adev->pci_channel_state != pci_channel_io_frozen)
6216 		return;
6217 
6218 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6219 		struct amdgpu_ring *ring = adev->rings[i];
6220 
6221 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
6222 			continue;
6223 
6224 		drm_sched_start(&ring->sched, true);
6225 	}
6226 
6227 	amdgpu_device_unset_mp1_state(adev);
6228 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6229 }
6230 
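/**
 * amdgpu_device_cache_pci_state - cache the PCI config space of the device
 *
 * @pdev: PCI device struct
 *
 * Saves the PCI config space and keeps a kernel-allocated copy in
 * adev->pci_state so that amdgpu_device_load_pci_state() can restore it
 * after a reset. Returns true on success, false otherwise.
 */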
6231 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6232 {
6233 	struct drm_device *dev = pci_get_drvdata(pdev);
6234 	struct amdgpu_device *adev = drm_to_adev(dev);
6235 	int r;
6236 
6237 	r = pci_save_state(pdev);
6238 	if (!r) {
6239 		kfree(adev->pci_state);
6240 
6241 		adev->pci_state = pci_store_saved_state(pdev);
6242 
6243 		if (!adev->pci_state) {
6244 			DRM_ERROR("Failed to store PCI saved state");
6245 			return false;
6246 		}
6247 	} else {
6248 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6249 		return false;
6250 	}
6251 
6252 	return true;
6253 }
6254 
6255 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6256 {
6257 	struct drm_device *dev = pci_get_drvdata(pdev);
6258 	struct amdgpu_device *adev = drm_to_adev(dev);
6259 	int r;
6260 
6261 	if (!adev->pci_state)
6262 		return false;
6263 
6264 	r = pci_load_saved_state(pdev, adev->pci_state);
6265 
6266 	if (!r) {
6267 		pci_restore_state(pdev);
6268 	} else {
6269 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6270 		return false;
6271 	}
6272 
6273 	return true;
6274 }
6275 
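/*
 * HDP (Host Data Path) flush/invalidate is only needed when the CPU reaches
 * VRAM through the PCIe BAR; it is skipped on bare-metal APUs and on parts
 * whose VRAM is directly connected to the CPU, where the path is already
 * coherent.
 */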
6276 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6277 		struct amdgpu_ring *ring)
6278 {
6279 #ifdef CONFIG_X86_64
6280 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6281 		return;
6282 #endif
6283 	if (adev->gmc.xgmi.connected_to_cpu)
6284 		return;
6285 
6286 	if (ring && ring->funcs->emit_hdp_flush)
6287 		amdgpu_ring_emit_hdp_flush(ring);
6288 	else
6289 		amdgpu_asic_flush_hdp(adev, ring);
6290 }
6291 
6292 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6293 		struct amdgpu_ring *ring)
6294 {
6295 #ifdef CONFIG_X86_64
6296 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6297 		return;
6298 #endif
6299 	if (adev->gmc.xgmi.connected_to_cpu)
6300 		return;
6301 
6302 	amdgpu_asic_invalidate_hdp(adev, ring);
6303 }
6304 
6305 int amdgpu_in_reset(struct amdgpu_device *adev)
6306 {
6307 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6308 }
6309 
6310 /**
6311  * amdgpu_device_halt() - bring hardware to some kind of halt state
6312  *
6313  * @adev: amdgpu_device pointer
6314  *
6315  * Bring the hardware to some kind of halt state so that no one can touch it
6316  * any more. It helps to maintain the error context when an error occurs.
6317  * Compared to a simple hang, the system will stay stable at least for SSH
6318  * access. Then it should be trivial to inspect the hardware state and
6319  * see what's going on. Implemented as follows:
6320  *
6321  * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
6322  *    clears all CPU mappings to device, disallows remappings through page faults
6323  * 2. amdgpu_irq_disable_all() disables all interrupts
6324  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6325  * 4. set adev->no_hw_access to avoid potential crashes after step 5
6326  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6327  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6328  *    flush any in flight DMA operations
6329  */
6330 void amdgpu_device_halt(struct amdgpu_device *adev)
6331 {
6332 	struct pci_dev *pdev = adev->pdev;
6333 	struct drm_device *ddev = adev_to_drm(adev);
6334 
6335 	amdgpu_xcp_dev_unplug(adev);
6336 	drm_dev_unplug(ddev);
6337 
6338 	amdgpu_irq_disable_all(adev);
6339 
6340 	amdgpu_fence_driver_hw_fini(adev);
6341 
6342 	adev->no_hw_access = true;
6343 
6344 	amdgpu_device_unmap_mmio(adev);
6345 
6346 	pci_disable_device(pdev);
6347 	pci_wait_for_pending_transaction(pdev);
6348 }
6349 
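/*
 * Indirect access to the PCIe port registers through the NBIO index/data
 * pair: write the register offset to the index register, read it back to
 * flush the posted write, then access the data register, all under
 * pcie_idx_lock.
 */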
6350 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6351 				u32 reg)
6352 {
6353 	unsigned long flags, address, data;
6354 	u32 r;
6355 
6356 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6357 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6358 
6359 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6360 	WREG32(address, reg * 4);
6361 	(void)RREG32(address);
6362 	r = RREG32(data);
6363 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6364 	return r;
6365 }
6366 
6367 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6368 				u32 reg, u32 v)
6369 {
6370 	unsigned long flags, address, data;
6371 
6372 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6373 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6374 
6375 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6376 	WREG32(address, reg * 4);
6377 	(void)RREG32(address);
6378 	WREG32(data, v);
6379 	(void)RREG32(data);
6380 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6381 }
6382 
6383 /**
6384  * amdgpu_device_switch_gang - switch to a new gang
6385  * @adev: amdgpu_device pointer
6386  * @gang: the gang to switch to
6387  *
6388  * Try to switch to a new gang.
6389  * Returns: NULL if we switched to the new gang or a reference to the current
6390  * gang leader.
6391  */
6392 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6393 					    struct dma_fence *gang)
6394 {
6395 	struct dma_fence *old = NULL;
6396 
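		/*
		 * Grab a reference to the current gang leader under RCU.
		 * If it is still unsignaled and differs from the requested
		 * gang, hand it back so the caller can wait on it; otherwise
		 * try to install the new gang with cmpxchg() and retry if we
		 * raced with another writer.
		 */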
6397 	do {
6398 		dma_fence_put(old);
6399 		rcu_read_lock();
6400 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
6401 		rcu_read_unlock();
6402 
6403 		if (old == gang)
6404 			break;
6405 
6406 		if (!dma_fence_is_signaled(old))
6407 			return old;
6408 
6409 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6410 			 old, gang) != old);
6411 
6412 	dma_fence_put(old);
6413 	return NULL;
6414 }
6415 
6416 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6417 {
6418 	switch (adev->asic_type) {
6419 #ifdef CONFIG_DRM_AMDGPU_SI
6420 	case CHIP_HAINAN:
6421 #endif
6422 	case CHIP_TOPAZ:
6423 		/* chips with no display hardware */
6424 		return false;
6425 #ifdef CONFIG_DRM_AMDGPU_SI
6426 	case CHIP_TAHITI:
6427 	case CHIP_PITCAIRN:
6428 	case CHIP_VERDE:
6429 	case CHIP_OLAND:
6430 #endif
6431 #ifdef CONFIG_DRM_AMDGPU_CIK
6432 	case CHIP_BONAIRE:
6433 	case CHIP_HAWAII:
6434 	case CHIP_KAVERI:
6435 	case CHIP_KABINI:
6436 	case CHIP_MULLINS:
6437 #endif
6438 	case CHIP_TONGA:
6439 	case CHIP_FIJI:
6440 	case CHIP_POLARIS10:
6441 	case CHIP_POLARIS11:
6442 	case CHIP_POLARIS12:
6443 	case CHIP_VEGAM:
6444 	case CHIP_CARRIZO:
6445 	case CHIP_STONEY:
6446 		/* chips with display hardware */
6447 		return true;
6448 	default:
6449 		/* IP discovery */
6450 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6451 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6452 			return false;
6453 		return true;
6454 	}
6455 }
6456 
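/*
 * Poll a register until (value & mask) == expected_value or the usec timeout
 * expires; the timeout is re-armed whenever the read value changes.
 * Illustrative use (the register and mask names below are hypothetical):
 *
 *   r = amdgpu_device_wait_on_rreg(adev, 0, regFOO_STATUS, "FOO_STATUS",
 *                                  FOO_STATUS__IDLE_MASK,
 *                                  FOO_STATUS__IDLE_MASK);
 */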
6457 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6458 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6459 		uint32_t expected_value, uint32_t mask)
6460 {
6461 	uint32_t ret = 0;
6462 	uint32_t old_ = 0;
6463 	uint32_t tmp_ = RREG32(reg_addr);
6464 	uint32_t loop = adev->usec_timeout;
6465 
6466 	while ((tmp_ & (mask)) != (expected_value)) {
6467 		if (old_ != tmp_) {
6468 			loop = adev->usec_timeout;
6469 			old_ = tmp_;
6470 		} else
6471 			udelay(1);
6472 		tmp_ = RREG32(reg_addr);
6473 		loop--;
6474 		if (!loop) {
6475 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6476 				  inst, reg_name, (uint32_t)expected_value,
6477 				  (uint32_t)(tmp_ & (mask)));
6478 			ret = -ETIMEDOUT;
6479 			break;
6480 		}
6481 	}
6482 	return ret;
6483 }
6484