xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision ab779466166348eecf17d20f620aa9a47965c934)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
37 
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 
78 #include <linux/suspend.h>
79 #include <drm/task_barrier.h>
80 #include <linux/pm_runtime.h>
81 
82 #include <drm/drm_drv.h>
83 
84 #if IS_ENABLED(CONFIG_X86)
85 #include <asm/intel-family.h>
86 #endif
87 
88 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
89 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
95 
96 #define AMDGPU_RESUME_MS		2000
97 #define AMDGPU_MAX_RETRY_LIMIT		2
98 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
99 
100 static const struct drm_driver amdgpu_kms_driver;
101 
102 const char *amdgpu_asic_name[] = {
103 	"TAHITI",
104 	"PITCAIRN",
105 	"VERDE",
106 	"OLAND",
107 	"HAINAN",
108 	"BONAIRE",
109 	"KAVERI",
110 	"KABINI",
111 	"HAWAII",
112 	"MULLINS",
113 	"TOPAZ",
114 	"TONGA",
115 	"FIJI",
116 	"CARRIZO",
117 	"STONEY",
118 	"POLARIS10",
119 	"POLARIS11",
120 	"POLARIS12",
121 	"VEGAM",
122 	"VEGA10",
123 	"VEGA12",
124 	"VEGA20",
125 	"RAVEN",
126 	"ARCTURUS",
127 	"RENOIR",
128 	"ALDEBARAN",
129 	"NAVI10",
130 	"CYAN_SKILLFISH",
131 	"NAVI14",
132 	"NAVI12",
133 	"SIENNA_CICHLID",
134 	"NAVY_FLOUNDER",
135 	"VANGOGH",
136 	"DIMGREY_CAVEFISH",
137 	"BEIGE_GOBY",
138 	"YELLOW_CARP",
139 	"IP DISCOVERY",
140 	"LAST",
141 };
142 
143 /**
144  * DOC: pcie_replay_count
145  *
146  * The amdgpu driver provides a sysfs API for reporting the total number
147  * of PCIe replays (NAKs).
148  * The file pcie_replay_count is used for this and returns the total
149  * number of replays as a sum of the NAKs generated and NAKs received.
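 *
 * For example, the count can typically be read from sysfs via
 * /sys/class/drm/card0/device/pcie_replay_count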
150  */
151 
152 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 		struct device_attribute *attr, char *buf)
154 {
155 	struct drm_device *ddev = dev_get_drvdata(dev);
156 	struct amdgpu_device *adev = drm_to_adev(ddev);
157 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158 
159 	return sysfs_emit(buf, "%llu\n", cnt);
160 }
161 
162 static DEVICE_ATTR(pcie_replay_count, 0444,
163 		amdgpu_device_get_pcie_replay_count, NULL);
164 
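/* Read handler for the reg_state binary sysfs attribute. @ppos selects which
 * register state partition (XGMI, WAFL, PCIE, USR, USR_1) is dumped into @buf.
 */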
165 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
166 					  struct bin_attribute *attr, char *buf,
167 					  loff_t ppos, size_t count)
168 {
169 	struct device *dev = kobj_to_dev(kobj);
170 	struct drm_device *ddev = dev_get_drvdata(dev);
171 	struct amdgpu_device *adev = drm_to_adev(ddev);
172 	ssize_t bytes_read;
173 
174 	switch (ppos) {
175 	case AMDGPU_SYS_REG_STATE_XGMI:
176 		bytes_read = amdgpu_asic_get_reg_state(
177 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
178 		break;
179 	case AMDGPU_SYS_REG_STATE_WAFL:
180 		bytes_read = amdgpu_asic_get_reg_state(
181 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
182 		break;
183 	case AMDGPU_SYS_REG_STATE_PCIE:
184 		bytes_read = amdgpu_asic_get_reg_state(
185 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
186 		break;
187 	case AMDGPU_SYS_REG_STATE_USR:
188 		bytes_read = amdgpu_asic_get_reg_state(
189 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
190 		break;
191 	case AMDGPU_SYS_REG_STATE_USR_1:
192 		bytes_read = amdgpu_asic_get_reg_state(
193 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
194 		break;
195 	default:
196 		return -EINVAL;
197 	}
198 
199 	return bytes_read;
200 }
201 
202 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
203 	 AMDGPU_SYS_REG_STATE_END);
204 
205 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
206 {
207 	int ret;
208 
209 	if (!amdgpu_asic_get_reg_state_supported(adev))
210 		return 0;
211 
212 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
213 
214 	return ret;
215 }
216 
217 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
218 {
219 	if (!amdgpu_asic_get_reg_state_supported(adev))
220 		return;
221 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
222 }
223 
224 /**
225  * DOC: board_info
226  *
227  * The amdgpu driver provides a sysfs API for giving board-related information.
228  * It provides the form factor information in the format
229  *
230  *   type : form factor
231  *
232  * Possible form factor values
233  *
234  * - "cem"		- PCIE CEM card
235  * - "oam"		- Open Compute Accelerator Module
236  * - "unknown"	- Not known
237  *
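 * Example output for an OAM board
 *
 *   type : oam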
238  */
239 
240 static ssize_t amdgpu_device_get_board_info(struct device *dev,
241 					    struct device_attribute *attr,
242 					    char *buf)
243 {
244 	struct drm_device *ddev = dev_get_drvdata(dev);
245 	struct amdgpu_device *adev = drm_to_adev(ddev);
246 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
247 	const char *pkg;
248 
249 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
250 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
251 
252 	switch (pkg_type) {
253 	case AMDGPU_PKG_TYPE_CEM:
254 		pkg = "cem";
255 		break;
256 	case AMDGPU_PKG_TYPE_OAM:
257 		pkg = "oam";
258 		break;
259 	default:
260 		pkg = "unknown";
261 		break;
262 	}
263 
264 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
265 }
266 
267 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
268 
269 static struct attribute *amdgpu_board_attrs[] = {
270 	&dev_attr_board_info.attr,
271 	NULL,
272 };
273 
274 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
275 					     struct attribute *attr, int n)
276 {
277 	struct device *dev = kobj_to_dev(kobj);
278 	struct drm_device *ddev = dev_get_drvdata(dev);
279 	struct amdgpu_device *adev = drm_to_adev(ddev);
280 
281 	if (adev->flags & AMD_IS_APU)
282 		return 0;
283 
284 	return attr->mode;
285 }
286 
287 static const struct attribute_group amdgpu_board_attrs_group = {
288 	.attrs = amdgpu_board_attrs,
289 	.is_visible = amdgpu_board_attrs_is_visible
290 };
291 
292 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
293 
294 
295 /**
296  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
297  *
298  * @dev: drm_device pointer
299  *
300  * Returns true if the device is a dGPU with ATPX power control,
301  * otherwise return false.
302  */
303 bool amdgpu_device_supports_px(struct drm_device *dev)
304 {
305 	struct amdgpu_device *adev = drm_to_adev(dev);
306 
307 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
308 		return true;
309 	return false;
310 }
311 
312 /**
313  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
314  *
315  * @dev: drm_device pointer
316  *
317  * Returns true if the device is a dGPU with ACPI power control,
318  * otherwise return false.
319  */
320 bool amdgpu_device_supports_boco(struct drm_device *dev)
321 {
322 	struct amdgpu_device *adev = drm_to_adev(dev);
323 
324 	if (adev->has_pr3 ||
325 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
326 		return true;
327 	return false;
328 }
329 
330 /**
331  * amdgpu_device_supports_baco - Does the device support BACO
332  *
333  * @dev: drm_device pointer
334  *
335  * Returns true if the device supports BACO,
336  * otherwise return false.
337  */
338 bool amdgpu_device_supports_baco(struct drm_device *dev)
339 {
340 	struct amdgpu_device *adev = drm_to_adev(dev);
341 
342 	return amdgpu_asic_supports_baco(adev);
343 }
344 
345 /**
346  * amdgpu_device_supports_smart_shift - Is the device a dGPU with
347  * Smart Shift support
348  *
349  * @dev: drm_device pointer
350  *
351  * Returns true if the device is a dGPU with Smart Shift support,
352  * otherwise returns false.
353  */
354 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
355 {
356 	return (amdgpu_device_supports_boco(dev) &&
357 		amdgpu_acpi_is_power_shift_control_supported());
358 }
359 
360 /*
361  * VRAM access helper functions
362  */
363 
364 /**
365  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
366  *
367  * @adev: amdgpu_device pointer
368  * @pos: offset of the buffer in vram
369  * @buf: virtual address of the buffer in system memory
370  * @size: read/write size, the buffer pointed to by @buf must be at least @size bytes
371  * @write: true - write to vram, otherwise - read from vram
372  */
373 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
374 			     void *buf, size_t size, bool write)
375 {
376 	unsigned long flags;
377 	uint32_t hi = ~0, tmp = 0;
378 	uint32_t *data = buf;
379 	uint64_t last;
380 	int idx;
381 
382 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
383 		return;
384 
385 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
386 
387 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
388 	for (last = pos + size; pos < last; pos += 4) {
389 		tmp = pos >> 31;
390 
391 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
392 		if (tmp != hi) {
393 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
394 			hi = tmp;
395 		}
396 		if (write)
397 			WREG32_NO_KIQ(mmMM_DATA, *data++);
398 		else
399 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
400 	}
401 
402 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
403 	drm_dev_exit(idx);
404 }
405 
406 /**
407  * amdgpu_device_aper_access - access vram by vram aperture
408  *
409  * @adev: amdgpu_device pointer
410  * @pos: offset of the buffer in vram
411  * @buf: virtual address of the buffer in system memory
412  * @size: read/write size, the buffer pointed to by @buf must be at least @size bytes
413  * @write: true - write to vram, otherwise - read from vram
414  *
415  * Returns the number of bytes that have been transferred.
416  */
417 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
418 				 void *buf, size_t size, bool write)
419 {
420 #ifdef CONFIG_64BIT
421 	void __iomem *addr;
422 	size_t count = 0;
423 	uint64_t last;
424 
425 	if (!adev->mman.aper_base_kaddr)
426 		return 0;
427 
428 	last = min(pos + size, adev->gmc.visible_vram_size);
429 	if (last > pos) {
430 		addr = adev->mman.aper_base_kaddr + pos;
431 		count = last - pos;
432 
433 		if (write) {
434 			memcpy_toio(addr, buf, count);
435 			/* Make sure HDP write cache flush happens without any reordering
436 			 * after the system memory contents are sent over PCIe to the device
437 			 */
438 			mb();
439 			amdgpu_device_flush_hdp(adev, NULL);
440 		} else {
441 			amdgpu_device_invalidate_hdp(adev, NULL);
442 			/* Make sure HDP read cache is invalidated before issuing a read
443 			 * to the PCIe device
444 			 */
445 			mb();
446 			memcpy_fromio(buf, addr, count);
447 		}
448 
449 	}
450 
451 	return count;
452 #else
453 	return 0;
454 #endif
455 }
456 
457 /**
458  * amdgpu_device_vram_access - read/write a buffer in vram
459  *
460  * @adev: amdgpu_device pointer
461  * @pos: offset of the buffer in vram
462  * @buf: virtual address of the buffer in system memory
463  * @size: read/write size, the buffer pointed to by @buf must be at least @size bytes
464  * @write: true - write to vram, otherwise - read from vram
465  */
466 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
467 			       void *buf, size_t size, bool write)
468 {
469 	size_t count;
470 
471 	/* try using the vram aperture to access vram first */
472 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
473 	size -= count;
474 	if (size) {
475 		/* use MM_INDEX/MM_DATA access for the rest of vram */
476 		pos += count;
477 		buf += count;
478 		amdgpu_device_mm_access(adev, pos, buf, size, write);
479 	}
480 }
481 
482 /*
483  * register access helper functions.
484  */
485 
486 /* Check if hw access should be skipped because of hotplug or device error */
487 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
488 {
489 	if (adev->no_hw_access)
490 		return true;
491 
492 #ifdef CONFIG_LOCKDEP
493 	/*
494 	 * This is a bit complicated to understand, so worth a comment. What we assert
495 	 * here is that the GPU reset is not running on another thread in parallel.
496 	 *
497 	 * For this we trylock the read side of the reset semaphore, if that succeeds
498  * we know that the reset is not running in parallel.
499 	 *
500 	 * If the trylock fails we assert that we are either already holding the read
501 	 * side of the lock or are the reset thread itself and hold the write side of
502 	 * the lock.
503 	 */
504 	if (in_task()) {
505 		if (down_read_trylock(&adev->reset_domain->sem))
506 			up_read(&adev->reset_domain->sem);
507 		else
508 			lockdep_assert_held(&adev->reset_domain->sem);
509 	}
510 #endif
511 	return false;
512 }
513 
514 /**
515  * amdgpu_device_rreg - read a memory mapped IO or indirect register
516  *
517  * @adev: amdgpu_device pointer
518  * @reg: dword aligned register offset
519  * @acc_flags: access flags which require special behavior
520  *
521  * Returns the 32 bit value from the offset specified.
522  */
523 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
524 			    uint32_t reg, uint32_t acc_flags)
525 {
526 	uint32_t ret;
527 
528 	if (amdgpu_device_skip_hw_access(adev))
529 		return 0;
530 
531 	if ((reg * 4) < adev->rmmio_size) {
532 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
533 		    amdgpu_sriov_runtime(adev) &&
534 		    down_read_trylock(&adev->reset_domain->sem)) {
535 			ret = amdgpu_kiq_rreg(adev, reg, 0);
536 			up_read(&adev->reset_domain->sem);
537 		} else {
538 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
539 		}
540 	} else {
541 		ret = adev->pcie_rreg(adev, reg * 4);
542 	}
543 
544 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
545 
546 	return ret;
547 }
548 
549 /*
550  * MMIO register read helper functions that take a byte offset
551  * @offset: byte offset from MMIO start
552  */
553 
554 /**
555  * amdgpu_mm_rreg8 - read a memory mapped IO register
556  *
557  * @adev: amdgpu_device pointer
558  * @offset: byte aligned register offset
559  *
560  * Returns the 8 bit value from the offset specified.
561  */
562 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
563 {
564 	if (amdgpu_device_skip_hw_access(adev))
565 		return 0;
566 
567 	if (offset < adev->rmmio_size)
568 		return (readb(adev->rmmio + offset));
569 	BUG();
570 }
571 
572 
573 /**
574  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
575  *
576  * @adev: amdgpu_device pointer
577  * @reg: dword aligned register offset
578  * @acc_flags: access flags which require special behavior
579  * @xcc_id: xcc accelerated compute core id
580  *
581  * Returns the 32 bit value from the offset specified.
582  */
583 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
584 				uint32_t reg, uint32_t acc_flags,
585 				uint32_t xcc_id)
586 {
587 	uint32_t ret, rlcg_flag;
588 
589 	if (amdgpu_device_skip_hw_access(adev))
590 		return 0;
591 
592 	if ((reg * 4) < adev->rmmio_size) {
593 		if (amdgpu_sriov_vf(adev) &&
594 		    !amdgpu_sriov_runtime(adev) &&
595 		    adev->gfx.rlc.rlcg_reg_access_supported &&
596 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
597 							 GC_HWIP, false,
598 							 &rlcg_flag)) {
599 			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
600 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
601 		    amdgpu_sriov_runtime(adev) &&
602 		    down_read_trylock(&adev->reset_domain->sem)) {
603 			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
604 			up_read(&adev->reset_domain->sem);
605 		} else {
606 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
607 		}
608 	} else {
609 		ret = adev->pcie_rreg(adev, reg * 4);
610 	}
611 
612 	return ret;
613 }
614 
615 /*
616  * MMIO register write helper functions that take a byte offset
617  * @offset: byte offset from MMIO start
618  * @value: the value to be written to the register
619  */
620 
621 /**
622  * amdgpu_mm_wreg8 - write a memory mapped IO register
623  *
624  * @adev: amdgpu_device pointer
625  * @offset: byte aligned register offset
626  * @value: 8 bit value to write
627  *
628  * Writes the value specified to the offset specified.
629  */
630 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
631 {
632 	if (amdgpu_device_skip_hw_access(adev))
633 		return;
634 
635 	if (offset < adev->rmmio_size)
636 		writeb(value, adev->rmmio + offset);
637 	else
638 		BUG();
639 }
640 
641 /**
642  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
643  *
644  * @adev: amdgpu_device pointer
645  * @reg: dword aligned register offset
646  * @v: 32 bit value to write to the register
647  * @acc_flags: access flags which require special behavior
648  *
649  * Writes the value specified to the offset specified.
650  */
651 void amdgpu_device_wreg(struct amdgpu_device *adev,
652 			uint32_t reg, uint32_t v,
653 			uint32_t acc_flags)
654 {
655 	if (amdgpu_device_skip_hw_access(adev))
656 		return;
657 
658 	if ((reg * 4) < adev->rmmio_size) {
659 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
660 		    amdgpu_sriov_runtime(adev) &&
661 		    down_read_trylock(&adev->reset_domain->sem)) {
662 			amdgpu_kiq_wreg(adev, reg, v, 0);
663 			up_read(&adev->reset_domain->sem);
664 		} else {
665 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
666 		}
667 	} else {
668 		adev->pcie_wreg(adev, reg * 4, v);
669 	}
670 
671 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
672 }
673 
674 /**
675  * amdgpu_mm_wreg_mmio_rlc - write a register either with direct/indirect mmio or with the RLC path if in range
676  *
677  * @adev: amdgpu_device pointer
678  * @reg: mmio/rlc register
679  * @v: value to write
680  * @xcc_id: xcc accelerated compute core id
681  *
682  * this function is invoked only for the debugfs register access
683  */
684 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
685 			     uint32_t reg, uint32_t v,
686 			     uint32_t xcc_id)
687 {
688 	if (amdgpu_device_skip_hw_access(adev))
689 		return;
690 
691 	if (amdgpu_sriov_fullaccess(adev) &&
692 	    adev->gfx.rlc.funcs &&
693 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
694 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
695 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
696 	} else if ((reg * 4) >= adev->rmmio_size) {
697 		adev->pcie_wreg(adev, reg * 4, v);
698 	} else {
699 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
700 	}
701 }
702 
703 /**
704  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
705  *
706  * @adev: amdgpu_device pointer
707  * @reg: dword aligned register offset
708  * @v: 32 bit value to write to the register
709  * @acc_flags: access flags which require special behavior
710  * @xcc_id: xcc accelerated compute core id
711  *
712  * Writes the value specified to the offset specified.
713  */
714 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
715 			uint32_t reg, uint32_t v,
716 			uint32_t acc_flags, uint32_t xcc_id)
717 {
718 	uint32_t rlcg_flag;
719 
720 	if (amdgpu_device_skip_hw_access(adev))
721 		return;
722 
723 	if ((reg * 4) < adev->rmmio_size) {
724 		if (amdgpu_sriov_vf(adev) &&
725 		    !amdgpu_sriov_runtime(adev) &&
726 		    adev->gfx.rlc.rlcg_reg_access_supported &&
727 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
728 							 GC_HWIP, true,
729 							 &rlcg_flag)) {
730 			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
731 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
732 		    amdgpu_sriov_runtime(adev) &&
733 		    down_read_trylock(&adev->reset_domain->sem)) {
734 			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
735 			up_read(&adev->reset_domain->sem);
736 		} else {
737 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
738 		}
739 	} else {
740 		adev->pcie_wreg(adev, reg * 4, v);
741 	}
742 }
743 
744 /**
745  * amdgpu_device_indirect_rreg - read an indirect register
746  *
747  * @adev: amdgpu_device pointer
748  * @reg_addr: indirect register address to read from
749  *
750  * Returns the value of indirect register @reg_addr
751  */
752 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
753 				u32 reg_addr)
754 {
755 	unsigned long flags, pcie_index, pcie_data;
756 	void __iomem *pcie_index_offset;
757 	void __iomem *pcie_data_offset;
758 	u32 r;
759 
760 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
761 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
762 
763 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
764 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
765 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
766 
767 	writel(reg_addr, pcie_index_offset);
768 	readl(pcie_index_offset);
769 	r = readl(pcie_data_offset);
770 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
771 
772 	return r;
773 }
774 
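/**
 * amdgpu_device_indirect_rreg_ext - read an indirect register with a 64-bit address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Like amdgpu_device_indirect_rreg(), but also programs the high index
 * register when @reg_addr has bits set above bit 31 and the NBIO exposes one.
 *
 * Returns the value of indirect register @reg_addr
 */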
775 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
776 				    u64 reg_addr)
777 {
778 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
779 	u32 r;
780 	void __iomem *pcie_index_offset;
781 	void __iomem *pcie_index_hi_offset;
782 	void __iomem *pcie_data_offset;
783 
784 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
785 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
786 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
787 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
788 	else
789 		pcie_index_hi = 0;
790 
791 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
792 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
793 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
794 	if (pcie_index_hi != 0)
795 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
796 				pcie_index_hi * 4;
797 
798 	writel(reg_addr, pcie_index_offset);
799 	readl(pcie_index_offset);
800 	if (pcie_index_hi != 0) {
801 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
802 		readl(pcie_index_hi_offset);
803 	}
804 	r = readl(pcie_data_offset);
805 
806 	/* clear the high bits */
807 	if (pcie_index_hi != 0) {
808 		writel(0, pcie_index_hi_offset);
809 		readl(pcie_index_hi_offset);
810 	}
811 
812 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
813 
814 	return r;
815 }
816 
817 /**
818  * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
819  *
820  * @adev: amdgpu_device pointer
821  * @reg_addr: indirect register address to read from
822  *
823  * Returns the value of indirect register @reg_addr
824  */
825 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
826 				  u32 reg_addr)
827 {
828 	unsigned long flags, pcie_index, pcie_data;
829 	void __iomem *pcie_index_offset;
830 	void __iomem *pcie_data_offset;
831 	u64 r;
832 
833 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
834 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
835 
836 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
837 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
838 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
839 
840 	/* read low 32 bits */
841 	writel(reg_addr, pcie_index_offset);
842 	readl(pcie_index_offset);
843 	r = readl(pcie_data_offset);
844 	/* read high 32 bits */
845 	writel(reg_addr + 4, pcie_index_offset);
846 	readl(pcie_index_offset);
847 	r |= ((u64)readl(pcie_data_offset) << 32);
848 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
849 
850 	return r;
851 }
852 
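/**
 * amdgpu_device_indirect_rreg64_ext - read a 64 bit indirect register with a 64-bit address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */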
853 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
854 				  u64 reg_addr)
855 {
856 	unsigned long flags, pcie_index, pcie_data;
857 	unsigned long pcie_index_hi = 0;
858 	void __iomem *pcie_index_offset;
859 	void __iomem *pcie_index_hi_offset;
860 	void __iomem *pcie_data_offset;
861 	u64 r;
862 
863 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
864 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
865 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
866 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
867 
868 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
869 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
870 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
871 	if (pcie_index_hi != 0)
872 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
873 			pcie_index_hi * 4;
874 
875 	/* read low 32 bits */
876 	writel(reg_addr, pcie_index_offset);
877 	readl(pcie_index_offset);
878 	if (pcie_index_hi != 0) {
879 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
880 		readl(pcie_index_hi_offset);
881 	}
882 	r = readl(pcie_data_offset);
883 	/* read high 32 bits */
884 	writel(reg_addr + 4, pcie_index_offset);
885 	readl(pcie_index_offset);
886 	if (pcie_index_hi != 0) {
887 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
888 		readl(pcie_index_hi_offset);
889 	}
890 	r |= ((u64)readl(pcie_data_offset) << 32);
891 
892 	/* clear the high bits */
893 	if (pcie_index_hi != 0) {
894 		writel(0, pcie_index_hi_offset);
895 		readl(pcie_index_hi_offset);
896 	}
897 
898 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
899 
900 	return r;
901 }
902 
903 /**
904  * amdgpu_device_indirect_wreg - write to an indirect register
905  *
906  * @adev: amdgpu_device pointer
907  * @reg_addr: indirect register offset
908  * @reg_data: indirect register data
909  *
910  */
911 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
912 				 u32 reg_addr, u32 reg_data)
913 {
914 	unsigned long flags, pcie_index, pcie_data;
915 	void __iomem *pcie_index_offset;
916 	void __iomem *pcie_data_offset;
917 
918 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
919 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
920 
921 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
922 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
923 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
924 
925 	writel(reg_addr, pcie_index_offset);
926 	readl(pcie_index_offset);
927 	writel(reg_data, pcie_data_offset);
928 	readl(pcie_data_offset);
929 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
930 }
931 
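/**
 * amdgpu_device_indirect_wreg_ext - write to an indirect register with a 64-bit address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */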
932 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
933 				     u64 reg_addr, u32 reg_data)
934 {
935 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
936 	void __iomem *pcie_index_offset;
937 	void __iomem *pcie_index_hi_offset;
938 	void __iomem *pcie_data_offset;
939 
940 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
941 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
942 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
943 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
944 	else
945 		pcie_index_hi = 0;
946 
947 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
948 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
949 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
950 	if (pcie_index_hi != 0)
951 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
952 				pcie_index_hi * 4;
953 
954 	writel(reg_addr, pcie_index_offset);
955 	readl(pcie_index_offset);
956 	if (pcie_index_hi != 0) {
957 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
958 		readl(pcie_index_hi_offset);
959 	}
960 	writel(reg_data, pcie_data_offset);
961 	readl(pcie_data_offset);
962 
963 	/* clear the high bits */
964 	if (pcie_index_hi != 0) {
965 		writel(0, pcie_index_hi_offset);
966 		readl(pcie_index_hi_offset);
967 	}
968 
969 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
970 }
971 
972 /**
973  * amdgpu_device_indirect_wreg64 - write a 64 bit value to an indirect register
974  *
975  * @adev: amdgpu_device pointer
976  * @reg_addr: indirect register offset
977  * @reg_data: indirect register data
978  *
979  */
980 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
981 				   u32 reg_addr, u64 reg_data)
982 {
983 	unsigned long flags, pcie_index, pcie_data;
984 	void __iomem *pcie_index_offset;
985 	void __iomem *pcie_data_offset;
986 
987 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
988 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
989 
990 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
991 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
992 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
993 
994 	/* write low 32 bits */
995 	writel(reg_addr, pcie_index_offset);
996 	readl(pcie_index_offset);
997 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
998 	readl(pcie_data_offset);
999 	/* write high 32 bits */
1000 	writel(reg_addr + 4, pcie_index_offset);
1001 	readl(pcie_index_offset);
1002 	writel((u32)(reg_data >> 32), pcie_data_offset);
1003 	readl(pcie_data_offset);
1004 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1005 }
1006 
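/**
 * amdgpu_device_indirect_wreg64_ext - write a 64 bit value to an indirect register with a 64-bit address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */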
1007 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1008 				   u64 reg_addr, u64 reg_data)
1009 {
1010 	unsigned long flags, pcie_index, pcie_data;
1011 	unsigned long pcie_index_hi = 0;
1012 	void __iomem *pcie_index_offset;
1013 	void __iomem *pcie_index_hi_offset;
1014 	void __iomem *pcie_data_offset;
1015 
1016 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1017 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1018 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1019 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1020 
1021 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1022 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1023 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1024 	if (pcie_index_hi != 0)
1025 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1026 				pcie_index_hi * 4;
1027 
1028 	/* write low 32 bits */
1029 	writel(reg_addr, pcie_index_offset);
1030 	readl(pcie_index_offset);
1031 	if (pcie_index_hi != 0) {
1032 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1033 		readl(pcie_index_hi_offset);
1034 	}
1035 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1036 	readl(pcie_data_offset);
1037 	/* write high 32 bits */
1038 	writel(reg_addr + 4, pcie_index_offset);
1039 	readl(pcie_index_offset);
1040 	if (pcie_index_hi != 0) {
1041 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1042 		readl(pcie_index_hi_offset);
1043 	}
1044 	writel((u32)(reg_data >> 32), pcie_data_offset);
1045 	readl(pcie_data_offset);
1046 
1047 	/* clear the high bits */
1048 	if (pcie_index_hi != 0) {
1049 		writel(0, pcie_index_hi_offset);
1050 		readl(pcie_index_hi_offset);
1051 	}
1052 
1053 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1054 }
1055 
1056 /**
1057  * amdgpu_device_get_rev_id - query device rev_id
1058  *
1059  * @adev: amdgpu_device pointer
1060  *
1061  * Return device rev_id
1062  */
1063 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1064 {
1065 	return adev->nbio.funcs->get_rev_id(adev);
1066 }
1067 
1068 /**
1069  * amdgpu_invalid_rreg - dummy reg read function
1070  *
1071  * @adev: amdgpu_device pointer
1072  * @reg: offset of register
1073  *
1074  * Dummy register read function.  Used for register blocks
1075  * that certain asics don't have (all asics).
1076  * Returns the value in the register.
1077  */
1078 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1079 {
1080 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1081 	BUG();
1082 	return 0;
1083 }
1084 
1085 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1086 {
1087 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1088 	BUG();
1089 	return 0;
1090 }
1091 
1092 /**
1093  * amdgpu_invalid_wreg - dummy reg write function
1094  *
1095  * @adev: amdgpu_device pointer
1096  * @reg: offset of register
1097  * @v: value to write to the register
1098  *
1099  * Dummy register write function.  Used for register blocks
1100  * that certain asics don't have (all asics).
1101  */
1102 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1103 {
1104 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1105 		  reg, v);
1106 	BUG();
1107 }
1108 
1109 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1110 {
1111 	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1112 		  reg, v);
1113 	BUG();
1114 }
1115 
1116 /**
1117  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1118  *
1119  * @adev: amdgpu_device pointer
1120  * @reg: offset of register
1121  *
1122  * Dummy register read function.  Used for register blocks
1123  * that certain asics don't have (all asics).
1124  * Returns the value in the register.
1125  */
1126 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1127 {
1128 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1129 	BUG();
1130 	return 0;
1131 }
1132 
1133 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1134 {
1135 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1136 	BUG();
1137 	return 0;
1138 }
1139 
1140 /**
1141  * amdgpu_invalid_wreg64 - dummy reg write function
1142  *
1143  * @adev: amdgpu_device pointer
1144  * @reg: offset of register
1145  * @v: value to write to the register
1146  *
1147  * Dummy register write function.  Used for register blocks
1148  * that certain asics don't have (all asics).
1149  */
1150 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1151 {
1152 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1153 		  reg, v);
1154 	BUG();
1155 }
1156 
1157 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1158 {
1159 	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1160 		  reg, v);
1161 	BUG();
1162 }
1163 
1164 /**
1165  * amdgpu_block_invalid_rreg - dummy reg read function
1166  *
1167  * @adev: amdgpu_device pointer
1168  * @block: offset of instance
1169  * @reg: offset of register
1170  *
1171  * Dummy register read function.  Used for register blocks
1172  * that certain asics don't have (all asics).
1173  * Returns the value in the register.
1174  */
1175 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1176 					  uint32_t block, uint32_t reg)
1177 {
1178 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1179 		  reg, block);
1180 	BUG();
1181 	return 0;
1182 }
1183 
1184 /**
1185  * amdgpu_block_invalid_wreg - dummy reg write function
1186  *
1187  * @adev: amdgpu_device pointer
1188  * @block: offset of instance
1189  * @reg: offset of register
1190  * @v: value to write to the register
1191  *
1192  * Dummy register write function.  Used for register blocks
1193  * that certain asics don't have (all asics).
1194  */
1195 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1196 				      uint32_t block,
1197 				      uint32_t reg, uint32_t v)
1198 {
1199 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1200 		  reg, block, v);
1201 	BUG();
1202 }
1203 
1204 /**
1205  * amdgpu_device_asic_init - Wrapper for atom asic_init
1206  *
1207  * @adev: amdgpu_device pointer
1208  *
1209  * Does any asic specific work and then calls atom asic init.
1210  */
1211 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1212 {
1213 	int ret;
1214 
1215 	amdgpu_asic_pre_asic_init(adev);
1216 
1217 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1218 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1219 		amdgpu_psp_wait_for_bootloader(adev);
1220 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1221 		/* TODO: check the return val and stop device initialization if boot fails */
1222 		amdgpu_psp_query_boot_status(adev);
1223 		return ret;
1224 	} else {
1225 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 /**
1232  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1233  *
1234  * @adev: amdgpu_device pointer
1235  *
1236  * Allocates a scratch page of VRAM for use by various things in the
1237  * driver.
1238  */
1239 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1240 {
1241 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1242 				       AMDGPU_GEM_DOMAIN_VRAM |
1243 				       AMDGPU_GEM_DOMAIN_GTT,
1244 				       &adev->mem_scratch.robj,
1245 				       &adev->mem_scratch.gpu_addr,
1246 				       (void **)&adev->mem_scratch.ptr);
1247 }
1248 
1249 /**
1250  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1251  *
1252  * @adev: amdgpu_device pointer
1253  *
1254  * Frees the VRAM scratch page.
1255  */
1256 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1257 {
1258 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1259 }
1260 
1261 /**
1262  * amdgpu_device_program_register_sequence - program an array of registers.
1263  *
1264  * @adev: amdgpu_device pointer
1265  * @registers: pointer to the register array
1266  * @array_size: size of the register array
1267  *
1268  * Programs an array of registers with and/or masks.
1269  * This is a helper for setting golden registers.
1270  */
1271 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1272 					     const u32 *registers,
1273 					     const u32 array_size)
1274 {
1275 	u32 tmp, reg, and_mask, or_mask;
1276 	int i;
1277 
1278 	if (array_size % 3)
1279 		return;
1280 
1281 	for (i = 0; i < array_size; i += 3) {
1282 		reg = registers[i + 0];
1283 		and_mask = registers[i + 1];
1284 		or_mask = registers[i + 2];
1285 
1286 		if (and_mask == 0xffffffff) {
1287 			tmp = or_mask;
1288 		} else {
1289 			tmp = RREG32(reg);
1290 			tmp &= ~and_mask;
1291 			if (adev->family >= AMDGPU_FAMILY_AI)
1292 				tmp |= (or_mask & and_mask);
1293 			else
1294 				tmp |= or_mask;
1295 		}
1296 		WREG32(reg, tmp);
1297 	}
1298 }
1299 
1300 /**
1301  * amdgpu_device_pci_config_reset - reset the GPU
1302  *
1303  * @adev: amdgpu_device pointer
1304  *
1305  * Resets the GPU using the pci config reset sequence.
1306  * Only applicable to asics prior to vega10.
1307  */
1308 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1309 {
1310 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1311 }
1312 
1313 /**
1314  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1315  *
1316  * @adev: amdgpu_device pointer
1317  *
1318  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1319  */
1320 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1321 {
1322 	return pci_reset_function(adev->pdev);
1323 }
1324 
1325 /*
1326  * amdgpu_device_wb_*()
1327  * Writeback is the method by which the GPU updates special pages in memory
1328  * with the status of certain GPU events (fences, ring pointers, etc.).
1329  */
1330 
1331 /**
1332  * amdgpu_device_wb_fini - Disable Writeback and free memory
1333  *
1334  * @adev: amdgpu_device pointer
1335  *
1336  * Disables Writeback and frees the Writeback memory (all asics).
1337  * Used at driver shutdown.
1338  */
1339 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1340 {
1341 	if (adev->wb.wb_obj) {
1342 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1343 				      &adev->wb.gpu_addr,
1344 				      (void **)&adev->wb.wb);
1345 		adev->wb.wb_obj = NULL;
1346 	}
1347 }
1348 
1349 /**
1350  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1351  *
1352  * @adev: amdgpu_device pointer
1353  *
1354  * Initializes writeback and allocates writeback memory (all asics).
1355  * Used at driver startup.
1356  * Returns 0 on success or a negative error code on failure.
1357  */
1358 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1359 {
1360 	int r;
1361 
1362 	if (adev->wb.wb_obj == NULL) {
1363 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1364 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1365 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1366 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1367 					    (void **)&adev->wb.wb);
1368 		if (r) {
1369 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1370 			return r;
1371 		}
1372 
1373 		adev->wb.num_wb = AMDGPU_MAX_WB;
1374 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1375 
1376 		/* clear wb memory */
1377 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1378 	}
1379 
1380 	return 0;
1381 }
1382 
1383 /**
1384  * amdgpu_device_wb_get - Allocate a wb entry
1385  *
1386  * @adev: amdgpu_device pointer
1387  * @wb: wb index
1388  *
1389  * Allocate a wb slot for use by the driver (all asics).
1390  * Returns 0 on success or -EINVAL on failure.
1391  */
1392 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1393 {
1394 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1395 
1396 	if (offset < adev->wb.num_wb) {
1397 		__set_bit(offset, adev->wb.used);
1398 		*wb = offset << 3; /* convert to dw offset */
1399 		return 0;
1400 	} else {
1401 		return -EINVAL;
1402 	}
1403 }
1404 
1405 /**
1406  * amdgpu_device_wb_free - Free a wb entry
1407  *
1408  * @adev: amdgpu_device pointer
1409  * @wb: wb index
1410  *
1411  * Free a wb slot allocated for use by the driver (all asics)
1412  */
1413 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1414 {
1415 	wb >>= 3;
1416 	if (wb < adev->wb.num_wb)
1417 		__clear_bit(wb, adev->wb.used);
1418 }
1419 
1420 /**
1421  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1422  *
1423  * @adev: amdgpu_device pointer
1424  *
1425  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1426  * to fail, but if any of the BARs is not accessible after the resize we abort
1427  * driver loading by returning -ENODEV.
1428  */
1429 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1430 {
1431 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1432 	struct pci_bus *root;
1433 	struct resource *res;
1434 	unsigned int i;
1435 	u16 cmd;
1436 	int r;
1437 
1438 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1439 		return 0;
1440 
1441 	/* Bypass for VF */
1442 	if (amdgpu_sriov_vf(adev))
1443 		return 0;
1444 
1445 	/* skip if the bios has already enabled large BAR */
1446 	if (adev->gmc.real_vram_size &&
1447 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1448 		return 0;
1449 
1450 	/* Check if the root BUS has 64bit memory resources */
1451 	root = adev->pdev->bus;
1452 	while (root->parent)
1453 		root = root->parent;
1454 
1455 	pci_bus_for_each_resource(root, res, i) {
1456 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1457 		    res->start > 0x100000000ull)
1458 			break;
1459 	}
1460 
1461 	/* Trying to resize is pointless without a root hub window above 4GB */
1462 	if (!res)
1463 		return 0;
1464 
1465 	/* Limit the BAR size to what is available */
1466 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1467 			rbar_size);
1468 
1469 	/* Disable memory decoding while we change the BAR addresses and size */
1470 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1471 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1472 			      cmd & ~PCI_COMMAND_MEMORY);
1473 
1474 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1475 	amdgpu_doorbell_fini(adev);
1476 	if (adev->asic_type >= CHIP_BONAIRE)
1477 		pci_release_resource(adev->pdev, 2);
1478 
1479 	pci_release_resource(adev->pdev, 0);
1480 
1481 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1482 	if (r == -ENOSPC)
1483 		DRM_INFO("Not enough PCI address space for a large BAR.");
1484 	else if (r && r != -ENOTSUPP)
1485 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1486 
1487 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1488 
1489 	/* When the doorbell or fb BAR isn't available we have no chance of
1490 	 * using the device.
1491 	 */
1492 	r = amdgpu_doorbell_init(adev);
1493 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1494 		return -ENODEV;
1495 
1496 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1497 
1498 	return 0;
1499 }
1500 
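/* Check whether the driver should attempt to read a video BIOS for this
 * device; BIOS reads are skipped on APUs that expose an AID mask.
 */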
1501 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1502 {
1503 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1504 		return false;
1505 
1506 	return true;
1507 }
1508 
1509 /*
1510  * GPU helpers function.
1511  */
1512 /**
1513  * amdgpu_device_need_post - check if the hw need post or not
1514  *
1515  * @adev: amdgpu_device pointer
1516  *
1517  * Check if the asic has been initialized (all asics) at driver startup
1518  * or if post is needed because a hw reset was performed.
1519  * Returns true if post is needed or false if not.
1520  */
1521 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1522 {
1523 	uint32_t reg;
1524 
1525 	if (amdgpu_sriov_vf(adev))
1526 		return false;
1527 
1528 	if (!amdgpu_device_read_bios(adev))
1529 		return false;
1530 
1531 	if (amdgpu_passthrough(adev)) {
1532 		/* for FIJI: In the whole GPU pass-through virtualization case, after a VM
1533 		 * reboot some old SMC firmware still needs the driver to perform vPost,
1534 		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
1535 		 * this flaw, so we force vPost for SMC versions below 22.15
1536 		 */
1537 		if (adev->asic_type == CHIP_FIJI) {
1538 			int err;
1539 			uint32_t fw_ver;
1540 
1541 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1542 			/* force vPost if an error occurred */
1543 			if (err)
1544 				return true;
1545 
1546 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1547 			if (fw_ver < 0x00160e00)
1548 				return true;
1549 		}
1550 	}
1551 
1552 	/* Don't post if we need to reset whole hive on init */
1553 	if (adev->gmc.xgmi.pending_reset)
1554 		return false;
1555 
1556 	if (adev->has_hw_reset) {
1557 		adev->has_hw_reset = false;
1558 		return true;
1559 	}
1560 
1561 	/* bios scratch used on CIK+ */
1562 	if (adev->asic_type >= CHIP_BONAIRE)
1563 		return amdgpu_atombios_scratch_need_asic_init(adev);
1564 
1565 	/* check MEM_SIZE for older asics */
1566 	reg = amdgpu_asic_get_config_memsize(adev);
1567 
1568 	if ((reg != 0) && (reg != 0xffffffff))
1569 		return false;
1570 
1571 	return true;
1572 }
1573 
1574 /*
1575  * Check whether seamless boot is supported.
1576  *
1577  * So far we only support seamless boot on display IP version 3.0 (DCN 3.0) or later.
1578  * If users report that it works on older ASICS as well, we may
1579  * loosen this.
1580  */
1581 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1582 {
1583 	switch (amdgpu_seamless) {
1584 	case -1:
1585 		break;
1586 	case 1:
1587 		return true;
1588 	case 0:
1589 		return false;
1590 	default:
1591 		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1592 			  amdgpu_seamless);
1593 		return false;
1594 	}
1595 
1596 	if (!(adev->flags & AMD_IS_APU))
1597 		return false;
1598 
1599 	if (adev->mman.keep_stolen_vga_memory)
1600 		return false;
1601 
1602 	return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
1603 }
1604 
1605 /*
1606  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1607  * don't support dynamic speed switching. Until we have confirmation from Intel
1608  * that a specific host supports it, it's safer that we keep it disabled for all.
1609  *
1610  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1611  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1612  */
1613 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1614 {
1615 #if IS_ENABLED(CONFIG_X86)
1616 	struct cpuinfo_x86 *c = &cpu_data(0);
1617 
1618 	/* eGPUs change speed based on USB4 fabric conditions */
1619 	if (dev_is_removable(adev->dev))
1620 		return true;
1621 
1622 	if (c->x86_vendor == X86_VENDOR_INTEL)
1623 		return false;
1624 #endif
1625 	return true;
1626 }
1627 
1628 /**
1629  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1630  *
1631  * @adev: amdgpu_device pointer
1632  *
1633  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1634  * be set for this device.
1635  *
1636  * Returns true if it should be used or false if not.
1637  */
1638 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1639 {
1640 	switch (amdgpu_aspm) {
1641 	case -1:
1642 		break;
1643 	case 0:
1644 		return false;
1645 	case 1:
1646 		return true;
1647 	default:
1648 		return false;
1649 	}
1650 	if (adev->flags & AMD_IS_APU)
1651 		return false;
1652 	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1653 		return false;
1654 	return pcie_aspm_enabled(adev->pdev);
1655 }
1656 
1657 /* if we get transitioned to only one device, take VGA back */
1658 /**
1659  * amdgpu_device_vga_set_decode - enable/disable vga decode
1660  *
1661  * @pdev: PCI device pointer
1662  * @state: enable/disable vga decode
1663  *
1664  * Enable/disable vga decode (all asics).
1665  * Returns VGA resource flags.
1666  */
1667 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1668 		bool state)
1669 {
1670 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1671 
1672 	amdgpu_asic_set_vga_state(adev, state);
1673 	if (state)
1674 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1675 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1676 	else
1677 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1678 }
1679 
1680 /**
1681  * amdgpu_device_check_block_size - validate the vm block size
1682  *
1683  * @adev: amdgpu_device pointer
1684  *
1685  * Validates the vm block size specified via module parameter.
1686  * The vm block size defines number of bits in page table versus page directory,
1687  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1688  * page table and the remaining bits are in the page directory.
1689  */
1690 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1691 {
1692 	/* defines number of bits in page table versus page directory,
1693 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1694 	 * page table and the remaining bits are in the page directory
1695 	 */
1696 	if (amdgpu_vm_block_size == -1)
1697 		return;
1698 
1699 	if (amdgpu_vm_block_size < 9) {
1700 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1701 			 amdgpu_vm_block_size);
1702 		amdgpu_vm_block_size = -1;
1703 	}
1704 }
1705 
1706 /**
1707  * amdgpu_device_check_vm_size - validate the vm size
1708  *
1709  * @adev: amdgpu_device pointer
1710  *
1711  * Validates the vm size in GB specified via module parameter.
1712  * The VM size is the size of the GPU virtual memory space in GB.
1713  */
1714 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1715 {
1716 	/* no need to check the default value */
1717 	if (amdgpu_vm_size == -1)
1718 		return;
1719 
1720 	if (amdgpu_vm_size < 1) {
1721 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1722 			 amdgpu_vm_size);
1723 		amdgpu_vm_size = -1;
1724 	}
1725 }
1726 
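/* Validate the amdgpu_smu_memory_pool_size module parameter against the
 * installed system memory and set adev->pm.smu_prv_buffer_size accordingly.
 */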
1727 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1728 {
1729 	struct sysinfo si;
1730 	bool is_os_64 = (sizeof(void *) == 8);
1731 	uint64_t total_memory;
1732 	uint64_t dram_size_seven_GB = 0x1B8000000;
1733 	uint64_t dram_size_three_GB = 0xB8000000;
1734 
1735 	if (amdgpu_smu_memory_pool_size == 0)
1736 		return;
1737 
1738 	if (!is_os_64) {
1739 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1740 		goto def_value;
1741 	}
1742 	si_meminfo(&si);
1743 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1744 
1745 	if ((amdgpu_smu_memory_pool_size == 1) ||
1746 		(amdgpu_smu_memory_pool_size == 2)) {
1747 		if (total_memory < dram_size_three_GB)
1748 			goto def_value1;
1749 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1750 		(amdgpu_smu_memory_pool_size == 8)) {
1751 		if (total_memory < dram_size_seven_GB)
1752 			goto def_value1;
1753 	} else {
1754 		DRM_WARN("Smu memory pool size not supported\n");
1755 		goto def_value;
1756 	}
1757 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1758 
1759 	return;
1760 
1761 def_value1:
1762 	DRM_WARN("Not enough system memory\n");
1763 def_value:
1764 	adev->pm.smu_prv_buffer_size = 0;
1765 }
1766 
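/* Set adev->apu_flags based on the ASIC type and, where needed, the PCI
 * device ID.
 */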
1767 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1768 {
1769 	if (!(adev->flags & AMD_IS_APU) ||
1770 	    adev->asic_type < CHIP_RAVEN)
1771 		return 0;
1772 
1773 	switch (adev->asic_type) {
1774 	case CHIP_RAVEN:
1775 		if (adev->pdev->device == 0x15dd)
1776 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1777 		if (adev->pdev->device == 0x15d8)
1778 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1779 		break;
1780 	case CHIP_RENOIR:
1781 		if ((adev->pdev->device == 0x1636) ||
1782 		    (adev->pdev->device == 0x164c))
1783 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1784 		else
1785 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1786 		break;
1787 	case CHIP_VANGOGH:
1788 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1789 		break;
1790 	case CHIP_YELLOW_CARP:
1791 		break;
1792 	case CHIP_CYAN_SKILLFISH:
1793 		if ((adev->pdev->device == 0x13FE) ||
1794 		    (adev->pdev->device == 0x143F))
1795 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1796 		break;
1797 	default:
1798 		break;
1799 	}
1800 
1801 	return 0;
1802 }
1803 
1804 /**
1805  * amdgpu_device_check_arguments - validate module params
1806  *
1807  * @adev: amdgpu_device pointer
1808  *
1809  * Validates certain module parameters and updates
1810  * the associated values used by the driver (all asics).
1811  */
1812 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1813 {
1814 	if (amdgpu_sched_jobs < 4) {
1815 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1816 			 amdgpu_sched_jobs);
1817 		amdgpu_sched_jobs = 4;
1818 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1819 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1820 			 amdgpu_sched_jobs);
1821 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1822 	}
1823 
1824 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1825 		/* gart size must be greater than or equal to 32M */
1826 		dev_warn(adev->dev, "gart size (%d) too small\n",
1827 			 amdgpu_gart_size);
1828 		amdgpu_gart_size = -1;
1829 	}
1830 
1831 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1832 		/* gtt size must be greater than or equal to 32M */
1833 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1834 			 amdgpu_gtt_size);
1835 		amdgpu_gtt_size = -1;
1836 	}
1837 
1838 	/* valid range is between 4 and 9 inclusive */
1839 	if (amdgpu_vm_fragment_size != -1 &&
1840 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1841 		dev_warn(adev->dev, "VM fragment size valid range is between 4 and 9\n");
1842 		amdgpu_vm_fragment_size = -1;
1843 	}
1844 
1845 	if (amdgpu_sched_hw_submission < 2) {
1846 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1847 			 amdgpu_sched_hw_submission);
1848 		amdgpu_sched_hw_submission = 2;
1849 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1850 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1851 			 amdgpu_sched_hw_submission);
1852 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1853 	}
1854 
1855 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1856 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1857 		amdgpu_reset_method = -1;
1858 	}
1859 
1860 	amdgpu_device_check_smu_prv_buffer_size(adev);
1861 
1862 	amdgpu_device_check_vm_size(adev);
1863 
1864 	amdgpu_device_check_block_size(adev);
1865 
1866 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1867 
1868 	return 0;
1869 }
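
/*
 * Worked example (editor's illustration, not part of the original file):
 * out-of-range scheduler parameters are clamped rather than rejected.  With a
 * hypothetical amdgpu.sched_jobs=6 on the kernel command line:
 *
 *	is_power_of_2(6)        == false
 *	roundup_pow_of_two(6)   == 8
 *
 * so the checks above warn once and continue with sched_jobs = 8.
 */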
1870 
1871 /**
1872  * amdgpu_switcheroo_set_state - set switcheroo state
1873  *
1874  * @pdev: pci dev pointer
1875  * @state: vga_switcheroo state
1876  *
1877  * Callback for the switcheroo driver.  Suspends or resumes the asic
1878  * before it is powered down or after it is powered up using ACPI methods.
1879  */
1880 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1881 					enum vga_switcheroo_state state)
1882 {
1883 	struct drm_device *dev = pci_get_drvdata(pdev);
1884 	int r;
1885 
1886 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1887 		return;
1888 
1889 	if (state == VGA_SWITCHEROO_ON) {
1890 		pr_info("switched on\n");
1891 		/* don't suspend or resume card normally */
1892 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1893 
1894 		pci_set_power_state(pdev, PCI_D0);
1895 		amdgpu_device_load_pci_state(pdev);
1896 		r = pci_enable_device(pdev);
1897 		if (r)
1898 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1899 		amdgpu_device_resume(dev, true);
1900 
1901 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1902 	} else {
1903 		pr_info("switched off\n");
1904 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1905 		amdgpu_device_prepare(dev);
1906 		amdgpu_device_suspend(dev, true);
1907 		amdgpu_device_cache_pci_state(pdev);
1908 		/* Shut down the device */
1909 		pci_disable_device(pdev);
1910 		pci_set_power_state(pdev, PCI_D3cold);
1911 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1912 	}
1913 }
1914 
1915 /**
1916  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1917  *
1918  * @pdev: pci dev pointer
1919  *
1920  * Callback for the switcheroo driver.  Checks if the switcheroo
1921  * state can be changed.
1922  * Returns true if the state can be changed, false if not.
1923  */
1924 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1925 {
1926 	struct drm_device *dev = pci_get_drvdata(pdev);
1927 
1928 	/*
1929 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1930 	* locking inversion with the driver load path. And the access here is
1931 	* completely racy anyway. So don't bother with locking for now.
1932 	*/
1933 	return atomic_read(&dev->open_count) == 0;
1934 }
1935 
1936 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1937 	.set_gpu_state = amdgpu_switcheroo_set_state,
1938 	.reprobe = NULL,
1939 	.can_switch = amdgpu_switcheroo_can_switch,
1940 };
1941 
1942 /**
1943  * amdgpu_device_ip_set_clockgating_state - set the CG state
1944  *
1945  * @dev: amdgpu_device pointer
1946  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1947  * @state: clockgating state (gate or ungate)
1948  *
1949  * Sets the requested clockgating state for all instances of
1950  * the hardware IP specified.
1951  * Returns the error code from the last instance.
1952  */
1953 int amdgpu_device_ip_set_clockgating_state(void *dev,
1954 					   enum amd_ip_block_type block_type,
1955 					   enum amd_clockgating_state state)
1956 {
1957 	struct amdgpu_device *adev = dev;
1958 	int i, r = 0;
1959 
1960 	for (i = 0; i < adev->num_ip_blocks; i++) {
1961 		if (!adev->ip_blocks[i].status.valid)
1962 			continue;
1963 		if (adev->ip_blocks[i].version->type != block_type)
1964 			continue;
1965 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1966 			continue;
1967 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1968 			(void *)adev, state);
1969 		if (r)
1970 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1971 				  adev->ip_blocks[i].version->funcs->name, r);
1972 	}
1973 	return r;
1974 }
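
/*
 * Usage sketch (editor's illustration, not part of the original file): a
 * caller wanting to gate GFX clocks on all GFX instances could do:
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						    AMD_CG_STATE_GATE);
 *	if (r)
 *		dev_warn(adev->dev, "failed to gate GFX clocks (%d)\n", r);
 *
 * Only the error code of the last matching instance is reported, as noted in
 * the kernel-doc above.
 */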
1975 
1976 /**
1977  * amdgpu_device_ip_set_powergating_state - set the PG state
1978  *
1979  * @dev: amdgpu_device pointer
1980  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1981  * @state: powergating state (gate or ungate)
1982  *
1983  * Sets the requested powergating state for all instances of
1984  * the hardware IP specified.
1985  * Returns the error code from the last instance.
1986  */
1987 int amdgpu_device_ip_set_powergating_state(void *dev,
1988 					   enum amd_ip_block_type block_type,
1989 					   enum amd_powergating_state state)
1990 {
1991 	struct amdgpu_device *adev = dev;
1992 	int i, r = 0;
1993 
1994 	for (i = 0; i < adev->num_ip_blocks; i++) {
1995 		if (!adev->ip_blocks[i].status.valid)
1996 			continue;
1997 		if (adev->ip_blocks[i].version->type != block_type)
1998 			continue;
1999 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2000 			continue;
2001 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2002 			(void *)adev, state);
2003 		if (r)
2004 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2005 				  adev->ip_blocks[i].version->funcs->name, r);
2006 	}
2007 	return r;
2008 }
2009 
2010 /**
2011  * amdgpu_device_ip_get_clockgating_state - get the CG state
2012  *
2013  * @adev: amdgpu_device pointer
2014  * @flags: clockgating feature flags
2015  *
2016  * Walks the list of IPs on the device and updates the clockgating
2017  * flags for each IP.
2018  * Updates @flags with the feature flags for each hardware IP where
2019  * clockgating is enabled.
2020  */
2021 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2022 					    u64 *flags)
2023 {
2024 	int i;
2025 
2026 	for (i = 0; i < adev->num_ip_blocks; i++) {
2027 		if (!adev->ip_blocks[i].status.valid)
2028 			continue;
2029 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2030 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2031 	}
2032 }
2033 
2034 /**
2035  * amdgpu_device_ip_wait_for_idle - wait for idle
2036  *
2037  * @adev: amdgpu_device pointer
2038  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2039  *
2040  * Waits for the requested hardware IP to be idle.
2041  * Returns 0 for success or a negative error code on failure.
2042  */
2043 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2044 				   enum amd_ip_block_type block_type)
2045 {
2046 	int i, r;
2047 
2048 	for (i = 0; i < adev->num_ip_blocks; i++) {
2049 		if (!adev->ip_blocks[i].status.valid)
2050 			continue;
2051 		if (adev->ip_blocks[i].version->type == block_type) {
2052 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2053 			if (r)
2054 				return r;
2055 			break;
2056 		}
2057 	}
2058 	return 0;
2060 }
2061 
2062 /**
2063  * amdgpu_device_ip_is_idle - is the hardware IP idle
2064  *
2065  * @adev: amdgpu_device pointer
2066  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2067  *
2068  * Check if the hardware IP is idle or not.
2069  * Returns true if the IP is idle, false if not.
2070  */
2071 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2072 			      enum amd_ip_block_type block_type)
2073 {
2074 	int i;
2075 
2076 	for (i = 0; i < adev->num_ip_blocks; i++) {
2077 		if (!adev->ip_blocks[i].status.valid)
2078 			continue;
2079 		if (adev->ip_blocks[i].version->type == block_type)
2080 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2081 	}
2082 	return true;
2084 }
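
/*
 * Usage sketch (editor's illustration, not part of the original file): a
 * caller typically quiesces an IP before reprogramming it, e.g.:
 *
 *	if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX)) {
 *		r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
 *		if (r)
 *			return r;
 *	}
 */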
2085 
2086 /**
2087  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2088  *
2089  * @adev: amdgpu_device pointer
2090  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2091  *
2092  * Returns a pointer to the hardware IP block structure
2093  * if it exists for the asic, otherwise NULL.
2094  */
2095 struct amdgpu_ip_block *
2096 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2097 			      enum amd_ip_block_type type)
2098 {
2099 	int i;
2100 
2101 	for (i = 0; i < adev->num_ip_blocks; i++)
2102 		if (adev->ip_blocks[i].version->type == type)
2103 			return &adev->ip_blocks[i];
2104 
2105 	return NULL;
2106 }
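
/*
 * Usage sketch (editor's illustration, not part of the original file):
 *
 *	struct amdgpu_ip_block *gmc =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
 *
 *	if (gmc)
 *		dev_info(adev->dev, "GMC v%u.%u\n",
 *			 gmc->version->major, gmc->version->minor);
 */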
2107 
2108 /**
2109  * amdgpu_device_ip_block_version_cmp
2110  *
2111  * @adev: amdgpu_device pointer
2112  * @type: enum amd_ip_block_type
2113  * @major: major version
2114  * @minor: minor version
2115  *
2116  * Returns 0 if the installed IP block version is equal to or greater than
2117  * the one requested, 1 if it is smaller or the ip_block doesn't exist.
2118  */
2119 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2120 				       enum amd_ip_block_type type,
2121 				       u32 major, u32 minor)
2122 {
2123 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2124 
2125 	if (ip_block && ((ip_block->version->major > major) ||
2126 			((ip_block->version->major == major) &&
2127 			(ip_block->version->minor >= minor))))
2128 		return 0;
2129 
2130 	return 1;
2131 }
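
/*
 * Usage sketch (editor's illustration, not part of the original file):
 * checking for GFX 9.0 or newer before enabling a hypothetical feature:
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						9, 0))
 *		enable_some_feature(adev);
 *
 * enable_some_feature() is a made-up placeholder; a return value of 0 means
 * the installed GFX block is at least version 9.0.
 */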
2132 
2133 /**
2134  * amdgpu_device_ip_block_add
2135  *
2136  * @adev: amdgpu_device pointer
2137  * @ip_block_version: pointer to the IP to add
2138  *
2139  * Adds the IP block driver information to the collection of IPs
2140  * on the asic.
2141  */
2142 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2143 			       const struct amdgpu_ip_block_version *ip_block_version)
2144 {
2145 	if (!ip_block_version)
2146 		return -EINVAL;
2147 
2148 	switch (ip_block_version->type) {
2149 	case AMD_IP_BLOCK_TYPE_VCN:
2150 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2151 			return 0;
2152 		break;
2153 	case AMD_IP_BLOCK_TYPE_JPEG:
2154 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2155 			return 0;
2156 		break;
2157 	default:
2158 		break;
2159 	}
2160 
2161 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2162 		  ip_block_version->funcs->name);
2163 
2164 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2165 
2166 	return 0;
2167 }
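
/*
 * Usage sketch (editor's illustration, not part of the original file): the
 * per-SoC setup code called from the early init path below registers blocks
 * one at a time, e.g.:
 *
 *	r = amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
 *	if (r)
 *		return r;
 *
 * where gmc_v9_0_ip_block is one of the exported amdgpu_ip_block_version
 * descriptors; harvested VCN/JPEG blocks are silently skipped as shown above.
 */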
2168 
2169 /**
2170  * amdgpu_device_enable_virtual_display - enable virtual display feature
2171  *
2172  * @adev: amdgpu_device pointer
2173  *
2174  * Enables the virtual display feature if the user has enabled it via
2175  * the module parameter virtual_display.  This feature provides virtual
2176  * display hardware on headless boards or in virtualized environments.
2177  * This function parses and validates the configuration string specified by
2178  * the user and configures the virtual display configuration (number of
2179  * virtual connectors, crtcs, etc.) specified.
2180  */
2181 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2182 {
2183 	adev->enable_virtual_display = false;
2184 
2185 	if (amdgpu_virtual_display) {
2186 		const char *pci_address_name = pci_name(adev->pdev);
2187 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2188 
2189 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2190 		pciaddstr_tmp = pciaddstr;
2191 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2192 			pciaddname = strsep(&pciaddname_tmp, ",");
2193 			if (!strcmp("all", pciaddname)
2194 			    || !strcmp(pci_address_name, pciaddname)) {
2195 				long num_crtc;
2196 				int res = -1;
2197 
2198 				adev->enable_virtual_display = true;
2199 
2200 				if (pciaddname_tmp)
2201 					res = kstrtol(pciaddname_tmp, 10,
2202 						      &num_crtc);
2203 
2204 				if (!res) {
2205 					if (num_crtc < 1)
2206 						num_crtc = 1;
2207 					if (num_crtc > 6)
2208 						num_crtc = 6;
2209 					adev->mode_info.num_crtc = num_crtc;
2210 				} else {
2211 					adev->mode_info.num_crtc = 1;
2212 				}
2213 				break;
2214 			}
2215 		}
2216 
2217 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2218 			 amdgpu_virtual_display, pci_address_name,
2219 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2220 
2221 		kfree(pciaddstr);
2222 	}
2223 }
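
/*
 * Parameter format examples (editor's illustration, not part of the original
 * file); entries parsed above are "<pci address>[,crtcs]" separated by
 * semicolons, and "all" matches every device:
 *
 *	amdgpu.virtual_display=0000:03:00.0,2
 *	amdgpu.virtual_display=all,1
 *
 * The PCI address here is hypothetical; the crtc count is clamped to 1..6 as
 * shown in the parsing code above.
 */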
2224 
2225 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2226 {
2227 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2228 		adev->mode_info.num_crtc = 1;
2229 		adev->enable_virtual_display = true;
2230 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2231 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2232 	}
2233 }
2234 
2235 /**
2236  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2237  *
2238  * @adev: amdgpu_device pointer
2239  *
2240  * Parses the asic configuration parameters specified in the gpu info
2241  * firmware and makes them available to the driver for use in configuring
2242  * the asic.
2243  * Returns 0 on success, -EINVAL on failure.
2244  */
2245 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2246 {
2247 	const char *chip_name;
2248 	char fw_name[40];
2249 	int err;
2250 	const struct gpu_info_firmware_header_v1_0 *hdr;
2251 
2252 	adev->firmware.gpu_info_fw = NULL;
2253 
2254 	if (adev->mman.discovery_bin) {
2255 		/*
2256 		 * FIXME: The bounding box is still needed by Navi12, so
2257 		 * temporarily read it from gpu_info firmware. Should be dropped
2258 		 * when DAL no longer needs it.
2259 		 */
2260 		if (adev->asic_type != CHIP_NAVI12)
2261 			return 0;
2262 	}
2263 
2264 	switch (adev->asic_type) {
2265 	default:
2266 		return 0;
2267 	case CHIP_VEGA10:
2268 		chip_name = "vega10";
2269 		break;
2270 	case CHIP_VEGA12:
2271 		chip_name = "vega12";
2272 		break;
2273 	case CHIP_RAVEN:
2274 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2275 			chip_name = "raven2";
2276 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2277 			chip_name = "picasso";
2278 		else
2279 			chip_name = "raven";
2280 		break;
2281 	case CHIP_ARCTURUS:
2282 		chip_name = "arcturus";
2283 		break;
2284 	case CHIP_NAVI12:
2285 		chip_name = "navi12";
2286 		break;
2287 	}
2288 
2289 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2290 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2291 	if (err) {
2292 		dev_err(adev->dev,
2293 			"Failed to get gpu_info firmware \"%s\"\n",
2294 			fw_name);
2295 		goto out;
2296 	}
2297 
2298 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2299 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2300 
2301 	switch (hdr->version_major) {
2302 	case 1:
2303 	{
2304 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2305 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2306 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2307 
2308 		/*
2309 		 * Should be dropped when DAL no longer needs it.
2310 		 */
2311 		if (adev->asic_type == CHIP_NAVI12)
2312 			goto parse_soc_bounding_box;
2313 
2314 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2315 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2316 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2317 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2318 		adev->gfx.config.max_texture_channel_caches =
2319 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2320 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2321 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2322 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2323 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2324 		adev->gfx.config.double_offchip_lds_buf =
2325 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2326 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2327 		adev->gfx.cu_info.max_waves_per_simd =
2328 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2329 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2330 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2331 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2332 		if (hdr->version_minor >= 1) {
2333 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2334 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2335 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2336 			adev->gfx.config.num_sc_per_sh =
2337 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2338 			adev->gfx.config.num_packer_per_sc =
2339 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2340 		}
2341 
2342 parse_soc_bounding_box:
2343 		/*
2344 		 * soc bounding box info is not integrated in the discovery table,
2345 		 * so we still need to parse it from the gpu info firmware when needed.
2346 		 */
2347 		if (hdr->version_minor == 2) {
2348 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2349 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2350 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2351 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2352 		}
2353 		break;
2354 	}
2355 	default:
2356 		dev_err(adev->dev,
2357 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2358 		err = -EINVAL;
2359 		goto out;
2360 	}
2361 out:
2362 	return err;
2363 }
2364 
2365 /**
2366  * amdgpu_device_ip_early_init - run early init for hardware IPs
2367  *
2368  * @adev: amdgpu_device pointer
2369  *
2370  * Early initialization pass for hardware IPs.  The hardware IPs that make
2371  * up each asic are discovered and each IP's early_init callback is run.  This
2372  * is the first stage in initializing the asic.
2373  * Returns 0 on success, negative error code on failure.
2374  */
2375 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2376 {
2377 	struct pci_dev *parent;
2378 	int i, r;
2379 	bool total;
2380 
2381 	amdgpu_device_enable_virtual_display(adev);
2382 
2383 	if (amdgpu_sriov_vf(adev)) {
2384 		r = amdgpu_virt_request_full_gpu(adev, true);
2385 		if (r)
2386 			return r;
2387 	}
2388 
2389 	switch (adev->asic_type) {
2390 #ifdef CONFIG_DRM_AMDGPU_SI
2391 	case CHIP_VERDE:
2392 	case CHIP_TAHITI:
2393 	case CHIP_PITCAIRN:
2394 	case CHIP_OLAND:
2395 	case CHIP_HAINAN:
2396 		adev->family = AMDGPU_FAMILY_SI;
2397 		r = si_set_ip_blocks(adev);
2398 		if (r)
2399 			return r;
2400 		break;
2401 #endif
2402 #ifdef CONFIG_DRM_AMDGPU_CIK
2403 	case CHIP_BONAIRE:
2404 	case CHIP_HAWAII:
2405 	case CHIP_KAVERI:
2406 	case CHIP_KABINI:
2407 	case CHIP_MULLINS:
2408 		if (adev->flags & AMD_IS_APU)
2409 			adev->family = AMDGPU_FAMILY_KV;
2410 		else
2411 			adev->family = AMDGPU_FAMILY_CI;
2412 
2413 		r = cik_set_ip_blocks(adev);
2414 		if (r)
2415 			return r;
2416 		break;
2417 #endif
2418 	case CHIP_TOPAZ:
2419 	case CHIP_TONGA:
2420 	case CHIP_FIJI:
2421 	case CHIP_POLARIS10:
2422 	case CHIP_POLARIS11:
2423 	case CHIP_POLARIS12:
2424 	case CHIP_VEGAM:
2425 	case CHIP_CARRIZO:
2426 	case CHIP_STONEY:
2427 		if (adev->flags & AMD_IS_APU)
2428 			adev->family = AMDGPU_FAMILY_CZ;
2429 		else
2430 			adev->family = AMDGPU_FAMILY_VI;
2431 
2432 		r = vi_set_ip_blocks(adev);
2433 		if (r)
2434 			return r;
2435 		break;
2436 	default:
2437 		r = amdgpu_discovery_set_ip_blocks(adev);
2438 		if (r)
2439 			return r;
2440 		break;
2441 	}
2442 
2443 	if (amdgpu_has_atpx() &&
2444 	    (amdgpu_is_atpx_hybrid() ||
2445 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2446 	    ((adev->flags & AMD_IS_APU) == 0) &&
2447 	    !dev_is_removable(&adev->pdev->dev))
2448 		adev->flags |= AMD_IS_PX;
2449 
2450 	if (!(adev->flags & AMD_IS_APU)) {
2451 		parent = pcie_find_root_port(adev->pdev);
2452 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2453 	}
2454 
2456 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2457 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2458 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2459 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2460 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2461 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2462 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2463 
2464 	total = true;
2465 	for (i = 0; i < adev->num_ip_blocks; i++) {
2466 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2467 			DRM_WARN("disabled ip block: %d <%s>\n",
2468 				  i, adev->ip_blocks[i].version->funcs->name);
2469 			adev->ip_blocks[i].status.valid = false;
2470 		} else {
2471 			if (adev->ip_blocks[i].version->funcs->early_init) {
2472 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2473 				if (r == -ENOENT) {
2474 					adev->ip_blocks[i].status.valid = false;
2475 				} else if (r) {
2476 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2477 						  adev->ip_blocks[i].version->funcs->name, r);
2478 					total = false;
2479 				} else {
2480 					adev->ip_blocks[i].status.valid = true;
2481 				}
2482 			} else {
2483 				adev->ip_blocks[i].status.valid = true;
2484 			}
2485 		}
2486 		/* get the vbios after the asic_funcs are set up */
2487 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2488 			r = amdgpu_device_parse_gpu_info_fw(adev);
2489 			if (r)
2490 				return r;
2491 
2492 			/* Read BIOS */
2493 			if (amdgpu_device_read_bios(adev)) {
2494 				if (!amdgpu_get_bios(adev))
2495 					return -EINVAL;
2496 
2497 				r = amdgpu_atombios_init(adev);
2498 				if (r) {
2499 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2500 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2501 					return r;
2502 				}
2503 			}
2504 
2505 			/* get pf2vf msg info at its earliest time */
2506 			if (amdgpu_sriov_vf(adev))
2507 				amdgpu_virt_init_data_exchange(adev);
2508 
2509 		}
2510 	}
2511 	if (!total)
2512 		return -ENODEV;
2513 
2514 	amdgpu_amdkfd_device_probe(adev);
2515 	adev->cg_flags &= amdgpu_cg_mask;
2516 	adev->pg_flags &= amdgpu_pg_mask;
2517 
2518 	return 0;
2519 }
2520 
2521 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2522 {
2523 	int i, r;
2524 
2525 	for (i = 0; i < adev->num_ip_blocks; i++) {
2526 		if (!adev->ip_blocks[i].status.sw)
2527 			continue;
2528 		if (adev->ip_blocks[i].status.hw)
2529 			continue;
2530 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2531 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2532 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2533 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2534 			if (r) {
2535 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2536 					  adev->ip_blocks[i].version->funcs->name, r);
2537 				return r;
2538 			}
2539 			adev->ip_blocks[i].status.hw = true;
2540 		}
2541 	}
2542 
2543 	return 0;
2544 }
2545 
2546 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2547 {
2548 	int i, r;
2549 
2550 	for (i = 0; i < adev->num_ip_blocks; i++) {
2551 		if (!adev->ip_blocks[i].status.sw)
2552 			continue;
2553 		if (adev->ip_blocks[i].status.hw)
2554 			continue;
2555 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2556 		if (r) {
2557 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2558 				  adev->ip_blocks[i].version->funcs->name, r);
2559 			return r;
2560 		}
2561 		adev->ip_blocks[i].status.hw = true;
2562 	}
2563 
2564 	return 0;
2565 }
2566 
2567 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2568 {
2569 	int r = 0;
2570 	int i;
2571 	uint32_t smu_version;
2572 
2573 	if (adev->asic_type >= CHIP_VEGA10) {
2574 		for (i = 0; i < adev->num_ip_blocks; i++) {
2575 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2576 				continue;
2577 
2578 			if (!adev->ip_blocks[i].status.sw)
2579 				continue;
2580 
2581 			/* no need to do the fw loading again if already done */
2582 			if (adev->ip_blocks[i].status.hw)
2583 				break;
2584 
2585 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2586 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2587 				if (r) {
2588 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2589 							  adev->ip_blocks[i].version->funcs->name, r);
2590 					return r;
2591 				}
2592 			} else {
2593 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2594 				if (r) {
2595 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2596 							  adev->ip_blocks[i].version->funcs->name, r);
2597 					return r;
2598 				}
2599 			}
2600 
2601 			adev->ip_blocks[i].status.hw = true;
2602 			break;
2603 		}
2604 	}
2605 
2606 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2607 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2608 
2609 	return r;
2610 }
2611 
2612 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2613 {
2614 	long timeout;
2615 	int r, i;
2616 
2617 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2618 		struct amdgpu_ring *ring = adev->rings[i];
2619 
2620 		/* No need to set up the GPU scheduler for rings that don't need it */
2621 		if (!ring || ring->no_scheduler)
2622 			continue;
2623 
2624 		switch (ring->funcs->type) {
2625 		case AMDGPU_RING_TYPE_GFX:
2626 			timeout = adev->gfx_timeout;
2627 			break;
2628 		case AMDGPU_RING_TYPE_COMPUTE:
2629 			timeout = adev->compute_timeout;
2630 			break;
2631 		case AMDGPU_RING_TYPE_SDMA:
2632 			timeout = adev->sdma_timeout;
2633 			break;
2634 		default:
2635 			timeout = adev->video_timeout;
2636 			break;
2637 		}
2638 
2639 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2640 				   DRM_SCHED_PRIORITY_COUNT,
2641 				   ring->num_hw_submission, 0,
2642 				   timeout, adev->reset_domain->wq,
2643 				   ring->sched_score, ring->name,
2644 				   adev->dev);
2645 		if (r) {
2646 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2647 				  ring->name);
2648 			return r;
2649 		}
2650 		r = amdgpu_uvd_entity_init(adev, ring);
2651 		if (r) {
2652 			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2653 				  ring->name);
2654 			return r;
2655 		}
2656 		r = amdgpu_vce_entity_init(adev, ring);
2657 		if (r) {
2658 			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2659 				  ring->name);
2660 			return r;
2661 		}
2662 	}
2663 
2664 	amdgpu_xcp_update_partition_sched_list(adev);
2665 
2666 	return 0;
2667 }
2668 
2669 
2670 /**
2671  * amdgpu_device_ip_init - run init for hardware IPs
2672  *
2673  * @adev: amdgpu_device pointer
2674  *
2675  * Main initialization pass for hardware IPs.  The list of all the hardware
2676  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2677  * are run.  sw_init initializes the software state associated with each IP
2678  * and hw_init initializes the hardware associated with each IP.
2679  * Returns 0 on success, negative error code on failure.
2680  */
2681 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2682 {
2683 	int i, r;
2684 
2685 	r = amdgpu_ras_init(adev);
2686 	if (r)
2687 		return r;
2688 
2689 	for (i = 0; i < adev->num_ip_blocks; i++) {
2690 		if (!adev->ip_blocks[i].status.valid)
2691 			continue;
2692 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2693 		if (r) {
2694 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2695 				  adev->ip_blocks[i].version->funcs->name, r);
2696 			goto init_failed;
2697 		}
2698 		adev->ip_blocks[i].status.sw = true;
2699 
2700 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2701 			/* need to do common hw init early so everything is set up for gmc */
2702 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2703 			if (r) {
2704 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2705 				goto init_failed;
2706 			}
2707 			adev->ip_blocks[i].status.hw = true;
2708 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2709 			/* need to do gmc hw init early so we can allocate gpu mem */
2710 			/* Try to reserve bad pages early */
2711 			if (amdgpu_sriov_vf(adev))
2712 				amdgpu_virt_exchange_data(adev);
2713 
2714 			r = amdgpu_device_mem_scratch_init(adev);
2715 			if (r) {
2716 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2717 				goto init_failed;
2718 			}
2719 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2720 			if (r) {
2721 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2722 				goto init_failed;
2723 			}
2724 			r = amdgpu_device_wb_init(adev);
2725 			if (r) {
2726 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2727 				goto init_failed;
2728 			}
2729 			adev->ip_blocks[i].status.hw = true;
2730 
2731 			/* right after GMC hw init, we create CSA */
2732 			if (adev->gfx.mcbp) {
2733 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2734 							       AMDGPU_GEM_DOMAIN_VRAM |
2735 							       AMDGPU_GEM_DOMAIN_GTT,
2736 							       AMDGPU_CSA_SIZE);
2737 				if (r) {
2738 					DRM_ERROR("allocate CSA failed %d\n", r);
2739 					goto init_failed;
2740 				}
2741 			}
2742 
2743 			r = amdgpu_seq64_init(adev);
2744 			if (r) {
2745 				DRM_ERROR("allocate seq64 failed %d\n", r);
2746 				goto init_failed;
2747 			}
2748 		}
2749 	}
2750 
2751 	if (amdgpu_sriov_vf(adev))
2752 		amdgpu_virt_init_data_exchange(adev);
2753 
2754 	r = amdgpu_ib_pool_init(adev);
2755 	if (r) {
2756 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2757 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2758 		goto init_failed;
2759 	}
2760 
2761 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2762 	if (r)
2763 		goto init_failed;
2764 
2765 	r = amdgpu_device_ip_hw_init_phase1(adev);
2766 	if (r)
2767 		goto init_failed;
2768 
2769 	r = amdgpu_device_fw_loading(adev);
2770 	if (r)
2771 		goto init_failed;
2772 
2773 	r = amdgpu_device_ip_hw_init_phase2(adev);
2774 	if (r)
2775 		goto init_failed;
2776 
2777 	/*
2778 	 * retired pages will be loaded from eeprom and reserved here;
2779 	 * this should be called after amdgpu_device_ip_hw_init_phase2 since
2780 	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2781 	 * functional for I2C communication, which is only true at this point.
2782 	 *
2783 	 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2784 	 * about failures caused by a bad gpu situation and stop the amdgpu
2785 	 * init process accordingly. For other failure cases it will still
2786 	 * release all the resources and print an error message, rather than
2787 	 * returning a negative value to the upper level.
2788 	 *
2789 	 * Note: theoretically, this should be called before all vram allocations
2790 	 * to protect retired pages from being abused.
2791 	 */
2792 	r = amdgpu_ras_recovery_init(adev);
2793 	if (r)
2794 		goto init_failed;
2795 
2796 	/*
2797 	 * In case of XGMI, grab an extra reference on the reset domain for this device
2798 	 */
2799 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2800 		if (amdgpu_xgmi_add_device(adev) == 0) {
2801 			if (!amdgpu_sriov_vf(adev)) {
2802 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2803 
2804 				if (WARN_ON(!hive)) {
2805 					r = -ENOENT;
2806 					goto init_failed;
2807 				}
2808 
2809 				if (!hive->reset_domain ||
2810 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2811 					r = -ENOENT;
2812 					amdgpu_put_xgmi_hive(hive);
2813 					goto init_failed;
2814 				}
2815 
2816 				/* Drop the early temporary reset domain we created for device */
2817 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2818 				adev->reset_domain = hive->reset_domain;
2819 				amdgpu_put_xgmi_hive(hive);
2820 			}
2821 		}
2822 	}
2823 
2824 	r = amdgpu_device_init_schedulers(adev);
2825 	if (r)
2826 		goto init_failed;
2827 
2828 	if (adev->mman.buffer_funcs_ring->sched.ready)
2829 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2830 
2831 	/* Don't init kfd if whole hive need to be reset during init */
2832 	if (!adev->gmc.xgmi.pending_reset) {
2833 		kgd2kfd_init_zone_device(adev);
2834 		amdgpu_amdkfd_device_init(adev);
2835 	}
2836 
2837 	amdgpu_fru_get_product_info(adev);
2838 
2839 init_failed:
2840 
2841 	return r;
2842 }
2843 
2844 /**
2845  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2846  *
2847  * @adev: amdgpu_device pointer
2848  *
2849  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2850  * this function before a GPU reset.  If the value is retained after a
2851  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2852  */
2853 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2854 {
2855 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2856 }
2857 
2858 /**
2859  * amdgpu_device_check_vram_lost - check if vram is valid
2860  *
2861  * @adev: amdgpu_device pointer
2862  *
2863  * Checks the reset magic value written to the gart pointer in VRAM.
2864  * The driver calls this after a GPU reset to see if the contents of
2865  * VRAM is lost or not.
2866  * returns true if vram is lost, false if not.
2867  */
2868 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2869 {
2870 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2871 			AMDGPU_RESET_MAGIC_NUM))
2872 		return true;
2873 
2874 	if (!amdgpu_in_reset(adev))
2875 		return false;
2876 
2877 	/*
2878 	 * For all ASICs with baco/mode1 reset, the VRAM is
2879 	 * always assumed to be lost.
2880 	 */
2881 	switch (amdgpu_asic_reset_method(adev)) {
2882 	case AMD_RESET_METHOD_BACO:
2883 	case AMD_RESET_METHOD_MODE1:
2884 		return true;
2885 	default:
2886 		return false;
2887 	}
2888 }
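
/*
 * Illustrative pairing (editor's note, not part of the original file): the
 * driver writes the magic during late init and compares it after a reset,
 * roughly:
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... a GPU reset happens ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 *
 * A mismatch, or a BACO/mode1 reset, is treated as lost VRAM contents.
 */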
2889 
2890 /**
2891  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2892  *
2893  * @adev: amdgpu_device pointer
2894  * @state: clockgating state (gate or ungate)
2895  *
2896  * The list of all the hardware IPs that make up the asic is walked and the
2897  * set_clockgating_state callbacks are run.
2898  * During the late init pass this enables clockgating for hardware IPs;
2899  * during fini or suspend it disables clockgating for them.
2900  * Returns 0 on success, negative error code on failure.
2901  */
2902 
2903 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2904 			       enum amd_clockgating_state state)
2905 {
2906 	int i, j, r;
2907 
2908 	if (amdgpu_emu_mode == 1)
2909 		return 0;
2910 
2911 	for (j = 0; j < adev->num_ip_blocks; j++) {
2912 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2913 		if (!adev->ip_blocks[i].status.late_initialized)
2914 			continue;
2915 		/* skip CG for GFX, SDMA on S0ix */
2916 		if (adev->in_s0ix &&
2917 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2918 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2919 			continue;
2920 		/* skip CG for VCE/UVD, it's handled specially */
2921 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2922 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2923 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2924 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2925 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2926 			/* enable clockgating to save power */
2927 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2928 										     state);
2929 			if (r) {
2930 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2931 					  adev->ip_blocks[i].version->funcs->name, r);
2932 				return r;
2933 			}
2934 		}
2935 	}
2936 
2937 	return 0;
2938 }
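
/*
 * Worked example (editor's illustration, not part of the original file) of
 * the index arithmetic above: with num_ip_blocks == 3, gating walks the
 * blocks front to back (i = 0, 1, 2) while ungating walks them back to front
 * (i = 3 - j - 1 = 2, 1, 0), so clockgating is disabled in the reverse of the
 * order in which it was enabled.
 */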
2939 
2940 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2941 			       enum amd_powergating_state state)
2942 {
2943 	int i, j, r;
2944 
2945 	if (amdgpu_emu_mode == 1)
2946 		return 0;
2947 
2948 	for (j = 0; j < adev->num_ip_blocks; j++) {
2949 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2950 		if (!adev->ip_blocks[i].status.late_initialized)
2951 			continue;
2952 		/* skip PG for GFX, SDMA on S0ix */
2953 		if (adev->in_s0ix &&
2954 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2955 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2956 			continue;
2957 		/* skip PG for VCE/UVD, it's handled specially */
2958 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2959 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2960 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2961 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2962 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2963 			/* enable powergating to save power */
2964 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2965 											state);
2966 			if (r) {
2967 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2968 					  adev->ip_blocks[i].version->funcs->name, r);
2969 				return r;
2970 			}
2971 		}
2972 	}
2973 	return 0;
2974 }
2975 
2976 static int amdgpu_device_enable_mgpu_fan_boost(void)
2977 {
2978 	struct amdgpu_gpu_instance *gpu_ins;
2979 	struct amdgpu_device *adev;
2980 	int i, ret = 0;
2981 
2982 	mutex_lock(&mgpu_info.mutex);
2983 
2984 	/*
2985 	 * MGPU fan boost feature should be enabled
2986 	 * only when there are two or more dGPUs in
2987 	 * the system
2988 	 */
2989 	if (mgpu_info.num_dgpu < 2)
2990 		goto out;
2991 
2992 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2993 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2994 		adev = gpu_ins->adev;
2995 		if (!(adev->flags & AMD_IS_APU) &&
2996 		    !gpu_ins->mgpu_fan_enabled) {
2997 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2998 			if (ret)
2999 				break;
3000 
3001 			gpu_ins->mgpu_fan_enabled = 1;
3002 		}
3003 	}
3004 
3005 out:
3006 	mutex_unlock(&mgpu_info.mutex);
3007 
3008 	return ret;
3009 }
3010 
3011 /**
3012  * amdgpu_device_ip_late_init - run late init for hardware IPs
3013  *
3014  * @adev: amdgpu_device pointer
3015  *
3016  * Late initialization pass for hardware IPs.  The list of all the hardware
3017  * IPs that make up the asic is walked and the late_init callbacks are run.
3018  * late_init covers any special initialization that an IP requires
3019  * after all of them have been initialized or something that needs to happen
3020  * late in the init process.
3021  * Returns 0 on success, negative error code on failure.
3022  */
3023 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3024 {
3025 	struct amdgpu_gpu_instance *gpu_instance;
3026 	int i = 0, r;
3027 
3028 	for (i = 0; i < adev->num_ip_blocks; i++) {
3029 		if (!adev->ip_blocks[i].status.hw)
3030 			continue;
3031 		if (adev->ip_blocks[i].version->funcs->late_init) {
3032 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3033 			if (r) {
3034 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3035 					  adev->ip_blocks[i].version->funcs->name, r);
3036 				return r;
3037 			}
3038 		}
3039 		adev->ip_blocks[i].status.late_initialized = true;
3040 	}
3041 
3042 	r = amdgpu_ras_late_init(adev);
3043 	if (r) {
3044 		DRM_ERROR("amdgpu_ras_late_init failed %d\n", r);
3045 		return r;
3046 	}
3047 
3048 	amdgpu_ras_set_error_query_ready(adev, true);
3049 
3050 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3051 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3052 
3053 	amdgpu_device_fill_reset_magic(adev);
3054 
3055 	r = amdgpu_device_enable_mgpu_fan_boost();
3056 	if (r)
3057 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3058 
3059 	/* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
3060 	if (amdgpu_passthrough(adev) &&
3061 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3062 	     adev->asic_type == CHIP_ALDEBARAN))
3063 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3064 
3065 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3066 		mutex_lock(&mgpu_info.mutex);
3067 
3068 		/*
3069 		 * Reset device p-state to low as this was booted with high.
3070 		 *
3071 		 * This should be performed only after all devices from the same
3072 		 * hive get initialized.
3073 		 *
3074 		 * However, the number of devices in a hive is not known in advance,
3075 		 * as it is counted one by one during device initialization.
3076 		 *
3077 		 * So, we wait for all XGMI interlinked devices initialized.
3078 		 * This may bring some delays as those devices may come from
3079 		 * different hives. But that should be OK.
3080 		 */
3081 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3082 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3083 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3084 				if (gpu_instance->adev->flags & AMD_IS_APU)
3085 					continue;
3086 
3087 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3088 						AMDGPU_XGMI_PSTATE_MIN);
3089 				if (r) {
3090 					DRM_ERROR("pstate setting failed (%d).\n", r);
3091 					break;
3092 				}
3093 			}
3094 		}
3095 
3096 		mutex_unlock(&mgpu_info.mutex);
3097 	}
3098 
3099 	return 0;
3100 }
3101 
3102 /**
3103  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3104  *
3105  * @adev: amdgpu_device pointer
3106  *
3107  * For ASICs that need to disable SMC first
3108  */
3109 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3110 {
3111 	int i, r;
3112 
3113 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3114 		return;
3115 
3116 	for (i = 0; i < adev->num_ip_blocks; i++) {
3117 		if (!adev->ip_blocks[i].status.hw)
3118 			continue;
3119 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3120 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3121 			/* XXX handle errors */
3122 			if (r) {
3123 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3124 					  adev->ip_blocks[i].version->funcs->name, r);
3125 			}
3126 			adev->ip_blocks[i].status.hw = false;
3127 			break;
3128 		}
3129 	}
3130 }
3131 
3132 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3133 {
3134 	int i, r;
3135 
3136 	for (i = 0; i < adev->num_ip_blocks; i++) {
3137 		if (!adev->ip_blocks[i].version->funcs->early_fini)
3138 			continue;
3139 
3140 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3141 		if (r) {
3142 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3143 				  adev->ip_blocks[i].version->funcs->name, r);
3144 		}
3145 	}
3146 
3147 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3148 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3149 
3150 	amdgpu_amdkfd_suspend(adev, false);
3151 
3152 	/* Workaround for ASICs that need to disable SMC first */
3153 	amdgpu_device_smu_fini_early(adev);
3154 
3155 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3156 		if (!adev->ip_blocks[i].status.hw)
3157 			continue;
3158 
3159 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3160 		/* XXX handle errors */
3161 		if (r) {
3162 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3163 				  adev->ip_blocks[i].version->funcs->name, r);
3164 		}
3165 
3166 		adev->ip_blocks[i].status.hw = false;
3167 	}
3168 
3169 	if (amdgpu_sriov_vf(adev)) {
3170 		if (amdgpu_virt_release_full_gpu(adev, false))
3171 			DRM_ERROR("failed to release exclusive mode on fini\n");
3172 	}
3173 
3174 	return 0;
3175 }
3176 
3177 /**
3178  * amdgpu_device_ip_fini - run fini for hardware IPs
3179  *
3180  * @adev: amdgpu_device pointer
3181  *
3182  * Main teardown pass for hardware IPs.  The list of all the hardware
3183  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3184  * are run.  hw_fini tears down the hardware associated with each IP
3185  * and sw_fini tears down any software state associated with each IP.
3186  * Returns 0 on success, negative error code on failure.
3187  */
3188 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3189 {
3190 	int i, r;
3191 
3192 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3193 		amdgpu_virt_release_ras_err_handler_data(adev);
3194 
3195 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3196 		amdgpu_xgmi_remove_device(adev);
3197 
3198 	amdgpu_amdkfd_device_fini_sw(adev);
3199 
3200 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3201 		if (!adev->ip_blocks[i].status.sw)
3202 			continue;
3203 
3204 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3205 			amdgpu_ucode_free_bo(adev);
3206 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3207 			amdgpu_device_wb_fini(adev);
3208 			amdgpu_device_mem_scratch_fini(adev);
3209 			amdgpu_ib_pool_fini(adev);
3210 			amdgpu_seq64_fini(adev);
3211 		}
3212 
3213 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3214 		/* XXX handle errors */
3215 		if (r) {
3216 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3217 				  adev->ip_blocks[i].version->funcs->name, r);
3218 		}
3219 		adev->ip_blocks[i].status.sw = false;
3220 		adev->ip_blocks[i].status.valid = false;
3221 	}
3222 
3223 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3224 		if (!adev->ip_blocks[i].status.late_initialized)
3225 			continue;
3226 		if (adev->ip_blocks[i].version->funcs->late_fini)
3227 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3228 		adev->ip_blocks[i].status.late_initialized = false;
3229 	}
3230 
3231 	amdgpu_ras_fini(adev);
3232 
3233 	return 0;
3234 }
3235 
3236 /**
3237  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3238  *
3239  * @work: work_struct.
3240  */
3241 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3242 {
3243 	struct amdgpu_device *adev =
3244 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3245 	int r;
3246 
3247 	r = amdgpu_ib_ring_tests(adev);
3248 	if (r)
3249 		DRM_ERROR("ib ring test failed (%d).\n", r);
3250 }
3251 
3252 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3253 {
3254 	struct amdgpu_device *adev =
3255 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3256 
3257 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3258 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3259 
3260 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3261 		adev->gfx.gfx_off_state = true;
3262 }
3263 
3264 /**
3265  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3266  *
3267  * @adev: amdgpu_device pointer
3268  *
3269  * Main suspend function for hardware IPs.  The list of all the hardware
3270  * IPs that make up the asic is walked, clockgating is disabled and the
3271  * suspend callbacks are run.  suspend puts the hardware and software state
3272  * in each IP into a state suitable for suspend.
3273  * Returns 0 on success, negative error code on failure.
3274  */
3275 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3276 {
3277 	int i, r;
3278 
3279 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3280 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3281 
3282 	/*
3283 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3284 	 * and df cstate feature disablement for gpu reset (e.g. Mode1Reset)
3285 	 * scenarios. Add the missing df cstate disablement here.
3286 	 */
3287 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3288 		dev_warn(adev->dev, "Failed to disallow df cstate");
3289 
3290 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3291 		if (!adev->ip_blocks[i].status.valid)
3292 			continue;
3293 
3294 		/* displays are handled separately */
3295 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3296 			continue;
3297 
3298 		/* XXX handle errors */
3299 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3300 		/* XXX handle errors */
3301 		if (r) {
3302 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3303 				  adev->ip_blocks[i].version->funcs->name, r);
3304 			return r;
3305 		}
3306 
3307 		adev->ip_blocks[i].status.hw = false;
3308 	}
3309 
3310 	return 0;
3311 }
3312 
3313 /**
3314  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3315  *
3316  * @adev: amdgpu_device pointer
3317  *
3318  * Main suspend function for hardware IPs.  The list of all the hardware
3319  * IPs that make up the asic is walked, clockgating is disabled and the
3320  * suspend callbacks are run.  suspend puts the hardware and software state
3321  * in each IP into a state suitable for suspend.
3322  * Returns 0 on success, negative error code on failure.
3323  */
3324 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3325 {
3326 	int i, r;
3327 
3328 	if (adev->in_s0ix)
3329 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3330 
3331 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3332 		if (!adev->ip_blocks[i].status.valid)
3333 			continue;
3334 		/* displays are handled in phase1 */
3335 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3336 			continue;
3337 		/* PSP lost connection when err_event_athub occurs */
3338 		if (amdgpu_ras_intr_triggered() &&
3339 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3340 			adev->ip_blocks[i].status.hw = false;
3341 			continue;
3342 		}
3343 
3344 		/* skip unnecessary suspend if we have not initialized them yet */
3345 		if (adev->gmc.xgmi.pending_reset &&
3346 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3347 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3348 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3349 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3350 			adev->ip_blocks[i].status.hw = false;
3351 			continue;
3352 		}
3353 
3354 		/* skip suspend of gfx/mes and psp for S0ix
3355 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3356 		 * like at runtime. PSP is also part of the always on hardware
3357 		 * so no need to suspend it.
3358 		 */
3359 		if (adev->in_s0ix &&
3360 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3361 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3362 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3363 			continue;
3364 
3365 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3366 		if (adev->in_s0ix &&
3367 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3368 		     IP_VERSION(5, 0, 0)) &&
3369 		    (adev->ip_blocks[i].version->type ==
3370 		     AMD_IP_BLOCK_TYPE_SDMA))
3371 			continue;
3372 
3373 		/* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3374 		 * These are kept in TMR, hence are expected to be reused by PSP-TOS to
3375 		 * reload from this location; RLC Autoload also gets loaded automatically
3376 		 * from here based on the PMFW -> PSP message during the re-init sequence.
3377 		 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3378 		 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3379 		 */
3380 		if (amdgpu_in_reset(adev) &&
3381 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3382 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3383 			continue;
3384 
3385 		/* XXX handle errors */
3386 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3387 		/* XXX handle errors */
3388 		if (r) {
3389 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3390 				  adev->ip_blocks[i].version->funcs->name, r);
3391 		}
3392 		adev->ip_blocks[i].status.hw = false;
3393 		/* handle putting the SMC in the appropriate state */
3394 		if (!amdgpu_sriov_vf(adev)) {
3395 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3396 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3397 				if (r) {
3398 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3399 							adev->mp1_state, r);
3400 					return r;
3401 				}
3402 			}
3403 		}
3404 	}
3405 
3406 	return 0;
3407 }
3408 
3409 /**
3410  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3411  *
3412  * @adev: amdgpu_device pointer
3413  *
3414  * Main suspend function for hardware IPs.  The list of all the hardware
3415  * IPs that make up the asic is walked, clockgating is disabled and the
3416  * suspend callbacks are run.  suspend puts the hardware and software state
3417  * in each IP into a state suitable for suspend.
3418  * Returns 0 on success, negative error code on failure.
3419  */
3420 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3421 {
3422 	int r;
3423 
3424 	if (amdgpu_sriov_vf(adev)) {
3425 		amdgpu_virt_fini_data_exchange(adev);
3426 		amdgpu_virt_request_full_gpu(adev, false);
3427 	}
3428 
3429 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3430 
3431 	r = amdgpu_device_ip_suspend_phase1(adev);
3432 	if (r)
3433 		return r;
3434 	r = amdgpu_device_ip_suspend_phase2(adev);
3435 
3436 	if (amdgpu_sriov_vf(adev))
3437 		amdgpu_virt_release_full_gpu(adev, false);
3438 
3439 	return r;
3440 }
3441 
3442 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3443 {
3444 	int i, r;
3445 
3446 	static enum amd_ip_block_type ip_order[] = {
3447 		AMD_IP_BLOCK_TYPE_COMMON,
3448 		AMD_IP_BLOCK_TYPE_GMC,
3449 		AMD_IP_BLOCK_TYPE_PSP,
3450 		AMD_IP_BLOCK_TYPE_IH,
3451 	};
3452 
3453 	for (i = 0; i < adev->num_ip_blocks; i++) {
3454 		int j;
3455 		struct amdgpu_ip_block *block;
3456 
3457 		block = &adev->ip_blocks[i];
3458 		block->status.hw = false;
3459 
3460 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3461 
3462 			if (block->version->type != ip_order[j] ||
3463 				!block->status.valid)
3464 				continue;
3465 
3466 			r = block->version->funcs->hw_init(adev);
3467 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3468 			if (r)
3469 				return r;
3470 			block->status.hw = true;
3471 		}
3472 	}
3473 
3474 	return 0;
3475 }
3476 
3477 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3478 {
3479 	int i, r;
3480 
3481 	static enum amd_ip_block_type ip_order[] = {
3482 		AMD_IP_BLOCK_TYPE_SMC,
3483 		AMD_IP_BLOCK_TYPE_DCE,
3484 		AMD_IP_BLOCK_TYPE_GFX,
3485 		AMD_IP_BLOCK_TYPE_SDMA,
3486 		AMD_IP_BLOCK_TYPE_MES,
3487 		AMD_IP_BLOCK_TYPE_UVD,
3488 		AMD_IP_BLOCK_TYPE_VCE,
3489 		AMD_IP_BLOCK_TYPE_VCN,
3490 		AMD_IP_BLOCK_TYPE_JPEG
3491 	};
3492 
3493 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3494 		int j;
3495 		struct amdgpu_ip_block *block;
3496 
3497 		for (j = 0; j < adev->num_ip_blocks; j++) {
3498 			block = &adev->ip_blocks[j];
3499 
3500 			if (block->version->type != ip_order[i] ||
3501 				!block->status.valid ||
3502 				block->status.hw)
3503 				continue;
3504 
3505 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3506 				r = block->version->funcs->resume(adev);
3507 			else
3508 				r = block->version->funcs->hw_init(adev);
3509 
3510 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3511 			if (r)
3512 				return r;
3513 			block->status.hw = true;
3514 		}
3515 	}
3516 
3517 	return 0;
3518 }
3519 
3520 /**
3521  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3522  *
3523  * @adev: amdgpu_device pointer
3524  *
3525  * First resume function for hardware IPs.  The list of all the hardware
3526  * IPs that make up the asic is walked and the resume callbacks are run for
3527  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3528  * after a suspend and updates the software state as necessary.  This
3529  * function is also used for restoring the GPU after a GPU reset.
3530  * Returns 0 on success, negative error code on failure.
3531  */
3532 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3533 {
3534 	int i, r;
3535 
3536 	for (i = 0; i < adev->num_ip_blocks; i++) {
3537 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3538 			continue;
3539 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3540 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3541 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3542 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3543 
3544 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3545 			if (r) {
3546 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3547 					  adev->ip_blocks[i].version->funcs->name, r);
3548 				return r;
3549 			}
3550 			adev->ip_blocks[i].status.hw = true;
3551 		}
3552 	}
3553 
3554 	return 0;
3555 }
3556 
3557 /**
3558  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3559  *
3560  * @adev: amdgpu_device pointer
3561  *
3562  * Second resume function for hardware IPs.  The list of all the hardware
3563  * IPs that make up the asic is walked and the resume callbacks are run for
3564  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3565  * functional state after a suspend and updates the software state as
3566  * necessary.  This function is also used for restoring the GPU after a GPU
3567  * reset.
3568  * Returns 0 on success, negative error code on failure.
3569  */
3570 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3571 {
3572 	int i, r;
3573 
3574 	for (i = 0; i < adev->num_ip_blocks; i++) {
3575 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3576 			continue;
3577 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3578 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3579 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3580 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3581 			continue;
3582 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3583 		if (r) {
3584 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3585 				  adev->ip_blocks[i].version->funcs->name, r);
3586 			return r;
3587 		}
3588 		adev->ip_blocks[i].status.hw = true;
3589 	}
3590 
3591 	return 0;
3592 }
3593 
3594 /**
3595  * amdgpu_device_ip_resume - run resume for hardware IPs
3596  *
3597  * @adev: amdgpu_device pointer
3598  *
3599  * Main resume function for hardware IPs.  The hardware IPs
3600  * are split into two resume functions because they are
3601  * also used in recovering from a GPU reset and some additional
3602  * steps need to be taken between them.  In this case (S3/S4) they are
3603  * run sequentially.
3604  * Returns 0 on success, negative error code on failure.
3605  */
3606 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3607 {
3608 	int r;
3609 
3610 	r = amdgpu_device_ip_resume_phase1(adev);
3611 	if (r)
3612 		return r;
3613 
3614 	r = amdgpu_device_fw_loading(adev);
3615 	if (r)
3616 		return r;
3617 
3618 	r = amdgpu_device_ip_resume_phase2(adev);
3619 
3620 	if (adev->mman.buffer_funcs_ring->sched.ready)
3621 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3622 
3623 	return r;
3624 }
3625 
3626 /**
3627  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3628  *
3629  * @adev: amdgpu_device pointer
3630  *
3631  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3632  */
3633 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3634 {
3635 	if (amdgpu_sriov_vf(adev)) {
3636 		if (adev->is_atom_fw) {
3637 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3638 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3639 		} else {
3640 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3641 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3642 		}
3643 
3644 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3645 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3646 	}
3647 }
3648 
3649 /**
3650  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3651  *
3652  * @asic_type: AMD asic type
3653  *
3654  * Check if there is DC (new modesetting infrastructure) support for an asic.
3655  * Returns true if DC has support, false if not.
3656  */
3657 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3658 {
3659 	switch (asic_type) {
3660 #ifdef CONFIG_DRM_AMDGPU_SI
3661 	case CHIP_HAINAN:
3662 #endif
3663 	case CHIP_TOPAZ:
3664 		/* chips with no display hardware */
3665 		return false;
3666 #if defined(CONFIG_DRM_AMD_DC)
3667 	case CHIP_TAHITI:
3668 	case CHIP_PITCAIRN:
3669 	case CHIP_VERDE:
3670 	case CHIP_OLAND:
3671 		/*
3672 		 * We have systems in the wild with these ASICs that require
3673 		 * LVDS and VGA support which is not supported with DC.
3674 		 *
3675 		 * Fall back to the non-DC driver here by default so as not to
3676 		 * cause regressions.
3677 		 */
3678 #if defined(CONFIG_DRM_AMD_DC_SI)
3679 		return amdgpu_dc > 0;
3680 #else
3681 		return false;
3682 #endif
3683 	case CHIP_BONAIRE:
3684 	case CHIP_KAVERI:
3685 	case CHIP_KABINI:
3686 	case CHIP_MULLINS:
3687 		/*
3688 		 * We have systems in the wild with these ASICs that require
3689 		 * VGA support which is not supported with DC.
3690 		 *
3691 		 * Fall back to the non-DC driver here by default so as not to
3692 		 * cause regressions.
3693 		 */
3694 		return amdgpu_dc > 0;
3695 	default:
3696 		return amdgpu_dc != 0;
3697 #else
3698 	default:
3699 		if (amdgpu_dc > 0)
3700 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3701 		return false;
3702 #endif
3703 	}
3704 }
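/*
 * Example of the above: on the legacy SI/CIK parts DC stays off unless the
 * user opts in explicitly (amdgpu_dc > 0, typically set via the amdgpu.dc
 * module parameter), while newer ASICs use DC unless it is explicitly
 * disabled.
 */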
3705 
3706 /**
3707  * amdgpu_device_has_dc_support - check if dc is supported
3708  *
3709  * @adev: amdgpu_device pointer
3710  *
3711  * Returns true for supported, false for not supported
3712  */
3713 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3714 {
3715 	if (adev->enable_virtual_display ||
3716 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3717 		return false;
3718 
3719 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3720 }
3721 
3722 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3723 {
3724 	struct amdgpu_device *adev =
3725 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3726 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3727 
3728 	/* It's a bug to not have a hive within this function */
3729 	if (WARN_ON(!hive))
3730 		return;
3731 
3732 	/*
3733 	 * Use task barrier to synchronize all xgmi reset works across the
3734 	 * hive. task_barrier_enter and task_barrier_exit will block
3735 	 * until all the threads running the xgmi reset works reach
3736 	 * those points. task_barrier_full will do both blocks.
3737 	 */
3738 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3739 
3740 		task_barrier_enter(&hive->tb);
3741 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3742 
3743 		if (adev->asic_reset_res)
3744 			goto fail;
3745 
3746 		task_barrier_exit(&hive->tb);
3747 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3748 
3749 		if (adev->asic_reset_res)
3750 			goto fail;
3751 
3752 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3753 	} else {
3754 
3755 		task_barrier_full(&hive->tb);
3756 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3757 	}
3758 
3759 fail:
3760 	if (adev->asic_reset_res)
3761 		DRM_WARN("ASIC reset failed with error %d for drm dev %s",
3762 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3763 	amdgpu_put_xgmi_hive(hive);
3764 }
3765 
3766 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
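/*
 * Parse the amdgpu.lockup_timeout module parameter.  Illustrative example
 * (values are arbitrary): lockup_timeout=10000,60000,10000,10000 sets the
 * GFX, compute, SDMA and video timeouts (in ms) in that order; a value of 0
 * keeps the default for that slot and a negative value disables the timeout.
 */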
3767 {
3768 	char *input = amdgpu_lockup_timeout;
3769 	char *timeout_setting = NULL;
3770 	int index = 0;
3771 	long timeout;
3772 	int ret = 0;
3773 
3774 	/*
3775 	 * By default the timeout for non-compute jobs is 10000 ms and
3776 	 * 60000 ms for compute jobs.
3777 	 * In SR-IOV mode the compute timeout is 60000 ms only in one-VF
3778 	 * (pp_one_vf) mode, otherwise 10000 ms.
3779 	 */
3780 	adev->gfx_timeout = msecs_to_jiffies(10000);
3781 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3782 	if (amdgpu_sriov_vf(adev))
3783 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3784 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3785 	else
3786 		adev->compute_timeout =  msecs_to_jiffies(60000);
3787 
3788 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3789 		while ((timeout_setting = strsep(&input, ",")) &&
3790 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3791 			ret = kstrtol(timeout_setting, 0, &timeout);
3792 			if (ret)
3793 				return ret;
3794 
3795 			if (timeout == 0) {
3796 				index++;
3797 				continue;
3798 			} else if (timeout < 0) {
3799 				timeout = MAX_SCHEDULE_TIMEOUT;
3800 				dev_warn(adev->dev, "lockup timeout disabled");
3801 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3802 			} else {
3803 				timeout = msecs_to_jiffies(timeout);
3804 			}
3805 
3806 			switch (index++) {
3807 			case 0:
3808 				adev->gfx_timeout = timeout;
3809 				break;
3810 			case 1:
3811 				adev->compute_timeout = timeout;
3812 				break;
3813 			case 2:
3814 				adev->sdma_timeout = timeout;
3815 				break;
3816 			case 3:
3817 				adev->video_timeout = timeout;
3818 				break;
3819 			default:
3820 				break;
3821 			}
3822 		}
3823 		/*
3824 		 * There is only one value specified and
3825 		 * it should apply to all non-compute jobs.
3826 		 */
3827 		if (index == 1) {
3828 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3829 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3830 				adev->compute_timeout = adev->gfx_timeout;
3831 		}
3832 	}
3833 
3834 	return ret;
3835 }
3836 
3837 /**
3838  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3839  *
3840  * @adev: amdgpu_device pointer
3841  *
3842  * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3843  */
3844 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3845 {
3846 	struct iommu_domain *domain;
3847 
3848 	domain = iommu_get_domain_for_dev(adev->dev);
3849 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3850 		adev->ram_is_direct_mapped = true;
3851 }
3852 
3853 static const struct attribute *amdgpu_dev_attributes[] = {
3854 	&dev_attr_pcie_replay_count.attr,
3855 	NULL
3856 };
3857 
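/*
 * Configure Mid-Command Buffer Preemption (MCBP): honor the amdgpu_mcbp
 * module parameter when it is explicitly set (1 enables, 0 disables) and
 * always force MCBP on when running as an SR-IOV VF.
 */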
3858 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3859 {
3860 	if (amdgpu_mcbp == 1)
3861 		adev->gfx.mcbp = true;
3862 	else if (amdgpu_mcbp == 0)
3863 		adev->gfx.mcbp = false;
3864 
3865 	if (amdgpu_sriov_vf(adev))
3866 		adev->gfx.mcbp = true;
3867 
3868 	if (adev->gfx.mcbp)
3869 		DRM_INFO("MCBP is enabled\n");
3870 }
3871 
3872 /**
3873  * amdgpu_device_init - initialize the driver
3874  *
3875  * @adev: amdgpu_device pointer
3876  * @flags: driver flags
3877  *
3878  * Initializes the driver info and hw (all asics).
3879  * Returns 0 for success or an error on failure.
3880  * Called at driver startup.
3881  */
3882 int amdgpu_device_init(struct amdgpu_device *adev,
3883 		       uint32_t flags)
3884 {
3885 	struct drm_device *ddev = adev_to_drm(adev);
3886 	struct pci_dev *pdev = adev->pdev;
3887 	int r, i;
3888 	bool px = false;
3889 	u32 max_MBps;
3890 	int tmp;
3891 
3892 	adev->shutdown = false;
3893 	adev->flags = flags;
3894 
3895 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3896 		adev->asic_type = amdgpu_force_asic_type;
3897 	else
3898 		adev->asic_type = flags & AMD_ASIC_MASK;
3899 
3900 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3901 	if (amdgpu_emu_mode == 1)
3902 		adev->usec_timeout *= 10;
3903 	adev->gmc.gart_size = 512 * 1024 * 1024;
3904 	adev->accel_working = false;
3905 	adev->num_rings = 0;
3906 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3907 	adev->mman.buffer_funcs = NULL;
3908 	adev->mman.buffer_funcs_ring = NULL;
3909 	adev->vm_manager.vm_pte_funcs = NULL;
3910 	adev->vm_manager.vm_pte_num_scheds = 0;
3911 	adev->gmc.gmc_funcs = NULL;
3912 	adev->harvest_ip_mask = 0x0;
3913 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3914 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3915 
3916 	adev->smc_rreg = &amdgpu_invalid_rreg;
3917 	adev->smc_wreg = &amdgpu_invalid_wreg;
3918 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3919 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3920 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3921 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3922 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3923 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3924 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3925 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3926 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3927 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
3928 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3929 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3930 	adev->didt_rreg = &amdgpu_invalid_rreg;
3931 	adev->didt_wreg = &amdgpu_invalid_wreg;
3932 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3933 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3934 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3935 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3936 
3937 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3938 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3939 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3940 
3941 	/* mutex initialization is all done here so we
3942 	 * can call these functions again without locking issues
3943 	 */
3944 	mutex_init(&adev->firmware.mutex);
3945 	mutex_init(&adev->pm.mutex);
3946 	mutex_init(&adev->gfx.gpu_clock_mutex);
3947 	mutex_init(&adev->srbm_mutex);
3948 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3949 	mutex_init(&adev->gfx.gfx_off_mutex);
3950 	mutex_init(&adev->gfx.partition_mutex);
3951 	mutex_init(&adev->grbm_idx_mutex);
3952 	mutex_init(&adev->mn_lock);
3953 	mutex_init(&adev->virt.vf_errors.lock);
3954 	hash_init(adev->mn_hash);
3955 	mutex_init(&adev->psp.mutex);
3956 	mutex_init(&adev->notifier_lock);
3957 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3958 	mutex_init(&adev->benchmark_mutex);
3959 
3960 	amdgpu_device_init_apu_flags(adev);
3961 
3962 	r = amdgpu_device_check_arguments(adev);
3963 	if (r)
3964 		return r;
3965 
3966 	spin_lock_init(&adev->mmio_idx_lock);
3967 	spin_lock_init(&adev->smc_idx_lock);
3968 	spin_lock_init(&adev->pcie_idx_lock);
3969 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3970 	spin_lock_init(&adev->didt_idx_lock);
3971 	spin_lock_init(&adev->gc_cac_idx_lock);
3972 	spin_lock_init(&adev->se_cac_idx_lock);
3973 	spin_lock_init(&adev->audio_endpt_idx_lock);
3974 	spin_lock_init(&adev->mm_stats.lock);
3975 
3976 	INIT_LIST_HEAD(&adev->shadow_list);
3977 	mutex_init(&adev->shadow_list_lock);
3978 
3979 	INIT_LIST_HEAD(&adev->reset_list);
3980 
3981 	INIT_LIST_HEAD(&adev->ras_list);
3982 
3983 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3984 
3985 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3986 			  amdgpu_device_delayed_init_work_handler);
3987 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3988 			  amdgpu_device_delay_enable_gfx_off);
3989 
3990 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3991 
3992 	adev->gfx.gfx_off_req_count = 1;
3993 	adev->gfx.gfx_off_residency = 0;
3994 	adev->gfx.gfx_off_entrycount = 0;
3995 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3996 
3997 	atomic_set(&adev->throttling_logging_enabled, 1);
3998 	/*
3999 	 * If throttling continues, logging will be performed every minute
4000 	 * to avoid log flooding. "-1" is subtracted since the thermal
4001 	 * throttling interrupt comes every second. Thus, the total logging
4002 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4003 	 * for throttling interrupt) = 60 seconds.
4004 	 */
4005 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4006 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4007 
4008 	/* Registers mapping */
4009 	/* TODO: block userspace mapping of io register */
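	/* The register aperture is BAR 5 on BONAIRE and newer ASICs, BAR 2 on older ones. */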
4010 	if (adev->asic_type >= CHIP_BONAIRE) {
4011 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4012 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4013 	} else {
4014 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4015 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4016 	}
4017 
4018 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4019 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4020 
4021 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4022 	if (!adev->rmmio)
4023 		return -ENOMEM;
4024 
4025 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4026 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4027 
4028 	/*
4029 	 * The reset domain needs to be present early, before any XGMI hive is
4030 	 * discovered (if any) and initialized, so the reset semaphore and
4031 	 * in_gpu_reset flag can be used early during init and before RREG32.
4032 	 */
4033 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4034 	if (!adev->reset_domain)
4035 		return -ENOMEM;
4036 
4037 	/* detect hw virtualization here */
4038 	amdgpu_detect_virtualization(adev);
4039 
4040 	amdgpu_device_get_pcie_info(adev);
4041 
4042 	r = amdgpu_device_get_job_timeout_settings(adev);
4043 	if (r) {
4044 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4045 		return r;
4046 	}
4047 
4048 	/* early init functions */
4049 	r = amdgpu_device_ip_early_init(adev);
4050 	if (r)
4051 		return r;
4052 
4053 	amdgpu_device_set_mcbp(adev);
4054 
4055 	/* Get rid of things like offb */
4056 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4057 	if (r)
4058 		return r;
4059 
4060 	/* Enable TMZ based on IP_VERSION */
4061 	amdgpu_gmc_tmz_set(adev);
4062 
4063 	amdgpu_gmc_noretry_set(adev);
4064 	/* Need to get xgmi info early to decide the reset behavior */
4065 	if (adev->gmc.xgmi.supported) {
4066 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4067 		if (r)
4068 			return r;
4069 	}
4070 
4071 	/* enable PCIE atomic ops */
4072 	if (amdgpu_sriov_vf(adev)) {
4073 		if (adev->virt.fw_reserve.p_pf2vf)
4074 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4075 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4076 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4077 	/* APUs with GFX9 and newer don't rely on PCIe atomics; their internal
4078 	 * path natively supports atomics, so set have_atomics_support to true.
4079 	 */
4080 	} else if ((adev->flags & AMD_IS_APU) &&
4081 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4082 		    IP_VERSION(9, 0, 0))) {
4083 		adev->have_atomics_support = true;
4084 	} else {
4085 		adev->have_atomics_support =
4086 			!pci_enable_atomic_ops_to_root(adev->pdev,
4087 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4088 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4089 	}
4090 
4091 	if (!adev->have_atomics_support)
4092 		dev_info(adev->dev, "PCIE atomic ops are not supported\n");
4093 
4094 	/* doorbell bar mapping and doorbell index init */
4095 	amdgpu_doorbell_init(adev);
4096 
4097 	if (amdgpu_emu_mode == 1) {
4098 		/* post the asic on emulation mode */
4099 		emu_soc_asic_init(adev);
4100 		goto fence_driver_init;
4101 	}
4102 
4103 	amdgpu_reset_init(adev);
4104 
4105 	/* detect if we have an SR-IOV vBIOS */
4106 	if (adev->bios)
4107 		amdgpu_device_detect_sriov_bios(adev);
4108 
4109 	/* check if we need to reset the asic
4110 	 *  E.g., driver was not cleanly unloaded previously, etc.
4111 	 */
4112 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4113 		if (adev->gmc.xgmi.num_physical_nodes) {
4114 			dev_info(adev->dev, "Pending hive reset.\n");
4115 			adev->gmc.xgmi.pending_reset = true;
4116 			/* Only need to init the blocks necessary for SMU to handle the reset */
4117 			for (i = 0; i < adev->num_ip_blocks; i++) {
4118 				if (!adev->ip_blocks[i].status.valid)
4119 					continue;
4120 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4121 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4122 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4123 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4124 					DRM_DEBUG("IP %s disabled for hw_init.\n",
4125 						adev->ip_blocks[i].version->funcs->name);
4126 					adev->ip_blocks[i].status.hw = true;
4127 				}
4128 			}
4129 		} else {
4130 			switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
4131 			case IP_VERSION(13, 0, 0):
4132 			case IP_VERSION(13, 0, 7):
4133 			case IP_VERSION(13, 0, 10):
4134 				r = psp_gpu_reset(adev);
4135 				break;
4136 			default:
4137 				tmp = amdgpu_reset_method;
4138 				/* It should do a default reset when loading or reloading the driver,
4139 				 * regardless of the module parameter reset_method.
4140 				 */
4141 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4142 				r = amdgpu_asic_reset(adev);
4143 				amdgpu_reset_method = tmp;
4144 				break;
4145 			}
4146 
4147 			if (r) {
4148 				dev_err(adev->dev, "asic reset on init failed\n");
4149 				goto failed;
4150 			}
4151 		}
4152 	}
4153 
4154 	/* Post card if necessary */
4155 	if (amdgpu_device_need_post(adev)) {
4156 		if (!adev->bios) {
4157 			dev_err(adev->dev, "no vBIOS found\n");
4158 			r = -EINVAL;
4159 			goto failed;
4160 		}
4161 		DRM_INFO("GPU posting now...\n");
4162 		r = amdgpu_device_asic_init(adev);
4163 		if (r) {
4164 			dev_err(adev->dev, "gpu post error!\n");
4165 			goto failed;
4166 		}
4167 	}
4168 
4169 	if (adev->bios) {
4170 		if (adev->is_atom_fw) {
4171 			/* Initialize clocks */
4172 			r = amdgpu_atomfirmware_get_clock_info(adev);
4173 			if (r) {
4174 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4175 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4176 				goto failed;
4177 			}
4178 		} else {
4179 			/* Initialize clocks */
4180 			r = amdgpu_atombios_get_clock_info(adev);
4181 			if (r) {
4182 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4183 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4184 				goto failed;
4185 			}
4186 			/* init i2c buses */
4187 			if (!amdgpu_device_has_dc_support(adev))
4188 				amdgpu_atombios_i2c_init(adev);
4189 		}
4190 	}
4191 
4192 fence_driver_init:
4193 	/* Fence driver */
4194 	r = amdgpu_fence_driver_sw_init(adev);
4195 	if (r) {
4196 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4197 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4198 		goto failed;
4199 	}
4200 
4201 	/* init the mode config */
4202 	drm_mode_config_init(adev_to_drm(adev));
4203 
4204 	r = amdgpu_device_ip_init(adev);
4205 	if (r) {
4206 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4207 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4208 		goto release_ras_con;
4209 	}
4210 
4211 	amdgpu_fence_driver_hw_init(adev);
4212 
4213 	dev_info(adev->dev,
4214 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4215 			adev->gfx.config.max_shader_engines,
4216 			adev->gfx.config.max_sh_per_se,
4217 			adev->gfx.config.max_cu_per_sh,
4218 			adev->gfx.cu_info.number);
4219 
4220 	adev->accel_working = true;
4221 
4222 	amdgpu_vm_check_compute_bug(adev);
4223 
4224 	/* Initialize the buffer migration limit. */
4225 	if (amdgpu_moverate >= 0)
4226 		max_MBps = amdgpu_moverate;
4227 	else
4228 		max_MBps = 8; /* Allow 8 MB/s. */
4229 	/* Get a log2 for easy divisions. */
4230 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4231 
4232 	/*
4233 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4234 	 * Otherwise the mgpu fan boost feature will be skipped because the
4235 	 * gpu instance count would be too low.
4236 	 */
4237 	amdgpu_register_gpu_instance(adev);
4238 
4239 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4240 	 * explicit gating rather than handling it automatically.
4241 	 */
4242 	if (!adev->gmc.xgmi.pending_reset) {
4243 		r = amdgpu_device_ip_late_init(adev);
4244 		if (r) {
4245 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4246 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4247 			goto release_ras_con;
4248 		}
4249 		/* must succeed. */
4250 		amdgpu_ras_resume(adev);
4251 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4252 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4253 	}
4254 
4255 	if (amdgpu_sriov_vf(adev)) {
4256 		amdgpu_virt_release_full_gpu(adev, true);
4257 		flush_delayed_work(&adev->delayed_init_work);
4258 	}
4259 
4260 	/*
4261 	 * Register these sysfs interfaces after `late_init`, since some of the
4262 	 * operations performed in `late_init` may affect how the sysfs
4263 	 * interfaces are created.
4264 	 */
4265 	r = amdgpu_atombios_sysfs_init(adev);
4266 	if (r)
4267 		drm_err(&adev->ddev,
4268 			"registering atombios sysfs failed (%d).\n", r);
4269 
4270 	r = amdgpu_pm_sysfs_init(adev);
4271 	if (r)
4272 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4273 
4274 	r = amdgpu_ucode_sysfs_init(adev);
4275 	if (r) {
4276 		adev->ucode_sysfs_en = false;
4277 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4278 	} else
4279 		adev->ucode_sysfs_en = true;
4280 
4281 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4282 	if (r)
4283 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4284 
4285 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4286 	if (r)
4287 		dev_err(adev->dev,
4288 			"Could not create amdgpu board attributes\n");
4289 
4290 	amdgpu_fru_sysfs_init(adev);
4291 	amdgpu_reg_state_sysfs_init(adev);
4292 
4293 	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
4294 		r = amdgpu_pmu_init(adev);
4295 		if (r)
4296 			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
4297 
4298 	/* Cache the PCI config space so it is at hand for restore after a sudden PCI error */
4299 	if (amdgpu_device_cache_pci_state(adev->pdev))
4300 		pci_restore_state(pdev);
4301 
4302 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4303 	/* this will fail for cards that aren't VGA class devices, just
4304 	 * ignore it
4305 	 */
4306 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4307 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4308 
4309 	px = amdgpu_device_supports_px(ddev);
4310 
4311 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4312 				apple_gmux_detect(NULL, NULL)))
4313 		vga_switcheroo_register_client(adev->pdev,
4314 					       &amdgpu_switcheroo_ops, px);
4315 
4316 	if (px)
4317 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4318 
4319 	if (adev->gmc.xgmi.pending_reset)
4320 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4321 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4322 
4323 	amdgpu_device_check_iommu_direct_map(adev);
4324 
4325 	return 0;
4326 
4327 release_ras_con:
4328 	if (amdgpu_sriov_vf(adev))
4329 		amdgpu_virt_release_full_gpu(adev, true);
4330 
4331 	/* failed in exclusive mode due to timeout */
4332 	if (amdgpu_sriov_vf(adev) &&
4333 		!amdgpu_sriov_runtime(adev) &&
4334 		amdgpu_virt_mmio_blocked(adev) &&
4335 		!amdgpu_virt_wait_reset(adev)) {
4336 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4337 		/* Don't send request since VF is inactive. */
4338 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4339 		adev->virt.ops = NULL;
4340 		r = -EAGAIN;
4341 	}
4342 	amdgpu_release_ras_context(adev);
4343 
4344 failed:
4345 	amdgpu_vf_error_trans_all(adev);
4346 
4347 	return r;
4348 }
4349 
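/*
 * Tear down every CPU-visible mapping of the device: userspace mmaps,
 * doorbells, MMIO registers and the VRAM aperture.
 */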
4350 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4351 {
4352 
4353 	/* Clear all CPU mappings pointing to this device */
4354 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4355 
4356 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4357 	amdgpu_doorbell_fini(adev);
4358 
4359 	iounmap(adev->rmmio);
4360 	adev->rmmio = NULL;
4361 	if (adev->mman.aper_base_kaddr)
4362 		iounmap(adev->mman.aper_base_kaddr);
4363 	adev->mman.aper_base_kaddr = NULL;
4364 
4365 	/* Memory manager related */
4366 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4367 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4368 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4369 	}
4370 }
4371 
4372 /**
4373  * amdgpu_device_fini_hw - tear down the driver
4374  *
4375  * @adev: amdgpu_device pointer
4376  *
4377  * Tear down the driver info (all asics).
4378  * Called at driver shutdown.
4379  */
4380 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4381 {
4382 	dev_info(adev->dev, "amdgpu: finishing device.\n");
4383 	flush_delayed_work(&adev->delayed_init_work);
4384 	adev->shutdown = true;
4385 
4386 	/* make sure the IB tests have finished before entering exclusive mode
4387 	 * to avoid preempting the IB tests
4388 	 */
4389 	if (amdgpu_sriov_vf(adev)) {
4390 		amdgpu_virt_request_full_gpu(adev, false);
4391 		amdgpu_virt_fini_data_exchange(adev);
4392 	}
4393 
4394 	/* disable all interrupts */
4395 	amdgpu_irq_disable_all(adev);
4396 	if (adev->mode_info.mode_config_initialized) {
4397 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4398 			drm_helper_force_disable_all(adev_to_drm(adev));
4399 		else
4400 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4401 	}
4402 	amdgpu_fence_driver_hw_fini(adev);
4403 
4404 	if (adev->mman.initialized)
4405 		drain_workqueue(adev->mman.bdev.wq);
4406 
4407 	if (adev->pm.sysfs_initialized)
4408 		amdgpu_pm_sysfs_fini(adev);
4409 	if (adev->ucode_sysfs_en)
4410 		amdgpu_ucode_sysfs_fini(adev);
4411 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4412 	amdgpu_fru_sysfs_fini(adev);
4413 
4414 	amdgpu_reg_state_sysfs_fini(adev);
4415 
4416 	/* RAS features must be disabled before hw fini */
4417 	amdgpu_ras_pre_fini(adev);
4418 
4419 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4420 
4421 	amdgpu_device_ip_fini_early(adev);
4422 
4423 	amdgpu_irq_fini_hw(adev);
4424 
4425 	if (adev->mman.initialized)
4426 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4427 
4428 	amdgpu_gart_dummy_page_fini(adev);
4429 
4430 	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4431 		amdgpu_device_unmap_mmio(adev);
4432 
4433 }
4434 
4435 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4436 {
4437 	int idx;
4438 	bool px;
4439 
4440 	amdgpu_fence_driver_sw_fini(adev);
4441 	amdgpu_device_ip_fini(adev);
4442 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4443 	adev->accel_working = false;
4444 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4445 
4446 	amdgpu_reset_fini(adev);
4447 
4448 	/* free i2c buses */
4449 	if (!amdgpu_device_has_dc_support(adev))
4450 		amdgpu_i2c_fini(adev);
4451 
4452 	if (amdgpu_emu_mode != 1)
4453 		amdgpu_atombios_fini(adev);
4454 
4455 	kfree(adev->bios);
4456 	adev->bios = NULL;
4457 
4458 	kfree(adev->fru_info);
4459 	adev->fru_info = NULL;
4460 
4461 	px = amdgpu_device_supports_px(adev_to_drm(adev));
4462 
4463 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4464 				apple_gmux_detect(NULL, NULL)))
4465 		vga_switcheroo_unregister_client(adev->pdev);
4466 
4467 	if (px)
4468 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4469 
4470 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4471 		vga_client_unregister(adev->pdev);
4472 
4473 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4474 
4475 		iounmap(adev->rmmio);
4476 		adev->rmmio = NULL;
4477 		amdgpu_doorbell_fini(adev);
4478 		drm_dev_exit(idx);
4479 	}
4480 
4481 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4482 		amdgpu_pmu_fini(adev);
4483 	if (adev->mman.discovery_bin)
4484 		amdgpu_discovery_fini(adev);
4485 
4486 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4487 	adev->reset_domain = NULL;
4488 
4489 	kfree(adev->pci_state);
4490 
4491 }
4492 
4493 /**
4494  * amdgpu_device_evict_resources - evict device resources
4495  * @adev: amdgpu device object
4496  *
4497  * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4498  * of the vram memory type. Mainly used for evicting device resources
4499  * at suspend time.
4500  *
4501  */
4502 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4503 {
4504 	int ret;
4505 
4506 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4507 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4508 		return 0;
4509 
4510 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4511 	if (ret)
4512 		DRM_WARN("evicting device resources failed\n");
4513 	return ret;
4514 }
4515 
4516 /*
4517  * Suspend & resume.
4518  */
4519 /**
4520  * amdgpu_device_prepare - prepare for device suspend
4521  *
4522  * @dev: drm dev pointer
4523  *
4524  * Prepare to put the hw in the suspend state (all asics).
4525  * Returns 0 for success or an error on failure.
4526  * Called at driver suspend.
4527  */
4528 int amdgpu_device_prepare(struct drm_device *dev)
4529 {
4530 	struct amdgpu_device *adev = drm_to_adev(dev);
4531 	int i, r;
4532 
4533 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4534 		return 0;
4535 
4536 	/* Evict the majority of BOs before starting suspend sequence */
4537 	r = amdgpu_device_evict_resources(adev);
4538 	if (r)
4539 		return r;
4540 
4541 	for (i = 0; i < adev->num_ip_blocks; i++) {
4542 		if (!adev->ip_blocks[i].status.valid)
4543 			continue;
4544 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4545 			continue;
4546 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4547 		if (r)
4548 			return r;
4549 	}
4550 
4551 	return 0;
4552 }
4553 
4554 /**
4555  * amdgpu_device_suspend - initiate device suspend
4556  *
4557  * @dev: drm dev pointer
4558  * @fbcon: notify the fbdev of suspend
4559  *
4560  * Puts the hw in the suspend state (all asics).
4561  * Returns 0 for success or an error on failure.
4562  * Called at driver suspend.
4563  */
4564 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4565 {
4566 	struct amdgpu_device *adev = drm_to_adev(dev);
4567 	int r = 0;
4568 
4569 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4570 		return 0;
4571 
4572 	adev->in_suspend = true;
4573 
4574 	if (amdgpu_sriov_vf(adev)) {
4575 		amdgpu_virt_fini_data_exchange(adev);
4576 		r = amdgpu_virt_request_full_gpu(adev, false);
4577 		if (r)
4578 			return r;
4579 	}
4580 
4581 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4582 		DRM_WARN("smart shift update failed\n");
4583 
4584 	if (fbcon)
4585 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4586 
4587 	cancel_delayed_work_sync(&adev->delayed_init_work);
4588 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4589 
4590 	amdgpu_ras_suspend(adev);
4591 
4592 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4593 
4594 	amdgpu_device_ip_suspend_phase1(adev);
4595 
4596 	if (!adev->in_s0ix)
4597 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4598 
4599 	r = amdgpu_device_evict_resources(adev);
4600 	if (r)
4601 		return r;
4602 
4603 	amdgpu_fence_driver_hw_fini(adev);
4604 
4605 	amdgpu_device_ip_suspend_phase2(adev);
4606 
4607 	if (amdgpu_sriov_vf(adev))
4608 		amdgpu_virt_release_full_gpu(adev, false);
4609 
4610 	r = amdgpu_dpm_notify_rlc_state(adev, false);
4611 	if (r)
4612 		return r;
4613 
4614 	return 0;
4615 }
4616 
4617 /**
4618  * amdgpu_device_resume - initiate device resume
4619  *
4620  * @dev: drm dev pointer
4621  * @fbcon: notify the fbdev of resume
4622  *
4623  * Bring the hw back to operating state (all asics).
4624  * Returns 0 for success or an error on failure.
4625  * Called at driver resume.
4626  */
4627 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4628 {
4629 	struct amdgpu_device *adev = drm_to_adev(dev);
4630 	int r = 0;
4631 
4632 	if (amdgpu_sriov_vf(adev)) {
4633 		r = amdgpu_virt_request_full_gpu(adev, true);
4634 		if (r)
4635 			return r;
4636 	}
4637 
4638 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4639 		return 0;
4640 
4641 	if (adev->in_s0ix)
4642 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4643 
4644 	/* post card */
4645 	if (amdgpu_device_need_post(adev)) {
4646 		r = amdgpu_device_asic_init(adev);
4647 		if (r)
4648 			dev_err(adev->dev, "amdgpu asic init failed\n");
4649 	}
4650 
4651 	r = amdgpu_device_ip_resume(adev);
4652 
4653 	if (r) {
4654 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4655 		goto exit;
4656 	}
4657 	amdgpu_fence_driver_hw_init(adev);
4658 
4659 	if (!adev->in_s0ix) {
4660 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4661 		if (r)
4662 			goto exit;
4663 	}
4664 
4665 	r = amdgpu_device_ip_late_init(adev);
4666 	if (r)
4667 		goto exit;
4668 
4669 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4670 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4671 exit:
4672 	if (amdgpu_sriov_vf(adev)) {
4673 		amdgpu_virt_init_data_exchange(adev);
4674 		amdgpu_virt_release_full_gpu(adev, true);
4675 	}
4676 
4677 	if (r)
4678 		return r;
4679 
4680 	/* Make sure IB tests flushed */
4681 	flush_delayed_work(&adev->delayed_init_work);
4682 
4683 	if (fbcon)
4684 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4685 
4686 	amdgpu_ras_resume(adev);
4687 
4688 	if (adev->mode_info.num_crtc) {
4689 		/*
4690 		 * Most of the connector probing functions try to acquire runtime pm
4691 		 * refs to ensure that the GPU is powered on when connector polling is
4692 		 * performed. Since we're calling this from a runtime PM callback,
4693 		 * trying to acquire rpm refs will cause us to deadlock.
4694 		 *
4695 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4696 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4697 		 */
4698 #ifdef CONFIG_PM
4699 		dev->dev->power.disable_depth++;
4700 #endif
4701 		if (!adev->dc_enabled)
4702 			drm_helper_hpd_irq_event(dev);
4703 		else
4704 			drm_kms_helper_hotplug_event(dev);
4705 #ifdef CONFIG_PM
4706 		dev->dev->power.disable_depth--;
4707 #endif
4708 	}
4709 	adev->in_suspend = false;
4710 
4711 	if (adev->enable_mes)
4712 		amdgpu_mes_self_test(adev);
4713 
4714 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4715 		DRM_WARN("smart shift update failed\n");
4716 
4717 	return 0;
4718 }
4719 
4720 /**
4721  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4722  *
4723  * @adev: amdgpu_device pointer
4724  *
4725  * The list of all the hardware IPs that make up the asic is walked and
4726  * the check_soft_reset callbacks are run.  check_soft_reset determines
4727  * if the asic is still hung or not.
4728  * Returns true if any of the IPs are still in a hung state, false if not.
4729  */
4730 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4731 {
4732 	int i;
4733 	bool asic_hang = false;
4734 
4735 	if (amdgpu_sriov_vf(adev))
4736 		return true;
4737 
4738 	if (amdgpu_asic_need_full_reset(adev))
4739 		return true;
4740 
4741 	for (i = 0; i < adev->num_ip_blocks; i++) {
4742 		if (!adev->ip_blocks[i].status.valid)
4743 			continue;
4744 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4745 			adev->ip_blocks[i].status.hang =
4746 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4747 		if (adev->ip_blocks[i].status.hang) {
4748 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4749 			asic_hang = true;
4750 		}
4751 	}
4752 	return asic_hang;
4753 }
4754 
4755 /**
4756  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4757  *
4758  * @adev: amdgpu_device pointer
4759  *
4760  * The list of all the hardware IPs that make up the asic is walked and the
4761  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4762  * handles any IP specific hardware or software state changes that are
4763  * necessary for a soft reset to succeed.
4764  * Returns 0 on success, negative error code on failure.
4765  */
4766 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4767 {
4768 	int i, r = 0;
4769 
4770 	for (i = 0; i < adev->num_ip_blocks; i++) {
4771 		if (!adev->ip_blocks[i].status.valid)
4772 			continue;
4773 		if (adev->ip_blocks[i].status.hang &&
4774 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4775 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4776 			if (r)
4777 				return r;
4778 		}
4779 	}
4780 
4781 	return 0;
4782 }
4783 
4784 /**
4785  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4786  *
4787  * @adev: amdgpu_device pointer
4788  *
4789  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4790  * reset is necessary to recover.
4791  * Returns true if a full asic reset is required, false if not.
4792  */
4793 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4794 {
4795 	int i;
4796 
4797 	if (amdgpu_asic_need_full_reset(adev))
4798 		return true;
4799 
4800 	for (i = 0; i < adev->num_ip_blocks; i++) {
4801 		if (!adev->ip_blocks[i].status.valid)
4802 			continue;
4803 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4804 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4805 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4806 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4807 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4808 			if (adev->ip_blocks[i].status.hang) {
4809 				dev_info(adev->dev, "Some blocks need a full reset!\n");
4810 				return true;
4811 			}
4812 		}
4813 	}
4814 	return false;
4815 }
4816 
4817 /**
4818  * amdgpu_device_ip_soft_reset - do a soft reset
4819  *
4820  * @adev: amdgpu_device pointer
4821  *
4822  * The list of all the hardware IPs that make up the asic is walked and the
4823  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4824  * IP specific hardware or software state changes that are necessary to soft
4825  * reset the IP.
4826  * Returns 0 on success, negative error code on failure.
4827  */
4828 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4829 {
4830 	int i, r = 0;
4831 
4832 	for (i = 0; i < adev->num_ip_blocks; i++) {
4833 		if (!adev->ip_blocks[i].status.valid)
4834 			continue;
4835 		if (adev->ip_blocks[i].status.hang &&
4836 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4837 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4838 			if (r)
4839 				return r;
4840 		}
4841 	}
4842 
4843 	return 0;
4844 }
4845 
4846 /**
4847  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4848  *
4849  * @adev: amdgpu_device pointer
4850  *
4851  * The list of all the hardware IPs that make up the asic is walked and the
4852  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4853  * handles any IP specific hardware or software state changes that are
4854  * necessary after the IP has been soft reset.
4855  * Returns 0 on success, negative error code on failure.
4856  */
4857 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4858 {
4859 	int i, r = 0;
4860 
4861 	for (i = 0; i < adev->num_ip_blocks; i++) {
4862 		if (!adev->ip_blocks[i].status.valid)
4863 			continue;
4864 		if (adev->ip_blocks[i].status.hang &&
4865 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4866 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4867 		if (r)
4868 			return r;
4869 	}
4870 
4871 	return 0;
4872 }
4873 
4874 /**
4875  * amdgpu_device_recover_vram - Recover some VRAM contents
4876  *
4877  * @adev: amdgpu_device pointer
4878  *
4879  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4880  * restore things like GPUVM page tables after a GPU reset where
4881  * the contents of VRAM might be lost.
4882  *
4883  * Returns:
4884  * 0 on success, negative error code on failure.
4885  */
4886 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4887 {
4888 	struct dma_fence *fence = NULL, *next = NULL;
4889 	struct amdgpu_bo *shadow;
4890 	struct amdgpu_bo_vm *vmbo;
4891 	long r = 1, tmo;
4892 
4893 	if (amdgpu_sriov_runtime(adev))
4894 		tmo = msecs_to_jiffies(8000);
4895 	else
4896 		tmo = msecs_to_jiffies(100);
4897 
4898 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4899 	mutex_lock(&adev->shadow_list_lock);
4900 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4901 		/* If the vm is a compute context or the adev is an APU, the shadow will be NULL */
4902 		if (!vmbo->shadow)
4903 			continue;
4904 		shadow = vmbo->shadow;
4905 
4906 		/* No need to recover an evicted BO */
4907 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4908 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4909 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4910 			continue;
4911 
4912 		r = amdgpu_bo_restore_shadow(shadow, &next);
4913 		if (r)
4914 			break;
4915 
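		/*
		 * Pipeline the restores: after submitting this copy, wait
		 * (with a timeout) for the previously submitted one to
		 * complete before moving on to the next shadow.
		 */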
4916 		if (fence) {
4917 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4918 			dma_fence_put(fence);
4919 			fence = next;
4920 			if (tmo == 0) {
4921 				r = -ETIMEDOUT;
4922 				break;
4923 			} else if (tmo < 0) {
4924 				r = tmo;
4925 				break;
4926 			}
4927 		} else {
4928 			fence = next;
4929 		}
4930 	}
4931 	mutex_unlock(&adev->shadow_list_lock);
4932 
4933 	if (fence)
4934 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4935 	dma_fence_put(fence);
4936 
4937 	if (r < 0 || tmo <= 0) {
4938 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4939 		return -EIO;
4940 	}
4941 
4942 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4943 	return 0;
4944 }
4945 
4947 /**
4948  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4949  *
4950  * @adev: amdgpu_device pointer
4951  * @from_hypervisor: request from hypervisor
4952  *
4953  * Do a VF FLR and reinitialize the ASIC.
4954  * Returns 0 on success, negative error code on failure.
4955  */
4956 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4957 				     bool from_hypervisor)
4958 {
4959 	int r;
4960 	struct amdgpu_hive_info *hive = NULL;
4961 	int retry_limit = 0;
4962 
4963 retry:
4964 	amdgpu_amdkfd_pre_reset(adev);
4965 
4966 	if (from_hypervisor)
4967 		r = amdgpu_virt_request_full_gpu(adev, true);
4968 	else
4969 		r = amdgpu_virt_reset_gpu(adev);
4970 	if (r)
4971 		return r;
4972 	amdgpu_irq_gpu_reset_resume_helper(adev);
4973 
4974 	/* some SW cleanup the VF needs to do before recovery */
4975 	amdgpu_virt_post_reset(adev);
4976 
4977 	/* Resume IP prior to SMC */
4978 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4979 	if (r)
4980 		goto error;
4981 
4982 	amdgpu_virt_init_data_exchange(adev);
4983 
4984 	r = amdgpu_device_fw_loading(adev);
4985 	if (r)
4986 		return r;
4987 
4988 	/* now we are okay to resume SMC/CP/SDMA */
4989 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4990 	if (r)
4991 		goto error;
4992 
4993 	hive = amdgpu_get_xgmi_hive(adev);
4994 	/* Update PSP FW topology after reset */
4995 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4996 		r = amdgpu_xgmi_update_topology(hive, adev);
4997 
4998 	if (hive)
4999 		amdgpu_put_xgmi_hive(hive);
5000 
5001 	if (!r) {
5002 		r = amdgpu_ib_ring_tests(adev);
5003 
5004 		amdgpu_amdkfd_post_reset(adev);
5005 	}
5006 
5007 error:
5008 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5009 		amdgpu_inc_vram_lost(adev);
5010 		r = amdgpu_device_recover_vram(adev);
5011 	}
5012 	amdgpu_virt_release_full_gpu(adev, true);
5013 
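	/*
	 * For errors considered retryable, redo the whole VF FLR and re-init
	 * sequence, up to AMDGPU_MAX_RETRY_LIMIT attempts.
	 */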
5014 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5015 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5016 			retry_limit++;
5017 			goto retry;
5018 		} else
5019 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5020 	}
5021 
5022 	return r;
5023 }
5024 
5025 /**
5026  * amdgpu_device_has_job_running - check if there is any job in the pending list
5027  *
5028  * @adev: amdgpu_device pointer
5029  *
5030  * check if there is any job in the pending list
5031  */
5032 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5033 {
5034 	int i;
5035 	struct drm_sched_job *job;
5036 
5037 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5038 		struct amdgpu_ring *ring = adev->rings[i];
5039 
5040 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5041 			continue;
5042 
5043 		spin_lock(&ring->sched.job_list_lock);
5044 		job = list_first_entry_or_null(&ring->sched.pending_list,
5045 					       struct drm_sched_job, list);
5046 		spin_unlock(&ring->sched.job_list_lock);
5047 		if (job)
5048 			return true;
5049 	}
5050 	return false;
5051 }
5052 
5053 /**
5054  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5055  *
5056  * @adev: amdgpu_device pointer
5057  *
5058  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5059  * a hung GPU.
5060  */
5061 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5062 {
5063 
5064 	if (amdgpu_gpu_recovery == 0)
5065 		goto disabled;
5066 
5067 	/* Skip soft reset check in fatal error mode */
5068 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5069 		return true;
5070 
5071 	if (amdgpu_sriov_vf(adev))
5072 		return true;
5073 
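	/*
	 * amdgpu_gpu_recovery == -1 means auto: keep recovery disabled on the
	 * legacy ASICs listed below and enable it everywhere else; a value of
	 * 1 enables it on all ASICs.
	 */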
5074 	if (amdgpu_gpu_recovery == -1) {
5075 		switch (adev->asic_type) {
5076 #ifdef CONFIG_DRM_AMDGPU_SI
5077 		case CHIP_VERDE:
5078 		case CHIP_TAHITI:
5079 		case CHIP_PITCAIRN:
5080 		case CHIP_OLAND:
5081 		case CHIP_HAINAN:
5082 #endif
5083 #ifdef CONFIG_DRM_AMDGPU_CIK
5084 		case CHIP_KAVERI:
5085 		case CHIP_KABINI:
5086 		case CHIP_MULLINS:
5087 #endif
5088 		case CHIP_CARRIZO:
5089 		case CHIP_STONEY:
5090 		case CHIP_CYAN_SKILLFISH:
5091 			goto disabled;
5092 		default:
5093 			break;
5094 		}
5095 	}
5096 
5097 	return true;
5098 
5099 disabled:
5100 	dev_info(adev->dev, "GPU recovery disabled.\n");
5101 	return false;
5102 }
5103 
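/*
 * Perform a mode1 (whole ASIC) reset through the SMU when it supports it,
 * or through the PSP otherwise, then restore the cached PCI state and poll
 * the memory size register until the ASIC comes back or we time out.
 */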
5104 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5105 {
5106 	u32 i;
5107 	int ret = 0;
5108 
5109 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5110 
5111 	dev_info(adev->dev, "GPU mode1 reset\n");
5112 
5113 	/* disable BM */
5114 	pci_clear_master(adev->pdev);
5115 
5116 	amdgpu_device_cache_pci_state(adev->pdev);
5117 
5118 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5119 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5120 		ret = amdgpu_dpm_mode1_reset(adev);
5121 	} else {
5122 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5123 		ret = psp_gpu_reset(adev);
5124 	}
5125 
5126 	if (ret)
5127 		goto mode1_reset_failed;
5128 
5129 	amdgpu_device_load_pci_state(adev->pdev);
5130 	ret = amdgpu_psp_wait_for_bootloader(adev);
5131 	if (ret)
5132 		goto mode1_reset_failed;
5133 
5134 	/* wait for asic to come out of reset */
5135 	for (i = 0; i < adev->usec_timeout; i++) {
5136 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5137 
5138 		if (memsize != 0xffffffff)
5139 			break;
5140 		udelay(1);
5141 	}
5142 
5143 	if (i >= adev->usec_timeout) {
5144 		ret = -ETIMEDOUT;
5145 		goto mode1_reset_failed;
5146 	}
5147 
5148 	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5149 
5150 	return 0;
5151 
5152 mode1_reset_failed:
5153 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5154 	return ret;
5155 }
5156 
5157 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5158 				 struct amdgpu_reset_context *reset_context)
5159 {
5160 	int i, r = 0;
5161 	struct amdgpu_job *job = NULL;
5162 	bool need_full_reset =
5163 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5164 
5165 	if (reset_context->reset_req_dev == adev)
5166 		job = reset_context->job;
5167 
5168 	if (amdgpu_sriov_vf(adev)) {
5169 		/* stop the data exchange thread */
5170 		amdgpu_virt_fini_data_exchange(adev);
5171 	}
5172 
5173 	amdgpu_fence_driver_isr_toggle(adev, true);
5174 
5175 	/* block all schedulers and reset given job's ring */
5176 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5177 		struct amdgpu_ring *ring = adev->rings[i];
5178 
5179 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5180 			continue;
5181 
5182 		/* Clear the job fences from the fence driver to avoid force_completion
5183 		 * on them; only the NULL and vm flush fences remain in the fence driver.
5184 		 */
5185 		amdgpu_fence_driver_clear_job_fences(ring);
5186 
5187 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5188 		amdgpu_fence_driver_force_completion(ring);
5189 	}
5190 
5191 	amdgpu_fence_driver_isr_toggle(adev, false);
5192 
5193 	if (job && job->vm)
5194 		drm_sched_increase_karma(&job->base);
5195 
5196 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5197 	/* If reset handler not implemented, continue; otherwise return */
5198 	if (r == -EOPNOTSUPP)
5199 		r = 0;
5200 	else
5201 		return r;
5202 
5203 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5204 	if (!amdgpu_sriov_vf(adev)) {
5205 
5206 		if (!need_full_reset)
5207 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5208 
5209 		if (!need_full_reset && amdgpu_gpu_recovery &&
5210 		    amdgpu_device_ip_check_soft_reset(adev)) {
5211 			amdgpu_device_ip_pre_soft_reset(adev);
5212 			r = amdgpu_device_ip_soft_reset(adev);
5213 			amdgpu_device_ip_post_soft_reset(adev);
5214 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5215 				dev_info(adev->dev, "soft reset failed, will fall back to full reset!\n");
5216 				need_full_reset = true;
5217 			}
5218 		}
5219 
5220 		if (need_full_reset)
5221 			r = amdgpu_device_ip_suspend(adev);
5222 		if (need_full_reset)
5223 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5224 		else
5225 			clear_bit(AMDGPU_NEED_FULL_RESET,
5226 				  &reset_context->flags);
5227 	}
5228 
5229 	return r;
5230 }
5231 
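/*
 * Record the current values of the registers in reset_dump_reg_list while the
 * reset domain is held, emitting them through the amdgpu_reset_reg_dumps
 * trace event before the reset proceeds.
 */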
5232 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5233 {
5234 	int i;
5235 
5236 	lockdep_assert_held(&adev->reset_domain->sem);
5237 
5238 	for (i = 0; i < adev->reset_info.num_regs; i++) {
5239 		adev->reset_info.reset_dump_reg_value[i] =
5240 			RREG32(adev->reset_info.reset_dump_reg_list[i]);
5241 
5242 		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5243 					     adev->reset_info.reset_dump_reg_value[i]);
5244 	}
5245 
5246 	return 0;
5247 }
5248 
5249 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5250 			 struct amdgpu_reset_context *reset_context)
5251 {
5252 	struct amdgpu_device *tmp_adev = NULL;
5253 	bool need_full_reset, skip_hw_reset, vram_lost = false;
5254 	int r = 0;
5255 	bool gpu_reset_for_dev_remove = false;
5256 
5257 	/* Try reset handler method first */
5258 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5259 				    reset_list);
5260 	amdgpu_reset_reg_dumps(tmp_adev);
5261 
5262 	reset_context->reset_device_list = device_list_handle;
5263 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5264 	/* If reset handler not implemented, continue; otherwise return */
5265 	if (r == -EOPNOTSUPP)
5266 		r = 0;
5267 	else
5268 		return r;
5269 
5270 	/* Reset handler not implemented, use the default method */
5271 	need_full_reset =
5272 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5273 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5274 
5275 	gpu_reset_for_dev_remove =
5276 		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5277 			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5278 
5279 	/*
5280 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5281 	 * to allow proper link negotiation in FW (within 1 sec)
5282 	 */
5283 	if (!skip_hw_reset && need_full_reset) {
5284 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5285 			/* For XGMI run all resets in parallel to speed up the process */
5286 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5287 				tmp_adev->gmc.xgmi.pending_reset = false;
5288 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5289 					r = -EALREADY;
5290 			} else
5291 				r = amdgpu_asic_reset(tmp_adev);
5292 
5293 			if (r) {
5294 				dev_err(tmp_adev->dev, "ASIC reset failed with error %d for drm dev %s",
5295 					 r, adev_to_drm(tmp_adev)->unique);
5296 				goto out;
5297 			}
5298 		}
5299 
5300 		/* For XGMI wait for all resets to complete before proceed */
5301 		if (!r) {
5302 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5303 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5304 					flush_work(&tmp_adev->xgmi_reset_work);
5305 					r = tmp_adev->asic_reset_res;
5306 					if (r)
5307 						break;
5308 				}
5309 			}
5310 		}
5311 	}
5312 
5313 	if (!r && amdgpu_ras_intr_triggered()) {
5314 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5315 			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5316 		}
5317 
5318 		amdgpu_ras_intr_cleared();
5319 	}
5320 
5321 	/* Since the mode1 reset affects base ip blocks, the
5322 	 * phase1 ip blocks need to be resumed. Otherwise there
5323 	 * will be a BIOS signature error and the psp bootloader
5324 	 * can't load kdb on the next amdgpu install.
5325 	 */
5326 	if (gpu_reset_for_dev_remove) {
5327 		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5328 			amdgpu_device_ip_resume_phase1(tmp_adev);
5329 
5330 		goto end;
5331 	}
5332 
5333 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5334 		if (need_full_reset) {
5335 			/* post card */
5336 			r = amdgpu_device_asic_init(tmp_adev);
5337 			if (r) {
5338 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5339 			} else {
5340 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5341 
5342 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5343 				if (r)
5344 					goto out;
5345 
5346 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5347 
5348 				amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5349 
5350 				if (vram_lost) {
5351 					DRM_INFO("VRAM is lost due to GPU reset!\n");
5352 					amdgpu_inc_vram_lost(tmp_adev);
5353 				}
5354 
5355 				r = amdgpu_device_fw_loading(tmp_adev);
5356 				if (r)
5357 					return r;
5358 
5359 				r = amdgpu_xcp_restore_partition_mode(
5360 					tmp_adev->xcp_mgr);
5361 				if (r)
5362 					goto out;
5363 
5364 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5365 				if (r)
5366 					goto out;
5367 
5368 				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5369 					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5370 
5371 				if (vram_lost)
5372 					amdgpu_device_fill_reset_magic(tmp_adev);
5373 
5374 				/*
5375 				 * Add this ASIC back as tracked since the reset
5376 				 * has already completed successfully.
5377 				 */
5378 				amdgpu_register_gpu_instance(tmp_adev);
5379 
5380 				if (!reset_context->hive &&
5381 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5382 					amdgpu_xgmi_add_device(tmp_adev);
5383 
5384 				r = amdgpu_device_ip_late_init(tmp_adev);
5385 				if (r)
5386 					goto out;
5387 
5388 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5389 
5390 				/*
5391 				 * The GPU enters a bad state once the number of
5392 				 * faulty pages flagged by ECC reaches the
5393 				 * threshold, and RAS recovery is scheduled next.
5394 				 * So add a check here to break recovery if the
5395 				 * bad page threshold has indeed been exceeded,
5396 				 * and remind the user to retire this GPU or set
5397 				 * a bigger bad_page_threshold value when probing
5398 				 * the driver again.
5399 				 */
5400 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5401 					/* must succeed. */
5402 					amdgpu_ras_resume(tmp_adev);
5403 				} else {
5404 					r = -EINVAL;
5405 					goto out;
5406 				}
5407 
5408 				/* Update PSP FW topology after reset */
5409 				if (reset_context->hive &&
5410 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5411 					r = amdgpu_xgmi_update_topology(
5412 						reset_context->hive, tmp_adev);
5413 			}
5414 		}
5415 
5416 out:
5417 		if (!r) {
5418 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5419 			r = amdgpu_ib_ring_tests(tmp_adev);
5420 			if (r) {
5421 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5422 				need_full_reset = true;
5423 				r = -EAGAIN;
5424 				goto end;
5425 			}
5426 		}
5427 
5428 		if (!r)
5429 			r = amdgpu_device_recover_vram(tmp_adev);
5430 		else
5431 			tmp_adev->asic_reset_res = r;
5432 	}
5433 
5434 end:
5435 	if (need_full_reset)
5436 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5437 	else
5438 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5439 	return r;
5440 }
5441 
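/*
 * amdgpu_device_set_mp1_state - record the MP1 state for the pending reset
 *
 * Set adev->mp1_state based on the reset method that is about to be used:
 * SHUTDOWN for mode1, RESET for mode2 and NONE otherwise.
 */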
5442 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5443 {
5445 	switch (amdgpu_asic_reset_method(adev)) {
5446 	case AMD_RESET_METHOD_MODE1:
5447 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5448 		break;
5449 	case AMD_RESET_METHOD_MODE2:
5450 		adev->mp1_state = PP_MP1_STATE_RESET;
5451 		break;
5452 	default:
5453 		adev->mp1_state = PP_MP1_STATE_NONE;
5454 		break;
5455 	}
5456 }
5457 
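/*
 * amdgpu_device_unset_mp1_state - restore the MP1 state after the reset
 *
 * Report any collected VF errors and set adev->mp1_state back to
 * PP_MP1_STATE_NONE.
 */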
5458 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5459 {
5460 	amdgpu_vf_error_trans_all(adev);
5461 	adev->mp1_state = PP_MP1_STATE_NONE;
5462 }
5463 
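/*
 * amdgpu_device_resume_display_audio - resume the HDA controller after reset
 *
 * Counterpart of amdgpu_device_suspend_display_audio(): look up PCI function
 * 1 on the same bus (the display audio controller) and re-enable runtime PM
 * on it once the GPU reset has finished.
 */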
5464 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5465 {
5466 	struct pci_dev *p = NULL;
5467 
5468 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5469 			adev->pdev->bus->number, 1);
5470 	if (p) {
5471 		pm_runtime_enable(&(p->dev));
5472 		pm_runtime_resume(&(p->dev));
5473 	}
5474 
5475 	pci_dev_put(p);
5476 }
5477 
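/*
 * amdgpu_device_suspend_display_audio - runtime suspend the HDA controller
 *
 * Put the display audio PCI function into runtime suspend (and keep it
 * there) before the GPU reset starts, so the reset does not pull the
 * hardware out from under the audio driver.
 */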
5478 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5479 {
5480 	enum amd_reset_method reset_method;
5481 	struct pci_dev *p = NULL;
5482 	u64 expires;
5483 
5484 	/*
5485 	 * For now, only BACO and mode1 reset are confirmed to
5486 	 * suffer the audio issue if the audio device is not properly suspended.
5487 	 */
5488 	reset_method = amdgpu_asic_reset_method(adev);
5489 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5490 	     (reset_method != AMD_RESET_METHOD_MODE1))
5491 		return -EINVAL;
5492 
5493 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5494 			adev->pdev->bus->number, 1);
5495 	if (!p)
5496 		return -ENODEV;
5497 
5498 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5499 	if (!expires)
5500 		/*
5501 		 * If we cannot get the audio device autosuspend delay,
5502 		 * a fixed 4s interval is used. Since 3s is the audio
5503 		 * controller's default autosuspend delay setting, the
5504 		 * 4s used here is guaranteed to cover it.
5505 		 */
5506 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5507 
5508 	while (!pm_runtime_status_suspended(&(p->dev))) {
5509 		if (!pm_runtime_suspend(&(p->dev)))
5510 			break;
5511 
5512 		if (expires < ktime_get_mono_fast_ns()) {
5513 			dev_warn(adev->dev, "failed to suspend display audio\n");
5514 			pci_dev_put(p);
5515 			/* TODO: abort the succeeding gpu reset? */
5516 			return -ETIMEDOUT;
5517 		}
5518 	}
5519 
5520 	pm_runtime_disable(&(p->dev));
5521 
5522 	pci_dev_put(p);
5523 	return 0;
5524 }
5525 
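/*
 * amdgpu_device_stop_pending_resets - cancel reset work queued elsewhere
 *
 * Cancel any reset work already queued elsewhere (debugfs trigger, KFD,
 * virtualization FLR handler, RAS recovery) so that only the recovery path
 * currently running performs the reset.
 */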
5526 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5527 {
5528 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5529 
5530 #if defined(CONFIG_DEBUG_FS)
5531 	if (!amdgpu_sriov_vf(adev))
5532 		cancel_work(&adev->reset_work);
5533 #endif
5534 
5535 	if (adev->kfd.dev)
5536 		cancel_work(&adev->kfd.reset_work);
5537 
5538 	if (amdgpu_sriov_vf(adev))
5539 		cancel_work(&adev->virt.flr_work);
5540 
5541 	if (con && adev->ras_enabled)
5542 		cancel_work(&con->recovery_work);
5543 
5544 }
5545 
5546 /**
5547  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5548  *
5549  * @adev: amdgpu_device pointer
5550  * @job: the job that triggered the hang
5551  * @reset_context: amdgpu reset context pointer
5552  *
5553  * Attempt to reset the GPU if it has hung (all ASICs).
5554  * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
5555  * Returns 0 for success or an error on failure.
5556  */
5558 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5559 			      struct amdgpu_job *job,
5560 			      struct amdgpu_reset_context *reset_context)
5561 {
5562 	struct list_head device_list, *device_list_handle = NULL;
5563 	bool job_signaled = false;
5564 	struct amdgpu_hive_info *hive = NULL;
5565 	struct amdgpu_device *tmp_adev = NULL;
5566 	int i, r = 0;
5567 	bool need_emergency_restart = false;
5568 	bool audio_suspended = false;
5569 	bool gpu_reset_for_dev_remove = false;
5570 
5571 	gpu_reset_for_dev_remove =
5572 			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5573 				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5574 
5575 	/*
5576 	 * Special case: RAS triggered and full reset isn't supported
5577 	 */
5578 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5579 
5580 	/*
5581 	 * Flush RAM to disk so that after reboot
5582 	 * the user can read the log and see why the system rebooted.
5583 	 */
5584 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5585 		amdgpu_ras_get_context(adev)->reboot) {
5586 		DRM_WARN("Emergency reboot.");
5587 
5588 		ksys_sync_helper();
5589 		emergency_restart();
5590 	}
5591 
5592 	dev_info(adev->dev, "GPU %s begin!\n",
5593 		need_emergency_restart ? "jobs stop" : "reset");
5594 
5595 	if (!amdgpu_sriov_vf(adev))
5596 		hive = amdgpu_get_xgmi_hive(adev);
5597 	if (hive)
5598 		mutex_lock(&hive->hive_lock);
5599 
5600 	reset_context->job = job;
5601 	reset_context->hive = hive;
5602 	/*
5603 	 * Build list of devices to reset.
5604 	 * In case we are in XGMI hive mode, resort the device list
5605 	 * to put adev in the 1st position.
5606 	 */
5607 	INIT_LIST_HEAD(&device_list);
5608 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5609 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5610 			list_add_tail(&tmp_adev->reset_list, &device_list);
5611 			if (gpu_reset_for_dev_remove && adev->shutdown)
5612 				tmp_adev->shutdown = true;
5613 		}
5614 		if (!list_is_first(&adev->reset_list, &device_list))
5615 			list_rotate_to_front(&adev->reset_list, &device_list);
5616 		device_list_handle = &device_list;
5617 	} else {
5618 		list_add_tail(&adev->reset_list, &device_list);
5619 		device_list_handle = &device_list;
5620 	}
5621 
5622 	/* We need to lock reset domain only once both for XGMI and single device */
5623 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5624 				    reset_list);
5625 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5626 
5627 	/* block all schedulers and reset given job's ring */
5628 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5629 
5630 		amdgpu_device_set_mp1_state(tmp_adev);
5631 
5632 		/*
5633 		 * Try to put the audio codec into suspend state
5634 		 * before the gpu reset starts.
5635 		 *
5636 		 * Because the power domain of the graphics device
5637 		 * is shared with the AZ power domain, without this
5638 		 * we may change the audio hardware from behind
5639 		 * the audio driver's back, which will trigger
5640 		 * some audio codec errors.
5641 		 */
5642 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5643 			audio_suspended = true;
5644 
5645 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5646 
5647 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5648 
5649 		if (!amdgpu_sriov_vf(tmp_adev))
5650 			amdgpu_amdkfd_pre_reset(tmp_adev);
5651 
5652 		/*
5653 		 * Mark these ASICs to be reset as untracked first,
5654 		 * and add them back after the reset has completed
5655 		 */
5656 		amdgpu_unregister_gpu_instance(tmp_adev);
5657 
5658 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5659 
5660 		/* disable ras on ALL IPs */
5661 		if (!need_emergency_restart &&
5662 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5663 			amdgpu_ras_suspend(tmp_adev);
5664 
5665 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5666 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5667 
5668 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5669 				continue;
5670 
5671 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5672 
5673 			if (need_emergency_restart)
5674 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5675 		}
5676 		atomic_inc(&tmp_adev->gpu_reset_counter);
5677 	}
5678 
5679 	if (need_emergency_restart)
5680 		goto skip_sched_resume;
5681 
5682 	/*
5683 	 * Must check guilty signal here since after this point all old
5684 	 * HW fences are force signaled.
5685 	 *
5686 	 * job->base holds a reference to parent fence
5687 	 */
5688 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5689 		job_signaled = true;
5690 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5691 		goto skip_hw_reset;
5692 	}
5693 
5694 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5695 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5696 		if (gpu_reset_for_dev_remove) {
5697 			/* Workaround for ASICs that need to disable SMC first */
5698 			amdgpu_device_smu_fini_early(tmp_adev);
5699 		}
5700 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5701 		/* TODO: Should we stop? */
5702 		if (r) {
5703 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err %d for drm dev %s",
5704 				  r, adev_to_drm(tmp_adev)->unique);
5705 			tmp_adev->asic_reset_res = r;
5706 		}
5707 
5708 		/*
5709 		 * Drop all pending non-scheduler resets. Scheduler resets
5710 		 * were already dropped during drm_sched_stop.
5711 		 */
5712 		amdgpu_device_stop_pending_resets(tmp_adev);
5713 	}
5714 
5715 	/* Actual ASIC resets if needed. */
5716 	/* Host driver will handle XGMI hive reset for SRIOV */
5717 	if (amdgpu_sriov_vf(adev)) {
5718 		r = amdgpu_device_reset_sriov(adev, !job);
5719 		if (r)
5720 			adev->asic_reset_res = r;
5721 
5722 		/* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
5723 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5724 			    IP_VERSION(9, 4, 2) ||
5725 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5726 			amdgpu_ras_resume(adev);
5727 	} else {
5728 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5729 		if (r == -EAGAIN)
5730 			goto retry;
5731 
5732 		if (!r && gpu_reset_for_dev_remove)
5733 			goto recover_end;
5734 	}
5735 
5736 skip_hw_reset:
5737 
5738 	/* Post ASIC reset for all devs. */
5739 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5740 
5741 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5742 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5743 
5744 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
5745 				continue;
5746 
5747 			drm_sched_start(&ring->sched, true);
5748 		}
5749 
5750 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5751 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5752 
5753 		if (tmp_adev->asic_reset_res)
5754 			r = tmp_adev->asic_reset_res;
5755 
5756 		tmp_adev->asic_reset_res = 0;
5757 
5758 		if (r) {
5759 			/* bad news, how to tell it to userspace? */
5760 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5761 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5762 		} else {
5763 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5764 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5765 				DRM_WARN("smart shift update failed\n");
5766 		}
5767 	}
5768 
5769 skip_sched_resume:
5770 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5771 		/* unlock kfd: SRIOV would do it separately */
5772 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5773 			amdgpu_amdkfd_post_reset(tmp_adev);
5774 
5775 		/* kfd_post_reset will do nothing if the kfd device is not initialized,
5776 		 * so bring up kfd here if it was not initialized before
5777 		 */
5778 		if (!adev->kfd.init_complete)
5779 			amdgpu_amdkfd_device_init(adev);
5780 
5781 		if (audio_suspended)
5782 			amdgpu_device_resume_display_audio(tmp_adev);
5783 
5784 		amdgpu_device_unset_mp1_state(tmp_adev);
5785 
5786 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5787 	}
5788 
5789 recover_end:
5790 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5791 					    reset_list);
5792 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5793 
5794 	if (hive) {
5795 		mutex_unlock(&hive->hive_lock);
5796 		amdgpu_put_xgmi_hive(hive);
5797 	}
5798 
5799 	if (r)
5800 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5801 
5802 	atomic_set(&adev->reset_domain->reset_res, r);
5803 	return r;
5804 }
5805 
5806 /**
5807  * amdgpu_device_partner_bandwidth - find the bandwidth of the appropriate partner
5808  *
5809  * @adev: amdgpu_device pointer
5810  * @speed: pointer to the speed of the link
5811  * @width: pointer to the width of the link
5812  *
5813  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5814  * first physical partner to an AMD dGPU.
5815  * This will exclude any virtual switches and links.
5816  */
5817 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5818 					    enum pci_bus_speed *speed,
5819 					    enum pcie_link_width *width)
5820 {
5821 	struct pci_dev *parent = adev->pdev;
5822 
5823 	if (!speed || !width)
5824 		return;
5825 
5826 	*speed = PCI_SPEED_UNKNOWN;
5827 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5828 
5829 	while ((parent = pci_upstream_bridge(parent))) {
5830 		/* skip upstream/downstream switches internal to dGPU */
5831 		if (parent->vendor == PCI_VENDOR_ID_ATI)
5832 			continue;
5833 		*speed = pcie_get_speed_cap(parent);
5834 		*width = pcie_get_width_cap(parent);
5835 		break;
5836 	}
5837 }
5838 
5839 /**
5840  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5841  *
5842  * @adev: amdgpu_device pointer
5843  *
5844  * Fetches and stores in the driver the PCIE capabilities (gen speed
5845  * and lanes) of the slot the device is in. Handles APUs and
5846  * virtualized environments where PCIE config space may not be available.
5847  */
5848 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5849 {
5850 	struct pci_dev *pdev;
5851 	enum pci_bus_speed speed_cap, platform_speed_cap;
5852 	enum pcie_link_width platform_link_width;
5853 
5854 	if (amdgpu_pcie_gen_cap)
5855 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5856 
5857 	if (amdgpu_pcie_lane_cap)
5858 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5859 
5860 	/* covers APUs as well */
5861 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5862 		if (adev->pm.pcie_gen_mask == 0)
5863 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5864 		if (adev->pm.pcie_mlw_mask == 0)
5865 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5866 		return;
5867 	}
5868 
5869 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5870 		return;
5871 
5872 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5873 					&platform_link_width);
5874 
5875 	if (adev->pm.pcie_gen_mask == 0) {
5876 		/* asic caps */
5877 		pdev = adev->pdev;
5878 		speed_cap = pcie_get_speed_cap(pdev);
5879 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5880 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5881 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5882 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5883 		} else {
5884 			if (speed_cap == PCIE_SPEED_32_0GT)
5885 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5886 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5887 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5888 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5889 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5890 			else if (speed_cap == PCIE_SPEED_16_0GT)
5891 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5892 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5893 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5894 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5895 			else if (speed_cap == PCIE_SPEED_8_0GT)
5896 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5897 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5898 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5899 			else if (speed_cap == PCIE_SPEED_5_0GT)
5900 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5901 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5902 			else
5903 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5904 		}
5905 		/* platform caps */
5906 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5907 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5908 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5909 		} else {
5910 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5911 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5912 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5913 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5914 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5915 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5916 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5917 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5918 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5919 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5920 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5921 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5922 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5923 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5924 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5925 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5926 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5927 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5928 			else
5929 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5930 
5931 		}
5932 	}
5933 	if (adev->pm.pcie_mlw_mask == 0) {
5934 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5935 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5936 		} else {
5937 			switch (platform_link_width) {
5938 			case PCIE_LNK_X32:
5939 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5940 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5941 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5942 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5943 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5944 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5945 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5946 				break;
5947 			case PCIE_LNK_X16:
5948 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5949 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5950 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5951 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5952 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5953 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5954 				break;
5955 			case PCIE_LNK_X12:
5956 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5957 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5958 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5959 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5960 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5961 				break;
5962 			case PCIE_LNK_X8:
5963 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5964 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5965 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5966 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5967 				break;
5968 			case PCIE_LNK_X4:
5969 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5970 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5971 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5972 				break;
5973 			case PCIE_LNK_X2:
5974 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5975 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5976 				break;
5977 			case PCIE_LNK_X1:
5978 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5979 				break;
5980 			default:
5981 				break;
5982 			}
5983 		}
5984 	}
5985 }
5986 
5987 /**
5988  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5989  *
5990  * @adev: amdgpu_device pointer
5991  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5992  *
5993  * Return true if @peer_adev can access (DMA) @adev through the PCIe
5994  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5995  * @peer_adev.
5996  */
5997 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5998 				      struct amdgpu_device *peer_adev)
5999 {
6000 #ifdef CONFIG_HSA_AMD_P2P
6001 	uint64_t address_mask = peer_adev->dev->dma_mask ?
6002 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6003 	resource_size_t aper_limit =
6004 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
6005 	bool p2p_access =
6006 		!adev->gmc.xgmi.connected_to_cpu &&
6007 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6008 
6009 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6010 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6011 		!(adev->gmc.aper_base & address_mask ||
6012 		  aper_limit & address_mask));
6013 #else
6014 	return false;
6015 #endif
6016 }
6017 
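/**
 * amdgpu_device_baco_enter - enter BACO (Bus Active, Chip Off)
 *
 * @dev: drm_device pointer
 *
 * Disable the doorbell interrupt when RAS is enabled and ask the DPM code
 * to put the ASIC into the BACO state.
 *
 * Returns 0 for success or an error on failure.
 */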
6018 int amdgpu_device_baco_enter(struct drm_device *dev)
6019 {
6020 	struct amdgpu_device *adev = drm_to_adev(dev);
6021 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6022 
6023 	if (!amdgpu_device_supports_baco(dev))
6024 		return -ENOTSUPP;
6025 
6026 	if (ras && adev->ras_enabled &&
6027 	    adev->nbio.funcs->enable_doorbell_interrupt)
6028 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6029 
6030 	return amdgpu_dpm_baco_enter(adev);
6031 }
6032 
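/**
 * amdgpu_device_baco_exit - exit BACO (Bus Active, Chip Off)
 *
 * @dev: drm_device pointer
 *
 * Ask the DPM code to bring the ASIC out of BACO, re-enable the doorbell
 * interrupt when RAS is enabled and clear any stale doorbell interrupt
 * when running in passthrough mode.
 *
 * Returns 0 for success or an error on failure.
 */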
6033 int amdgpu_device_baco_exit(struct drm_device *dev)
6034 {
6035 	struct amdgpu_device *adev = drm_to_adev(dev);
6036 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6037 	int ret = 0;
6038 
6039 	if (!amdgpu_device_supports_baco(dev))
6040 		return -ENOTSUPP;
6041 
6042 	ret = amdgpu_dpm_baco_exit(adev);
6043 	if (ret)
6044 		return ret;
6045 
6046 	if (ras && adev->ras_enabled &&
6047 	    adev->nbio.funcs->enable_doorbell_interrupt)
6048 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6049 
6050 	if (amdgpu_passthrough(adev) &&
6051 	    adev->nbio.funcs->clear_doorbell_interrupt)
6052 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6053 
6054 	return 0;
6055 }
6056 
6057 /**
6058  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6059  * @pdev: PCI device struct
6060  * @state: PCI channel state
6061  *
6062  * Description: Called when a PCI error is detected.
6063  *
6064  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6065  * Return: PCI_ERS_RESULT_CAN_RECOVER, PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6066 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6067 {
6068 	struct drm_device *dev = pci_get_drvdata(pdev);
6069 	struct amdgpu_device *adev = drm_to_adev(dev);
6070 	int i;
6071 
6072 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6073 
6074 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6075 		DRM_WARN("No support for XGMI hive yet...");
6076 		return PCI_ERS_RESULT_DISCONNECT;
6077 	}
6078 
6079 	adev->pci_channel_state = state;
6080 
6081 	switch (state) {
6082 	case pci_channel_io_normal:
6083 		return PCI_ERS_RESULT_CAN_RECOVER;
6084 	/* Fatal error, prepare for slot reset */
6085 	case pci_channel_io_frozen:
6086 		/*
6087 		 * Locking adev->reset_domain->sem will prevent any external access
6088 		 * to GPU during PCI error recovery
6089 		 */
6090 		amdgpu_device_lock_reset_domain(adev->reset_domain);
6091 		amdgpu_device_set_mp1_state(adev);
6092 
6093 		/*
6094 		 * Block any work scheduling as we do for regular GPU reset
6095 		 * for the duration of the recovery
6096 		 */
6097 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6098 			struct amdgpu_ring *ring = adev->rings[i];
6099 
6100 			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
6101 				continue;
6102 
6103 			drm_sched_stop(&ring->sched, NULL);
6104 		}
6105 		atomic_inc(&adev->gpu_reset_counter);
6106 		return PCI_ERS_RESULT_NEED_RESET;
6107 	case pci_channel_io_perm_failure:
6108 		/* Permanent error, prepare for device removal */
6109 		return PCI_ERS_RESULT_DISCONNECT;
6110 	}
6111 
6112 	return PCI_ERS_RESULT_NEED_RESET;
6113 }
6114 
6115 /**
6116  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6117  * @pdev: pointer to PCI device
6118  */
6119 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6120 {
6122 	DRM_INFO("PCI error: mmio enabled callback!!\n");
6123 
6124 	/* TODO - dump whatever for debugging purposes */
6125 
6126 	/* This is called only if amdgpu_pci_error_detected returns
6127 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6128 	 * works, no need to reset slot.
6129 	 */
6130 
6131 	return PCI_ERS_RESULT_RECOVERED;
6132 }
6133 
6134 /**
6135  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6136  * @pdev: PCI device struct
6137  *
6138  * Description: This routine is called by the pci error recovery
6139  * code after the PCI slot has been reset, just before we
6140  * should resume normal operations.
6141  */
6142 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6143 {
6144 	struct drm_device *dev = pci_get_drvdata(pdev);
6145 	struct amdgpu_device *adev = drm_to_adev(dev);
6146 	int r, i;
6147 	struct amdgpu_reset_context reset_context;
6148 	u32 memsize;
6149 	struct list_head device_list;
6150 
6151 	DRM_INFO("PCI error: slot reset callback!!\n");
6152 
6153 	memset(&reset_context, 0, sizeof(reset_context));
6154 
6155 	INIT_LIST_HEAD(&device_list);
6156 	list_add_tail(&adev->reset_list, &device_list);
6157 
6158 	/* wait for asic to come out of reset */
6159 	msleep(500);
6160 
6161 	/* Restore PCI config space */
6162 	amdgpu_device_load_pci_state(pdev);
6163 
6164 	/* confirm ASIC came out of reset */
6165 	for (i = 0; i < adev->usec_timeout; i++) {
6166 		memsize = amdgpu_asic_get_config_memsize(adev);
6167 
6168 		if (memsize != 0xffffffff)
6169 			break;
6170 		udelay(1);
6171 	}
6172 	if (memsize == 0xffffffff) {
6173 		r = -ETIME;
6174 		goto out;
6175 	}
6176 
6177 	reset_context.method = AMD_RESET_METHOD_NONE;
6178 	reset_context.reset_req_dev = adev;
6179 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6180 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6181 
6182 	adev->no_hw_access = true;
6183 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6184 	adev->no_hw_access = false;
6185 	if (r)
6186 		goto out;
6187 
6188 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6189 
6190 out:
6191 	if (!r) {
6192 		if (amdgpu_device_cache_pci_state(adev->pdev))
6193 			pci_restore_state(adev->pdev);
6194 
6195 		DRM_INFO("PCIe error recovery succeeded\n");
6196 	} else {
6197 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
6198 		amdgpu_device_unset_mp1_state(adev);
6199 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6200 	}
6201 
6202 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6203 }
6204 
6205 /**
6206  * amdgpu_pci_resume() - resume normal ops after PCI reset
6207  * @pdev: pointer to PCI device
6208  *
6209  * Called when the error recovery driver tells us that it's
6210  * OK to resume normal operation.
6211  */
6212 void amdgpu_pci_resume(struct pci_dev *pdev)
6213 {
6214 	struct drm_device *dev = pci_get_drvdata(pdev);
6215 	struct amdgpu_device *adev = drm_to_adev(dev);
6216 	int i;
6217 
6219 	DRM_INFO("PCI error: resume callback!!\n");
6220 
6221 	/* Only continue execution for the case of pci_channel_io_frozen */
6222 	if (adev->pci_channel_state != pci_channel_io_frozen)
6223 		return;
6224 
6225 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6226 		struct amdgpu_ring *ring = adev->rings[i];
6227 
6228 		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
6229 			continue;
6230 
6231 		drm_sched_start(&ring->sched, true);
6232 	}
6233 
6234 	amdgpu_device_unset_mp1_state(adev);
6235 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6236 }
6237 
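/**
 * amdgpu_device_cache_pci_state - save and cache the PCI config space
 *
 * @pdev: PCI device struct
 *
 * Save the current PCI configuration space and keep a copy in
 * adev->pci_state so it can be restored during reset or error recovery.
 *
 * Returns true for success or false on failure.
 */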
6238 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6239 {
6240 	struct drm_device *dev = pci_get_drvdata(pdev);
6241 	struct amdgpu_device *adev = drm_to_adev(dev);
6242 	int r;
6243 
6244 	r = pci_save_state(pdev);
6245 	if (!r) {
6246 		kfree(adev->pci_state);
6247 
6248 		adev->pci_state = pci_store_saved_state(pdev);
6249 
6250 		if (!adev->pci_state) {
6251 			DRM_ERROR("Failed to store PCI saved state");
6252 			return false;
6253 		}
6254 	} else {
6255 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6256 		return false;
6257 	}
6258 
6259 	return true;
6260 }
6261 
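/**
 * amdgpu_device_load_pci_state - restore the cached PCI config space
 *
 * @pdev: PCI device struct
 *
 * Load the configuration space previously cached by
 * amdgpu_device_cache_pci_state() and write it back to the device.
 *
 * Returns true for success or false on failure.
 */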
6262 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6263 {
6264 	struct drm_device *dev = pci_get_drvdata(pdev);
6265 	struct amdgpu_device *adev = drm_to_adev(dev);
6266 	int r;
6267 
6268 	if (!adev->pci_state)
6269 		return false;
6270 
6271 	r = pci_load_saved_state(pdev, adev->pci_state);
6272 
6273 	if (!r) {
6274 		pci_restore_state(pdev);
6275 	} else {
6276 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6277 		return false;
6278 	}
6279 
6280 	return true;
6281 }
6282 
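/*
 * amdgpu_device_flush_hdp - flush the HDP (Host Data Path) write cache
 *
 * Make CPU writes to VRAM visible to the GPU, either through the ring's
 * emit_hdp_flush packet or the ASIC callback. Not needed for bare-metal
 * APUs or for devices connected to the CPU via XGMI.
 */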
6283 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6284 		struct amdgpu_ring *ring)
6285 {
6286 #ifdef CONFIG_X86_64
6287 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6288 		return;
6289 #endif
6290 	if (adev->gmc.xgmi.connected_to_cpu)
6291 		return;
6292 
6293 	if (ring && ring->funcs->emit_hdp_flush)
6294 		amdgpu_ring_emit_hdp_flush(ring);
6295 	else
6296 		amdgpu_asic_flush_hdp(adev, ring);
6297 }
6298 
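/*
 * amdgpu_device_invalidate_hdp - invalidate the HDP read cache
 *
 * Make the latest GPU writes to VRAM visible to the CPU. Skipped in the
 * same cases as amdgpu_device_flush_hdp().
 */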
6299 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6300 		struct amdgpu_ring *ring)
6301 {
6302 #ifdef CONFIG_X86_64
6303 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6304 		return;
6305 #endif
6306 	if (adev->gmc.xgmi.connected_to_cpu)
6307 		return;
6308 
6309 	amdgpu_asic_invalidate_hdp(adev, ring);
6310 }
6311 
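/* Returns non-zero while a GPU reset is in progress in this reset domain. */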
6312 int amdgpu_in_reset(struct amdgpu_device *adev)
6313 {
6314 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6315 }
6316 
6317 /**
6318  * amdgpu_device_halt() - bring hardware to some kind of halt state
6319  *
6320  * @adev: amdgpu_device pointer
6321  *
6322  * Bring hardware to some kind of halt state so that no one can touch it
6323  * any more. It helps to maintain the error context when an error occurs.
6324  * Compared to a simple hang, the system will stay stable at least for SSH
6325  * access. Then it should be trivial to inspect the hardware state and
6326  * see what's going on. Implemented as follows:
6327  *
6328  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6329  *    clears all CPU mappings to device, disallows remappings through page faults
6330  * 2. amdgpu_irq_disable_all() disables all interrupts
6331  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6332  * 4. set adev->no_hw_access to avoid potential crashes after step 5
6333  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6334  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6335  *    flush any in-flight DMA operations
6336  */
6337 void amdgpu_device_halt(struct amdgpu_device *adev)
6338 {
6339 	struct pci_dev *pdev = adev->pdev;
6340 	struct drm_device *ddev = adev_to_drm(adev);
6341 
6342 	amdgpu_xcp_dev_unplug(adev);
6343 	drm_dev_unplug(ddev);
6344 
6345 	amdgpu_irq_disable_all(adev);
6346 
6347 	amdgpu_fence_driver_hw_fini(adev);
6348 
6349 	adev->no_hw_access = true;
6350 
6351 	amdgpu_device_unmap_mmio(adev);
6352 
6353 	pci_disable_device(pdev);
6354 	pci_wait_for_pending_transaction(pdev);
6355 }
6356 
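/*
 * amdgpu_device_pcie_port_rreg - read a PCIe port register
 *
 * Use the NBIO index/data register pair to read a PCIe port register;
 * the pcie_idx_lock keeps the index and data accesses atomic.
 */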
6357 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6358 				u32 reg)
6359 {
6360 	unsigned long flags, address, data;
6361 	u32 r;
6362 
6363 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6364 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6365 
6366 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6367 	WREG32(address, reg * 4);
6368 	(void)RREG32(address);
6369 	r = RREG32(data);
6370 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6371 	return r;
6372 }
6373 
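/*
 * amdgpu_device_pcie_port_wreg - write a PCIe port register
 *
 * Same index/data scheme as amdgpu_device_pcie_port_rreg(); the trailing
 * read back of the data register ensures the write has reached the
 * hardware before the lock is dropped.
 */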
6374 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6375 				u32 reg, u32 v)
6376 {
6377 	unsigned long flags, address, data;
6378 
6379 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6380 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6381 
6382 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6383 	WREG32(address, reg * 4);
6384 	(void)RREG32(address);
6385 	WREG32(data, v);
6386 	(void)RREG32(data);
6387 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6388 }
6389 
6390 /**
6391  * amdgpu_device_switch_gang - switch to a new gang
6392  * @adev: amdgpu_device pointer
6393  * @gang: the gang to switch to
6394  *
6395  * Try to switch to a new gang.
6396  * Returns: NULL if we switched to the new gang or a reference to the current
6397  * gang leader.
6398  */
6399 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6400 					    struct dma_fence *gang)
6401 {
6402 	struct dma_fence *old = NULL;
6403 
6404 	do {
6405 		dma_fence_put(old);
6406 		rcu_read_lock();
6407 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
6408 		rcu_read_unlock();
6409 
6410 		if (old == gang)
6411 			break;
6412 
6413 		if (!dma_fence_is_signaled(old))
6414 			return old;
6415 
6416 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6417 			 old, gang) != old);
6418 
6419 	dma_fence_put(old);
6420 	return NULL;
6421 }
6422 
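/*
 * amdgpu_device_has_display_hardware - check whether the ASIC has a display
 * block. Older ASICs are matched by name; newer ones rely on IP discovery
 * and the DMU harvest mask.
 */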
6423 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6424 {
6425 	switch (adev->asic_type) {
6426 #ifdef CONFIG_DRM_AMDGPU_SI
6427 	case CHIP_HAINAN:
6428 #endif
6429 	case CHIP_TOPAZ:
6430 		/* chips with no display hardware */
6431 		return false;
6432 #ifdef CONFIG_DRM_AMDGPU_SI
6433 	case CHIP_TAHITI:
6434 	case CHIP_PITCAIRN:
6435 	case CHIP_VERDE:
6436 	case CHIP_OLAND:
6437 #endif
6438 #ifdef CONFIG_DRM_AMDGPU_CIK
6439 	case CHIP_BONAIRE:
6440 	case CHIP_HAWAII:
6441 	case CHIP_KAVERI:
6442 	case CHIP_KABINI:
6443 	case CHIP_MULLINS:
6444 #endif
6445 	case CHIP_TONGA:
6446 	case CHIP_FIJI:
6447 	case CHIP_POLARIS10:
6448 	case CHIP_POLARIS11:
6449 	case CHIP_POLARIS12:
6450 	case CHIP_VEGAM:
6451 	case CHIP_CARRIZO:
6452 	case CHIP_STONEY:
6453 		/* chips with display hardware */
6454 		return true;
6455 	default:
6456 		/* IP discovery */
6457 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6458 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6459 			return false;
6460 		return true;
6461 	}
6462 }
6463 
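/*
 * amdgpu_device_wait_on_rreg - poll a register until it reaches a value
 *
 * Repeatedly read @reg_addr until (value & @mask) == @expected_value,
 * restarting the timeout whenever the value changes. Returns 0 on success
 * or -ETIMEDOUT if the expected value is never reached.
 */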
6464 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6465 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6466 		uint32_t expected_value, uint32_t mask)
6467 {
6468 	uint32_t ret = 0;
6469 	uint32_t old_ = 0;
6470 	uint32_t tmp_ = RREG32(reg_addr);
6471 	uint32_t loop = adev->usec_timeout;
6472 
6473 	while ((tmp_ & (mask)) != (expected_value)) {
6474 		if (old_ != tmp_) {
6475 			loop = adev->usec_timeout;
6476 			old_ = tmp_;
6477 		} else
6478 			udelay(1);
6479 		tmp_ = RREG32(reg_addr);
6480 		loop--;
6481 		if (!loop) {
6482 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6483 				  inst, reg_name, (uint32_t)expected_value,
6484 				  (uint32_t)(tmp_ & (mask)));
6485 			ret = -ETIMEDOUT;
6486 			break;
6487 		}
6488 	}
6489 	return ret;
6490 }
6491