xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision 0d38f6009e4e4e511fb5c3c673d54bf0c242c4b7)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
37 
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 #include "amdgpu_dev_coredump.h"
78 
79 #include <linux/suspend.h>
80 #include <drm/task_barrier.h>
81 #include <linux/pm_runtime.h>
82 
83 #include <drm/drm_drv.h>
84 
85 #if IS_ENABLED(CONFIG_X86)
86 #include <asm/intel-family.h>
87 #endif
88 
89 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
96 
97 #define AMDGPU_RESUME_MS		2000
98 #define AMDGPU_MAX_RETRY_LIMIT		2
99 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
100 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
101 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
102 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
103 
104 static const struct drm_driver amdgpu_kms_driver;
105 
106 const char *amdgpu_asic_name[] = {
107 	"TAHITI",
108 	"PITCAIRN",
109 	"VERDE",
110 	"OLAND",
111 	"HAINAN",
112 	"BONAIRE",
113 	"KAVERI",
114 	"KABINI",
115 	"HAWAII",
116 	"MULLINS",
117 	"TOPAZ",
118 	"TONGA",
119 	"FIJI",
120 	"CARRIZO",
121 	"STONEY",
122 	"POLARIS10",
123 	"POLARIS11",
124 	"POLARIS12",
125 	"VEGAM",
126 	"VEGA10",
127 	"VEGA12",
128 	"VEGA20",
129 	"RAVEN",
130 	"ARCTURUS",
131 	"RENOIR",
132 	"ALDEBARAN",
133 	"NAVI10",
134 	"CYAN_SKILLFISH",
135 	"NAVI14",
136 	"NAVI12",
137 	"SIENNA_CICHLID",
138 	"NAVY_FLOUNDER",
139 	"VANGOGH",
140 	"DIMGREY_CAVEFISH",
141 	"BEIGE_GOBY",
142 	"YELLOW_CARP",
143 	"IP DISCOVERY",
144 	"LAST",
145 };
146 
147 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
148 
149 /**
150  * DOC: pcie_replay_count
151  *
152  * The amdgpu driver provides a sysfs API for reporting the total number
153  * of PCIe replays (NAKs)
154  * The file pcie_replay_count is used for this and returns the total
155  * number of replays as a sum of the NAKs generated and NAKs received
156  */
157 
158 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
159 		struct device_attribute *attr, char *buf)
160 {
161 	struct drm_device *ddev = dev_get_drvdata(dev);
162 	struct amdgpu_device *adev = drm_to_adev(ddev);
163 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
164 
165 	return sysfs_emit(buf, "%llu\n", cnt);
166 }
167 
168 static DEVICE_ATTR(pcie_replay_count, 0444,
169 		amdgpu_device_get_pcie_replay_count, NULL);
170 
171 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
172 					  struct bin_attribute *attr, char *buf,
173 					  loff_t ppos, size_t count)
174 {
175 	struct device *dev = kobj_to_dev(kobj);
176 	struct drm_device *ddev = dev_get_drvdata(dev);
177 	struct amdgpu_device *adev = drm_to_adev(ddev);
178 	ssize_t bytes_read;
179 
180 	switch (ppos) {
181 	case AMDGPU_SYS_REG_STATE_XGMI:
182 		bytes_read = amdgpu_asic_get_reg_state(
183 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
184 		break;
185 	case AMDGPU_SYS_REG_STATE_WAFL:
186 		bytes_read = amdgpu_asic_get_reg_state(
187 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
188 		break;
189 	case AMDGPU_SYS_REG_STATE_PCIE:
190 		bytes_read = amdgpu_asic_get_reg_state(
191 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
192 		break;
193 	case AMDGPU_SYS_REG_STATE_USR:
194 		bytes_read = amdgpu_asic_get_reg_state(
195 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
196 		break;
197 	case AMDGPU_SYS_REG_STATE_USR_1:
198 		bytes_read = amdgpu_asic_get_reg_state(
199 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
200 		break;
201 	default:
202 		return -EINVAL;
203 	}
204 
205 	return bytes_read;
206 }
207 
208 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
209 	 AMDGPU_SYS_REG_STATE_END);
210 
211 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
212 {
213 	int ret;
214 
215 	if (!amdgpu_asic_get_reg_state_supported(adev))
216 		return 0;
217 
218 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
219 
220 	return ret;
221 }
222 
223 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
224 {
225 	if (!amdgpu_asic_get_reg_state_supported(adev))
226 		return;
227 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
228 }
229 
230 /**
231  * DOC: board_info
232  *
233  * The amdgpu driver provides a sysfs API for giving board related information.
234  * It provides the form factor information in the format
235  *
236  *   type : form factor
237  *
238  * Possible form factor values
239  *
240  * - "cem"		- PCIE CEM card
241  * - "oam"		- Open Compute Accelerator Module
242  * - "unknown"	- Not known
243  *
244  */
245 
246 static ssize_t amdgpu_device_get_board_info(struct device *dev,
247 					    struct device_attribute *attr,
248 					    char *buf)
249 {
250 	struct drm_device *ddev = dev_get_drvdata(dev);
251 	struct amdgpu_device *adev = drm_to_adev(ddev);
252 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
253 	const char *pkg;
254 
255 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
256 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
257 
258 	switch (pkg_type) {
259 	case AMDGPU_PKG_TYPE_CEM:
260 		pkg = "cem";
261 		break;
262 	case AMDGPU_PKG_TYPE_OAM:
263 		pkg = "oam";
264 		break;
265 	default:
266 		pkg = "unknown";
267 		break;
268 	}
269 
270 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
271 }
272 
273 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
274 
275 static struct attribute *amdgpu_board_attrs[] = {
276 	&dev_attr_board_info.attr,
277 	NULL,
278 };
279 
280 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
281 					     struct attribute *attr, int n)
282 {
283 	struct device *dev = kobj_to_dev(kobj);
284 	struct drm_device *ddev = dev_get_drvdata(dev);
285 	struct amdgpu_device *adev = drm_to_adev(ddev);
286 
287 	if (adev->flags & AMD_IS_APU)
288 		return 0;
289 
290 	return attr->mode;
291 }
292 
293 static const struct attribute_group amdgpu_board_attrs_group = {
294 	.attrs = amdgpu_board_attrs,
295 	.is_visible = amdgpu_board_attrs_is_visible
296 };
297 
298 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
299 
300 
301 /**
302  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
303  *
304  * @dev: drm_device pointer
305  *
306  * Returns true if the device is a dGPU with ATPX power control,
307  * otherwise return false.
308  */
309 bool amdgpu_device_supports_px(struct drm_device *dev)
310 {
311 	struct amdgpu_device *adev = drm_to_adev(dev);
312 
313 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
314 		return true;
315 	return false;
316 }
317 
318 /**
319  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
320  *
321  * @dev: drm_device pointer
322  *
323  * Returns true if the device is a dGPU with ACPI power control,
324  * otherwise return false.
325  */
326 bool amdgpu_device_supports_boco(struct drm_device *dev)
327 {
328 	struct amdgpu_device *adev = drm_to_adev(dev);
329 
330 	if (adev->has_pr3 ||
331 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
332 		return true;
333 	return false;
334 }
335 
336 /**
337  * amdgpu_device_supports_baco - Does the device support BACO
338  *
339  * @dev: drm_device pointer
340  *
341  * Return:
342  * 1 if the device supporte BACO;
343  * 3 if the device support MACO (only works if BACO is supported)
344  * otherwise return 0.
345  */
346 int amdgpu_device_supports_baco(struct drm_device *dev)
347 {
348 	struct amdgpu_device *adev = drm_to_adev(dev);
349 
350 	return amdgpu_asic_supports_baco(adev);
351 }
352 
353 /**
354  * amdgpu_device_supports_smart_shift - Is the device dGPU with
355  * smart shift support
356  *
357  * @dev: drm_device pointer
358  *
359  * Returns true if the device is a dGPU with Smart Shift support,
360  * otherwise returns false.
361  */
362 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
363 {
364 	return (amdgpu_device_supports_boco(dev) &&
365 		amdgpu_acpi_is_power_shift_control_supported());
366 }
367 
368 /*
369  * VRAM access helper functions
370  */
371 
372 /**
373  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
374  *
375  * @adev: amdgpu_device pointer
376  * @pos: offset of the buffer in vram
377  * @buf: virtual address of the buffer in system memory
378  * @size: read/write size, sizeof(@buf) must > @size
379  * @write: true - write to vram, otherwise - read from vram
380  */
381 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
382 			     void *buf, size_t size, bool write)
383 {
384 	unsigned long flags;
385 	uint32_t hi = ~0, tmp = 0;
386 	uint32_t *data = buf;
387 	uint64_t last;
388 	int idx;
389 
390 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
391 		return;
392 
393 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
394 
395 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
396 	for (last = pos + size; pos < last; pos += 4) {
397 		tmp = pos >> 31;
398 
399 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
400 		if (tmp != hi) {
401 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
402 			hi = tmp;
403 		}
404 		if (write)
405 			WREG32_NO_KIQ(mmMM_DATA, *data++);
406 		else
407 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
408 	}
409 
410 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
411 	drm_dev_exit(idx);
412 }
413 
414 /**
415  * amdgpu_device_aper_access - access vram by vram aperature
416  *
417  * @adev: amdgpu_device pointer
418  * @pos: offset of the buffer in vram
419  * @buf: virtual address of the buffer in system memory
420  * @size: read/write size, sizeof(@buf) must > @size
421  * @write: true - write to vram, otherwise - read from vram
422  *
423  * The return value means how many bytes have been transferred.
424  */
425 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
426 				 void *buf, size_t size, bool write)
427 {
428 #ifdef CONFIG_64BIT
429 	void __iomem *addr;
430 	size_t count = 0;
431 	uint64_t last;
432 
433 	if (!adev->mman.aper_base_kaddr)
434 		return 0;
435 
436 	last = min(pos + size, adev->gmc.visible_vram_size);
437 	if (last > pos) {
438 		addr = adev->mman.aper_base_kaddr + pos;
439 		count = last - pos;
440 
441 		if (write) {
442 			memcpy_toio(addr, buf, count);
443 			/* Make sure HDP write cache flush happens without any reordering
444 			 * after the system memory contents are sent over PCIe device
445 			 */
446 			mb();
447 			amdgpu_device_flush_hdp(adev, NULL);
448 		} else {
449 			amdgpu_device_invalidate_hdp(adev, NULL);
450 			/* Make sure HDP read cache is invalidated before issuing a read
451 			 * to the PCIe device
452 			 */
453 			mb();
454 			memcpy_fromio(buf, addr, count);
455 		}
456 
457 	}
458 
459 	return count;
460 #else
461 	return 0;
462 #endif
463 }
464 
465 /**
466  * amdgpu_device_vram_access - read/write a buffer in vram
467  *
468  * @adev: amdgpu_device pointer
469  * @pos: offset of the buffer in vram
470  * @buf: virtual address of the buffer in system memory
471  * @size: read/write size, sizeof(@buf) must > @size
472  * @write: true - write to vram, otherwise - read from vram
473  */
474 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
475 			       void *buf, size_t size, bool write)
476 {
477 	size_t count;
478 
479 	/* try to using vram apreature to access vram first */
480 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
481 	size -= count;
482 	if (size) {
483 		/* using MM to access rest vram */
484 		pos += count;
485 		buf += count;
486 		amdgpu_device_mm_access(adev, pos, buf, size, write);
487 	}
488 }
489 
490 /*
491  * register access helper functions.
492  */
493 
494 /* Check if hw access should be skipped because of hotplug or device error */
495 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
496 {
497 	if (adev->no_hw_access)
498 		return true;
499 
500 #ifdef CONFIG_LOCKDEP
501 	/*
502 	 * This is a bit complicated to understand, so worth a comment. What we assert
503 	 * here is that the GPU reset is not running on another thread in parallel.
504 	 *
505 	 * For this we trylock the read side of the reset semaphore, if that succeeds
506 	 * we know that the reset is not running in paralell.
507 	 *
508 	 * If the trylock fails we assert that we are either already holding the read
509 	 * side of the lock or are the reset thread itself and hold the write side of
510 	 * the lock.
511 	 */
512 	if (in_task()) {
513 		if (down_read_trylock(&adev->reset_domain->sem))
514 			up_read(&adev->reset_domain->sem);
515 		else
516 			lockdep_assert_held(&adev->reset_domain->sem);
517 	}
518 #endif
519 	return false;
520 }
521 
522 /**
523  * amdgpu_device_rreg - read a memory mapped IO or indirect register
524  *
525  * @adev: amdgpu_device pointer
526  * @reg: dword aligned register offset
527  * @acc_flags: access flags which require special behavior
528  *
529  * Returns the 32 bit value from the offset specified.
530  */
531 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
532 			    uint32_t reg, uint32_t acc_flags)
533 {
534 	uint32_t ret;
535 
536 	if (amdgpu_device_skip_hw_access(adev))
537 		return 0;
538 
539 	if ((reg * 4) < adev->rmmio_size) {
540 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
541 		    amdgpu_sriov_runtime(adev) &&
542 		    down_read_trylock(&adev->reset_domain->sem)) {
543 			ret = amdgpu_kiq_rreg(adev, reg, 0);
544 			up_read(&adev->reset_domain->sem);
545 		} else {
546 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
547 		}
548 	} else {
549 		ret = adev->pcie_rreg(adev, reg * 4);
550 	}
551 
552 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
553 
554 	return ret;
555 }
556 
557 /*
558  * MMIO register read with bytes helper functions
559  * @offset:bytes offset from MMIO start
560  */
561 
562 /**
563  * amdgpu_mm_rreg8 - read a memory mapped IO register
564  *
565  * @adev: amdgpu_device pointer
566  * @offset: byte aligned register offset
567  *
568  * Returns the 8 bit value from the offset specified.
569  */
570 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
571 {
572 	if (amdgpu_device_skip_hw_access(adev))
573 		return 0;
574 
575 	if (offset < adev->rmmio_size)
576 		return (readb(adev->rmmio + offset));
577 	BUG();
578 }
579 
580 
581 /**
582  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
583  *
584  * @adev: amdgpu_device pointer
585  * @reg: dword aligned register offset
586  * @acc_flags: access flags which require special behavior
587  * @xcc_id: xcc accelerated compute core id
588  *
589  * Returns the 32 bit value from the offset specified.
590  */
591 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
592 				uint32_t reg, uint32_t acc_flags,
593 				uint32_t xcc_id)
594 {
595 	uint32_t ret, rlcg_flag;
596 
597 	if (amdgpu_device_skip_hw_access(adev))
598 		return 0;
599 
600 	if ((reg * 4) < adev->rmmio_size) {
601 		if (amdgpu_sriov_vf(adev) &&
602 		    !amdgpu_sriov_runtime(adev) &&
603 		    adev->gfx.rlc.rlcg_reg_access_supported &&
604 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
605 							 GC_HWIP, false,
606 							 &rlcg_flag)) {
607 			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
608 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
609 		    amdgpu_sriov_runtime(adev) &&
610 		    down_read_trylock(&adev->reset_domain->sem)) {
611 			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
612 			up_read(&adev->reset_domain->sem);
613 		} else {
614 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
615 		}
616 	} else {
617 		ret = adev->pcie_rreg(adev, reg * 4);
618 	}
619 
620 	return ret;
621 }
622 
623 /*
624  * MMIO register write with bytes helper functions
625  * @offset:bytes offset from MMIO start
626  * @value: the value want to be written to the register
627  */
628 
629 /**
630  * amdgpu_mm_wreg8 - read a memory mapped IO register
631  *
632  * @adev: amdgpu_device pointer
633  * @offset: byte aligned register offset
634  * @value: 8 bit value to write
635  *
636  * Writes the value specified to the offset specified.
637  */
638 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
639 {
640 	if (amdgpu_device_skip_hw_access(adev))
641 		return;
642 
643 	if (offset < adev->rmmio_size)
644 		writeb(value, adev->rmmio + offset);
645 	else
646 		BUG();
647 }
648 
649 /**
650  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
651  *
652  * @adev: amdgpu_device pointer
653  * @reg: dword aligned register offset
654  * @v: 32 bit value to write to the register
655  * @acc_flags: access flags which require special behavior
656  *
657  * Writes the value specified to the offset specified.
658  */
659 void amdgpu_device_wreg(struct amdgpu_device *adev,
660 			uint32_t reg, uint32_t v,
661 			uint32_t acc_flags)
662 {
663 	if (amdgpu_device_skip_hw_access(adev))
664 		return;
665 
666 	if ((reg * 4) < adev->rmmio_size) {
667 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
668 		    amdgpu_sriov_runtime(adev) &&
669 		    down_read_trylock(&adev->reset_domain->sem)) {
670 			amdgpu_kiq_wreg(adev, reg, v, 0);
671 			up_read(&adev->reset_domain->sem);
672 		} else {
673 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
674 		}
675 	} else {
676 		adev->pcie_wreg(adev, reg * 4, v);
677 	}
678 
679 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
680 }
681 
682 /**
683  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
684  *
685  * @adev: amdgpu_device pointer
686  * @reg: mmio/rlc register
687  * @v: value to write
688  * @xcc_id: xcc accelerated compute core id
689  *
690  * this function is invoked only for the debugfs register access
691  */
692 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
693 			     uint32_t reg, uint32_t v,
694 			     uint32_t xcc_id)
695 {
696 	if (amdgpu_device_skip_hw_access(adev))
697 		return;
698 
699 	if (amdgpu_sriov_fullaccess(adev) &&
700 	    adev->gfx.rlc.funcs &&
701 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
702 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
703 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
704 	} else if ((reg * 4) >= adev->rmmio_size) {
705 		adev->pcie_wreg(adev, reg * 4, v);
706 	} else {
707 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
708 	}
709 }
710 
711 /**
712  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
713  *
714  * @adev: amdgpu_device pointer
715  * @reg: dword aligned register offset
716  * @v: 32 bit value to write to the register
717  * @acc_flags: access flags which require special behavior
718  * @xcc_id: xcc accelerated compute core id
719  *
720  * Writes the value specified to the offset specified.
721  */
722 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
723 			uint32_t reg, uint32_t v,
724 			uint32_t acc_flags, uint32_t xcc_id)
725 {
726 	uint32_t rlcg_flag;
727 
728 	if (amdgpu_device_skip_hw_access(adev))
729 		return;
730 
731 	if ((reg * 4) < adev->rmmio_size) {
732 		if (amdgpu_sriov_vf(adev) &&
733 		    !amdgpu_sriov_runtime(adev) &&
734 		    adev->gfx.rlc.rlcg_reg_access_supported &&
735 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
736 							 GC_HWIP, true,
737 							 &rlcg_flag)) {
738 			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
739 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
740 		    amdgpu_sriov_runtime(adev) &&
741 		    down_read_trylock(&adev->reset_domain->sem)) {
742 			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
743 			up_read(&adev->reset_domain->sem);
744 		} else {
745 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
746 		}
747 	} else {
748 		adev->pcie_wreg(adev, reg * 4, v);
749 	}
750 }
751 
752 /**
753  * amdgpu_device_indirect_rreg - read an indirect register
754  *
755  * @adev: amdgpu_device pointer
756  * @reg_addr: indirect register address to read from
757  *
758  * Returns the value of indirect register @reg_addr
759  */
760 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
761 				u32 reg_addr)
762 {
763 	unsigned long flags, pcie_index, pcie_data;
764 	void __iomem *pcie_index_offset;
765 	void __iomem *pcie_data_offset;
766 	u32 r;
767 
768 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
769 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
770 
771 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
772 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
773 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
774 
775 	writel(reg_addr, pcie_index_offset);
776 	readl(pcie_index_offset);
777 	r = readl(pcie_data_offset);
778 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
779 
780 	return r;
781 }
782 
783 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
784 				    u64 reg_addr)
785 {
786 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
787 	u32 r;
788 	void __iomem *pcie_index_offset;
789 	void __iomem *pcie_index_hi_offset;
790 	void __iomem *pcie_data_offset;
791 
792 	if (unlikely(!adev->nbio.funcs)) {
793 		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
794 		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
795 	} else {
796 		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
797 		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
798 	}
799 
800 	if (reg_addr >> 32) {
801 		if (unlikely(!adev->nbio.funcs))
802 			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
803 		else
804 			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
805 	} else {
806 		pcie_index_hi = 0;
807 	}
808 
809 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
810 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
811 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
812 	if (pcie_index_hi != 0)
813 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
814 				pcie_index_hi * 4;
815 
816 	writel(reg_addr, pcie_index_offset);
817 	readl(pcie_index_offset);
818 	if (pcie_index_hi != 0) {
819 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
820 		readl(pcie_index_hi_offset);
821 	}
822 	r = readl(pcie_data_offset);
823 
824 	/* clear the high bits */
825 	if (pcie_index_hi != 0) {
826 		writel(0, pcie_index_hi_offset);
827 		readl(pcie_index_hi_offset);
828 	}
829 
830 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
831 
832 	return r;
833 }
834 
835 /**
836  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
837  *
838  * @adev: amdgpu_device pointer
839  * @reg_addr: indirect register address to read from
840  *
841  * Returns the value of indirect register @reg_addr
842  */
843 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
844 				  u32 reg_addr)
845 {
846 	unsigned long flags, pcie_index, pcie_data;
847 	void __iomem *pcie_index_offset;
848 	void __iomem *pcie_data_offset;
849 	u64 r;
850 
851 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
852 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
853 
854 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
855 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
856 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
857 
858 	/* read low 32 bits */
859 	writel(reg_addr, pcie_index_offset);
860 	readl(pcie_index_offset);
861 	r = readl(pcie_data_offset);
862 	/* read high 32 bits */
863 	writel(reg_addr + 4, pcie_index_offset);
864 	readl(pcie_index_offset);
865 	r |= ((u64)readl(pcie_data_offset) << 32);
866 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
867 
868 	return r;
869 }
870 
871 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
872 				  u64 reg_addr)
873 {
874 	unsigned long flags, pcie_index, pcie_data;
875 	unsigned long pcie_index_hi = 0;
876 	void __iomem *pcie_index_offset;
877 	void __iomem *pcie_index_hi_offset;
878 	void __iomem *pcie_data_offset;
879 	u64 r;
880 
881 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
882 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
883 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
884 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
885 
886 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
887 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
888 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
889 	if (pcie_index_hi != 0)
890 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
891 			pcie_index_hi * 4;
892 
893 	/* read low 32 bits */
894 	writel(reg_addr, pcie_index_offset);
895 	readl(pcie_index_offset);
896 	if (pcie_index_hi != 0) {
897 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
898 		readl(pcie_index_hi_offset);
899 	}
900 	r = readl(pcie_data_offset);
901 	/* read high 32 bits */
902 	writel(reg_addr + 4, pcie_index_offset);
903 	readl(pcie_index_offset);
904 	if (pcie_index_hi != 0) {
905 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
906 		readl(pcie_index_hi_offset);
907 	}
908 	r |= ((u64)readl(pcie_data_offset) << 32);
909 
910 	/* clear the high bits */
911 	if (pcie_index_hi != 0) {
912 		writel(0, pcie_index_hi_offset);
913 		readl(pcie_index_hi_offset);
914 	}
915 
916 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
917 
918 	return r;
919 }
920 
921 /**
922  * amdgpu_device_indirect_wreg - write an indirect register address
923  *
924  * @adev: amdgpu_device pointer
925  * @reg_addr: indirect register offset
926  * @reg_data: indirect register data
927  *
928  */
929 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
930 				 u32 reg_addr, u32 reg_data)
931 {
932 	unsigned long flags, pcie_index, pcie_data;
933 	void __iomem *pcie_index_offset;
934 	void __iomem *pcie_data_offset;
935 
936 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
937 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
938 
939 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
940 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
941 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
942 
943 	writel(reg_addr, pcie_index_offset);
944 	readl(pcie_index_offset);
945 	writel(reg_data, pcie_data_offset);
946 	readl(pcie_data_offset);
947 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
948 }
949 
950 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
951 				     u64 reg_addr, u32 reg_data)
952 {
953 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
954 	void __iomem *pcie_index_offset;
955 	void __iomem *pcie_index_hi_offset;
956 	void __iomem *pcie_data_offset;
957 
958 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
959 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
960 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
961 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
962 	else
963 		pcie_index_hi = 0;
964 
965 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
966 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
967 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
968 	if (pcie_index_hi != 0)
969 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
970 				pcie_index_hi * 4;
971 
972 	writel(reg_addr, pcie_index_offset);
973 	readl(pcie_index_offset);
974 	if (pcie_index_hi != 0) {
975 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
976 		readl(pcie_index_hi_offset);
977 	}
978 	writel(reg_data, pcie_data_offset);
979 	readl(pcie_data_offset);
980 
981 	/* clear the high bits */
982 	if (pcie_index_hi != 0) {
983 		writel(0, pcie_index_hi_offset);
984 		readl(pcie_index_hi_offset);
985 	}
986 
987 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
988 }
989 
990 /**
991  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
992  *
993  * @adev: amdgpu_device pointer
994  * @reg_addr: indirect register offset
995  * @reg_data: indirect register data
996  *
997  */
998 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
999 				   u32 reg_addr, u64 reg_data)
1000 {
1001 	unsigned long flags, pcie_index, pcie_data;
1002 	void __iomem *pcie_index_offset;
1003 	void __iomem *pcie_data_offset;
1004 
1005 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1006 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1007 
1008 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1009 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1010 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1011 
1012 	/* write low 32 bits */
1013 	writel(reg_addr, pcie_index_offset);
1014 	readl(pcie_index_offset);
1015 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1016 	readl(pcie_data_offset);
1017 	/* write high 32 bits */
1018 	writel(reg_addr + 4, pcie_index_offset);
1019 	readl(pcie_index_offset);
1020 	writel((u32)(reg_data >> 32), pcie_data_offset);
1021 	readl(pcie_data_offset);
1022 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1023 }
1024 
1025 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1026 				   u64 reg_addr, u64 reg_data)
1027 {
1028 	unsigned long flags, pcie_index, pcie_data;
1029 	unsigned long pcie_index_hi = 0;
1030 	void __iomem *pcie_index_offset;
1031 	void __iomem *pcie_index_hi_offset;
1032 	void __iomem *pcie_data_offset;
1033 
1034 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1035 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1036 	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1037 		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1038 
1039 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1040 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1041 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1042 	if (pcie_index_hi != 0)
1043 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1044 				pcie_index_hi * 4;
1045 
1046 	/* write low 32 bits */
1047 	writel(reg_addr, pcie_index_offset);
1048 	readl(pcie_index_offset);
1049 	if (pcie_index_hi != 0) {
1050 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1051 		readl(pcie_index_hi_offset);
1052 	}
1053 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1054 	readl(pcie_data_offset);
1055 	/* write high 32 bits */
1056 	writel(reg_addr + 4, pcie_index_offset);
1057 	readl(pcie_index_offset);
1058 	if (pcie_index_hi != 0) {
1059 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1060 		readl(pcie_index_hi_offset);
1061 	}
1062 	writel((u32)(reg_data >> 32), pcie_data_offset);
1063 	readl(pcie_data_offset);
1064 
1065 	/* clear the high bits */
1066 	if (pcie_index_hi != 0) {
1067 		writel(0, pcie_index_hi_offset);
1068 		readl(pcie_index_hi_offset);
1069 	}
1070 
1071 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1072 }
1073 
1074 /**
1075  * amdgpu_device_get_rev_id - query device rev_id
1076  *
1077  * @adev: amdgpu_device pointer
1078  *
1079  * Return device rev_id
1080  */
1081 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1082 {
1083 	return adev->nbio.funcs->get_rev_id(adev);
1084 }
1085 
1086 /**
1087  * amdgpu_invalid_rreg - dummy reg read function
1088  *
1089  * @adev: amdgpu_device pointer
1090  * @reg: offset of register
1091  *
1092  * Dummy register read function.  Used for register blocks
1093  * that certain asics don't have (all asics).
1094  * Returns the value in the register.
1095  */
1096 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1097 {
1098 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1099 	BUG();
1100 	return 0;
1101 }
1102 
1103 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1104 {
1105 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1106 	BUG();
1107 	return 0;
1108 }
1109 
1110 /**
1111  * amdgpu_invalid_wreg - dummy reg write function
1112  *
1113  * @adev: amdgpu_device pointer
1114  * @reg: offset of register
1115  * @v: value to write to the register
1116  *
1117  * Dummy register read function.  Used for register blocks
1118  * that certain asics don't have (all asics).
1119  */
1120 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1121 {
1122 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1123 		  reg, v);
1124 	BUG();
1125 }
1126 
1127 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1128 {
1129 	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1130 		  reg, v);
1131 	BUG();
1132 }
1133 
1134 /**
1135  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1136  *
1137  * @adev: amdgpu_device pointer
1138  * @reg: offset of register
1139  *
1140  * Dummy register read function.  Used for register blocks
1141  * that certain asics don't have (all asics).
1142  * Returns the value in the register.
1143  */
1144 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1145 {
1146 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1147 	BUG();
1148 	return 0;
1149 }
1150 
1151 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1152 {
1153 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1154 	BUG();
1155 	return 0;
1156 }
1157 
1158 /**
1159  * amdgpu_invalid_wreg64 - dummy reg write function
1160  *
1161  * @adev: amdgpu_device pointer
1162  * @reg: offset of register
1163  * @v: value to write to the register
1164  *
1165  * Dummy register read function.  Used for register blocks
1166  * that certain asics don't have (all asics).
1167  */
1168 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1169 {
1170 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1171 		  reg, v);
1172 	BUG();
1173 }
1174 
1175 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1176 {
1177 	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1178 		  reg, v);
1179 	BUG();
1180 }
1181 
1182 /**
1183  * amdgpu_block_invalid_rreg - dummy reg read function
1184  *
1185  * @adev: amdgpu_device pointer
1186  * @block: offset of instance
1187  * @reg: offset of register
1188  *
1189  * Dummy register read function.  Used for register blocks
1190  * that certain asics don't have (all asics).
1191  * Returns the value in the register.
1192  */
1193 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1194 					  uint32_t block, uint32_t reg)
1195 {
1196 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1197 		  reg, block);
1198 	BUG();
1199 	return 0;
1200 }
1201 
1202 /**
1203  * amdgpu_block_invalid_wreg - dummy reg write function
1204  *
1205  * @adev: amdgpu_device pointer
1206  * @block: offset of instance
1207  * @reg: offset of register
1208  * @v: value to write to the register
1209  *
1210  * Dummy register read function.  Used for register blocks
1211  * that certain asics don't have (all asics).
1212  */
1213 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1214 				      uint32_t block,
1215 				      uint32_t reg, uint32_t v)
1216 {
1217 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1218 		  reg, block, v);
1219 	BUG();
1220 }
1221 
1222 /**
1223  * amdgpu_device_asic_init - Wrapper for atom asic_init
1224  *
1225  * @adev: amdgpu_device pointer
1226  *
1227  * Does any asic specific work and then calls atom asic init.
1228  */
1229 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1230 {
1231 	int ret;
1232 
1233 	amdgpu_asic_pre_asic_init(adev);
1234 
1235 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1236 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1237 		amdgpu_psp_wait_for_bootloader(adev);
1238 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1239 		return ret;
1240 	} else {
1241 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1242 	}
1243 
1244 	return 0;
1245 }
1246 
1247 /**
1248  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1249  *
1250  * @adev: amdgpu_device pointer
1251  *
1252  * Allocates a scratch page of VRAM for use by various things in the
1253  * driver.
1254  */
1255 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1256 {
1257 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1258 				       AMDGPU_GEM_DOMAIN_VRAM |
1259 				       AMDGPU_GEM_DOMAIN_GTT,
1260 				       &adev->mem_scratch.robj,
1261 				       &adev->mem_scratch.gpu_addr,
1262 				       (void **)&adev->mem_scratch.ptr);
1263 }
1264 
1265 /**
1266  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1267  *
1268  * @adev: amdgpu_device pointer
1269  *
1270  * Frees the VRAM scratch page.
1271  */
1272 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1273 {
1274 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1275 }
1276 
1277 /**
1278  * amdgpu_device_program_register_sequence - program an array of registers.
1279  *
1280  * @adev: amdgpu_device pointer
1281  * @registers: pointer to the register array
1282  * @array_size: size of the register array
1283  *
1284  * Programs an array or registers with and or masks.
1285  * This is a helper for setting golden registers.
1286  */
1287 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1288 					     const u32 *registers,
1289 					     const u32 array_size)
1290 {
1291 	u32 tmp, reg, and_mask, or_mask;
1292 	int i;
1293 
1294 	if (array_size % 3)
1295 		return;
1296 
1297 	for (i = 0; i < array_size; i += 3) {
1298 		reg = registers[i + 0];
1299 		and_mask = registers[i + 1];
1300 		or_mask = registers[i + 2];
1301 
1302 		if (and_mask == 0xffffffff) {
1303 			tmp = or_mask;
1304 		} else {
1305 			tmp = RREG32(reg);
1306 			tmp &= ~and_mask;
1307 			if (adev->family >= AMDGPU_FAMILY_AI)
1308 				tmp |= (or_mask & and_mask);
1309 			else
1310 				tmp |= or_mask;
1311 		}
1312 		WREG32(reg, tmp);
1313 	}
1314 }
1315 
1316 /**
1317  * amdgpu_device_pci_config_reset - reset the GPU
1318  *
1319  * @adev: amdgpu_device pointer
1320  *
1321  * Resets the GPU using the pci config reset sequence.
1322  * Only applicable to asics prior to vega10.
1323  */
1324 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1325 {
1326 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1327 }
1328 
1329 /**
1330  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1331  *
1332  * @adev: amdgpu_device pointer
1333  *
1334  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1335  */
1336 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1337 {
1338 	return pci_reset_function(adev->pdev);
1339 }
1340 
1341 /*
1342  * amdgpu_device_wb_*()
1343  * Writeback is the method by which the GPU updates special pages in memory
1344  * with the status of certain GPU events (fences, ring pointers,etc.).
1345  */
1346 
1347 /**
1348  * amdgpu_device_wb_fini - Disable Writeback and free memory
1349  *
1350  * @adev: amdgpu_device pointer
1351  *
1352  * Disables Writeback and frees the Writeback memory (all asics).
1353  * Used at driver shutdown.
1354  */
1355 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1356 {
1357 	if (adev->wb.wb_obj) {
1358 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1359 				      &adev->wb.gpu_addr,
1360 				      (void **)&adev->wb.wb);
1361 		adev->wb.wb_obj = NULL;
1362 	}
1363 }
1364 
1365 /**
1366  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1367  *
1368  * @adev: amdgpu_device pointer
1369  *
1370  * Initializes writeback and allocates writeback memory (all asics).
1371  * Used at driver startup.
1372  * Returns 0 on success or an -error on failure.
1373  */
1374 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1375 {
1376 	int r;
1377 
1378 	if (adev->wb.wb_obj == NULL) {
1379 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1380 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1381 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1382 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1383 					    (void **)&adev->wb.wb);
1384 		if (r) {
1385 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1386 			return r;
1387 		}
1388 
1389 		adev->wb.num_wb = AMDGPU_MAX_WB;
1390 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1391 
1392 		/* clear wb memory */
1393 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1394 	}
1395 
1396 	return 0;
1397 }
1398 
1399 /**
1400  * amdgpu_device_wb_get - Allocate a wb entry
1401  *
1402  * @adev: amdgpu_device pointer
1403  * @wb: wb index
1404  *
1405  * Allocate a wb slot for use by the driver (all asics).
1406  * Returns 0 on success or -EINVAL on failure.
1407  */
1408 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1409 {
1410 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1411 
1412 	if (offset < adev->wb.num_wb) {
1413 		__set_bit(offset, adev->wb.used);
1414 		*wb = offset << 3; /* convert to dw offset */
1415 		return 0;
1416 	} else {
1417 		return -EINVAL;
1418 	}
1419 }
1420 
1421 /**
1422  * amdgpu_device_wb_free - Free a wb entry
1423  *
1424  * @adev: amdgpu_device pointer
1425  * @wb: wb index
1426  *
1427  * Free a wb slot allocated for use by the driver (all asics)
1428  */
1429 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1430 {
1431 	wb >>= 3;
1432 	if (wb < adev->wb.num_wb)
1433 		__clear_bit(wb, adev->wb.used);
1434 }
1435 
1436 /**
1437  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1438  *
1439  * @adev: amdgpu_device pointer
1440  *
1441  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1442  * to fail, but if any of the BARs is not accessible after the size we abort
1443  * driver loading by returning -ENODEV.
1444  */
1445 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1446 {
1447 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1448 	struct pci_bus *root;
1449 	struct resource *res;
1450 	unsigned int i;
1451 	u16 cmd;
1452 	int r;
1453 
1454 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1455 		return 0;
1456 
1457 	/* Bypass for VF */
1458 	if (amdgpu_sriov_vf(adev))
1459 		return 0;
1460 
1461 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1462 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1463 		DRM_WARN("System can't access extended configuration space,please check!!\n");
1464 
1465 	/* skip if the bios has already enabled large BAR */
1466 	if (adev->gmc.real_vram_size &&
1467 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1468 		return 0;
1469 
1470 	/* Check if the root BUS has 64bit memory resources */
1471 	root = adev->pdev->bus;
1472 	while (root->parent)
1473 		root = root->parent;
1474 
1475 	pci_bus_for_each_resource(root, res, i) {
1476 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1477 		    res->start > 0x100000000ull)
1478 			break;
1479 	}
1480 
1481 	/* Trying to resize is pointless without a root hub window above 4GB */
1482 	if (!res)
1483 		return 0;
1484 
1485 	/* Limit the BAR size to what is available */
1486 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1487 			rbar_size);
1488 
1489 	/* Disable memory decoding while we change the BAR addresses and size */
1490 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1491 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1492 			      cmd & ~PCI_COMMAND_MEMORY);
1493 
1494 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1495 	amdgpu_doorbell_fini(adev);
1496 	if (adev->asic_type >= CHIP_BONAIRE)
1497 		pci_release_resource(adev->pdev, 2);
1498 
1499 	pci_release_resource(adev->pdev, 0);
1500 
1501 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1502 	if (r == -ENOSPC)
1503 		DRM_INFO("Not enough PCI address space for a large BAR.");
1504 	else if (r && r != -ENOTSUPP)
1505 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1506 
1507 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1508 
1509 	/* When the doorbell or fb BAR isn't available we have no chance of
1510 	 * using the device.
1511 	 */
1512 	r = amdgpu_doorbell_init(adev);
1513 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1514 		return -ENODEV;
1515 
1516 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1517 
1518 	return 0;
1519 }
1520 
1521 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1522 {
1523 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1524 		return false;
1525 
1526 	return true;
1527 }
1528 
1529 /*
1530  * GPU helpers function.
1531  */
1532 /**
1533  * amdgpu_device_need_post - check if the hw need post or not
1534  *
1535  * @adev: amdgpu_device pointer
1536  *
1537  * Check if the asic has been initialized (all asics) at driver startup
1538  * or post is needed if  hw reset is performed.
1539  * Returns true if need or false if not.
1540  */
1541 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1542 {
1543 	uint32_t reg;
1544 
1545 	if (amdgpu_sriov_vf(adev))
1546 		return false;
1547 
1548 	if (!amdgpu_device_read_bios(adev))
1549 		return false;
1550 
1551 	if (amdgpu_passthrough(adev)) {
1552 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1553 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1554 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1555 		 * vpost executed for smc version below 22.15
1556 		 */
1557 		if (adev->asic_type == CHIP_FIJI) {
1558 			int err;
1559 			uint32_t fw_ver;
1560 
1561 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1562 			/* force vPost if error occured */
1563 			if (err)
1564 				return true;
1565 
1566 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1567 			release_firmware(adev->pm.fw);
1568 			if (fw_ver < 0x00160e00)
1569 				return true;
1570 		}
1571 	}
1572 
1573 	/* Don't post if we need to reset whole hive on init */
1574 	if (adev->gmc.xgmi.pending_reset)
1575 		return false;
1576 
1577 	if (adev->has_hw_reset) {
1578 		adev->has_hw_reset = false;
1579 		return true;
1580 	}
1581 
1582 	/* bios scratch used on CIK+ */
1583 	if (adev->asic_type >= CHIP_BONAIRE)
1584 		return amdgpu_atombios_scratch_need_asic_init(adev);
1585 
1586 	/* check MEM_SIZE for older asics */
1587 	reg = amdgpu_asic_get_config_memsize(adev);
1588 
1589 	if ((reg != 0) && (reg != 0xffffffff))
1590 		return false;
1591 
1592 	return true;
1593 }
1594 
1595 /*
1596  * Check whether seamless boot is supported.
1597  *
1598  * So far we only support seamless boot on DCE 3.0 or later.
1599  * If users report that it works on older ASICS as well, we may
1600  * loosen this.
1601  */
1602 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1603 {
1604 	switch (amdgpu_seamless) {
1605 	case -1:
1606 		break;
1607 	case 1:
1608 		return true;
1609 	case 0:
1610 		return false;
1611 	default:
1612 		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1613 			  amdgpu_seamless);
1614 		return false;
1615 	}
1616 
1617 	if (!(adev->flags & AMD_IS_APU))
1618 		return false;
1619 
1620 	if (adev->mman.keep_stolen_vga_memory)
1621 		return false;
1622 
1623 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1624 }
1625 
1626 /*
1627  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1628  * don't support dynamic speed switching. Until we have confirmation from Intel
1629  * that a specific host supports it, it's safer that we keep it disabled for all.
1630  *
1631  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1632  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1633  */
1634 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1635 {
1636 #if IS_ENABLED(CONFIG_X86)
1637 	struct cpuinfo_x86 *c = &cpu_data(0);
1638 
1639 	/* eGPU change speeds based on USB4 fabric conditions */
1640 	if (dev_is_removable(adev->dev))
1641 		return true;
1642 
1643 	if (c->x86_vendor == X86_VENDOR_INTEL)
1644 		return false;
1645 #endif
1646 	return true;
1647 }
1648 
1649 /**
1650  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1651  *
1652  * @adev: amdgpu_device pointer
1653  *
1654  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1655  * be set for this device.
1656  *
1657  * Returns true if it should be used or false if not.
1658  */
1659 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1660 {
1661 	switch (amdgpu_aspm) {
1662 	case -1:
1663 		break;
1664 	case 0:
1665 		return false;
1666 	case 1:
1667 		return true;
1668 	default:
1669 		return false;
1670 	}
1671 	if (adev->flags & AMD_IS_APU)
1672 		return false;
1673 	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1674 		return false;
1675 	return pcie_aspm_enabled(adev->pdev);
1676 }
1677 
1678 /* if we get transitioned to only one device, take VGA back */
1679 /**
1680  * amdgpu_device_vga_set_decode - enable/disable vga decode
1681  *
1682  * @pdev: PCI device pointer
1683  * @state: enable/disable vga decode
1684  *
1685  * Enable/disable vga decode (all asics).
1686  * Returns VGA resource flags.
1687  */
1688 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1689 		bool state)
1690 {
1691 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1692 
1693 	amdgpu_asic_set_vga_state(adev, state);
1694 	if (state)
1695 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1696 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1697 	else
1698 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1699 }
1700 
1701 /**
1702  * amdgpu_device_check_block_size - validate the vm block size
1703  *
1704  * @adev: amdgpu_device pointer
1705  *
1706  * Validates the vm block size specified via module parameter.
1707  * The vm block size defines number of bits in page table versus page directory,
1708  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1709  * page table and the remaining bits are in the page directory.
1710  */
1711 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1712 {
1713 	/* defines number of bits in page table versus page directory,
1714 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1715 	 * page table and the remaining bits are in the page directory
1716 	 */
1717 	if (amdgpu_vm_block_size == -1)
1718 		return;
1719 
1720 	if (amdgpu_vm_block_size < 9) {
1721 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1722 			 amdgpu_vm_block_size);
1723 		amdgpu_vm_block_size = -1;
1724 	}
1725 }
1726 
1727 /**
1728  * amdgpu_device_check_vm_size - validate the vm size
1729  *
1730  * @adev: amdgpu_device pointer
1731  *
1732  * Validates the vm size in GB specified via module parameter.
1733  * The VM size is the size of the GPU virtual memory space in GB.
1734  */
1735 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1736 {
1737 	/* no need to check the default value */
1738 	if (amdgpu_vm_size == -1)
1739 		return;
1740 
1741 	if (amdgpu_vm_size < 1) {
1742 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1743 			 amdgpu_vm_size);
1744 		amdgpu_vm_size = -1;
1745 	}
1746 }
1747 
1748 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1749 {
1750 	struct sysinfo si;
1751 	bool is_os_64 = (sizeof(void *) == 8);
1752 	uint64_t total_memory;
1753 	uint64_t dram_size_seven_GB = 0x1B8000000;
1754 	uint64_t dram_size_three_GB = 0xB8000000;
1755 
1756 	if (amdgpu_smu_memory_pool_size == 0)
1757 		return;
1758 
1759 	if (!is_os_64) {
1760 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1761 		goto def_value;
1762 	}
1763 	si_meminfo(&si);
1764 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1765 
1766 	if ((amdgpu_smu_memory_pool_size == 1) ||
1767 		(amdgpu_smu_memory_pool_size == 2)) {
1768 		if (total_memory < dram_size_three_GB)
1769 			goto def_value1;
1770 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1771 		(amdgpu_smu_memory_pool_size == 8)) {
1772 		if (total_memory < dram_size_seven_GB)
1773 			goto def_value1;
1774 	} else {
1775 		DRM_WARN("Smu memory pool size not supported\n");
1776 		goto def_value;
1777 	}
1778 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1779 
1780 	return;
1781 
1782 def_value1:
1783 	DRM_WARN("No enough system memory\n");
1784 def_value:
1785 	adev->pm.smu_prv_buffer_size = 0;
1786 }
1787 
1788 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1789 {
1790 	if (!(adev->flags & AMD_IS_APU) ||
1791 	    adev->asic_type < CHIP_RAVEN)
1792 		return 0;
1793 
1794 	switch (adev->asic_type) {
1795 	case CHIP_RAVEN:
1796 		if (adev->pdev->device == 0x15dd)
1797 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1798 		if (adev->pdev->device == 0x15d8)
1799 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1800 		break;
1801 	case CHIP_RENOIR:
1802 		if ((adev->pdev->device == 0x1636) ||
1803 		    (adev->pdev->device == 0x164c))
1804 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1805 		else
1806 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1807 		break;
1808 	case CHIP_VANGOGH:
1809 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1810 		break;
1811 	case CHIP_YELLOW_CARP:
1812 		break;
1813 	case CHIP_CYAN_SKILLFISH:
1814 		if ((adev->pdev->device == 0x13FE) ||
1815 		    (adev->pdev->device == 0x143F))
1816 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1817 		break;
1818 	default:
1819 		break;
1820 	}
1821 
1822 	return 0;
1823 }
1824 
1825 /**
1826  * amdgpu_device_check_arguments - validate module params
1827  *
1828  * @adev: amdgpu_device pointer
1829  *
1830  * Validates certain module parameters and updates
1831  * the associated values used by the driver (all asics).
1832  */
1833 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1834 {
1835 	if (amdgpu_sched_jobs < 4) {
1836 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1837 			 amdgpu_sched_jobs);
1838 		amdgpu_sched_jobs = 4;
1839 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1840 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1841 			 amdgpu_sched_jobs);
1842 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1843 	}
1844 
1845 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1846 		/* gart size must be greater or equal to 32M */
1847 		dev_warn(adev->dev, "gart size (%d) too small\n",
1848 			 amdgpu_gart_size);
1849 		amdgpu_gart_size = -1;
1850 	}
1851 
1852 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1853 		/* gtt size must be greater or equal to 32M */
1854 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1855 				 amdgpu_gtt_size);
1856 		amdgpu_gtt_size = -1;
1857 	}
1858 
1859 	/* valid range is between 4 and 9 inclusive */
1860 	if (amdgpu_vm_fragment_size != -1 &&
1861 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1862 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1863 		amdgpu_vm_fragment_size = -1;
1864 	}
1865 
1866 	if (amdgpu_sched_hw_submission < 2) {
1867 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1868 			 amdgpu_sched_hw_submission);
1869 		amdgpu_sched_hw_submission = 2;
1870 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1871 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1872 			 amdgpu_sched_hw_submission);
1873 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1874 	}
1875 
1876 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1877 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1878 		amdgpu_reset_method = -1;
1879 	}
1880 
1881 	amdgpu_device_check_smu_prv_buffer_size(adev);
1882 
1883 	amdgpu_device_check_vm_size(adev);
1884 
1885 	amdgpu_device_check_block_size(adev);
1886 
1887 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1888 
1889 	return 0;
1890 }
1891 
1892 /**
1893  * amdgpu_switcheroo_set_state - set switcheroo state
1894  *
1895  * @pdev: pci dev pointer
1896  * @state: vga_switcheroo state
1897  *
1898  * Callback for the switcheroo driver.  Suspends or resumes
1899  * the asics before or after it is powered up using ACPI methods.
1900  */
1901 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1902 					enum vga_switcheroo_state state)
1903 {
1904 	struct drm_device *dev = pci_get_drvdata(pdev);
1905 	int r;
1906 
1907 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1908 		return;
1909 
1910 	if (state == VGA_SWITCHEROO_ON) {
1911 		pr_info("switched on\n");
1912 		/* don't suspend or resume card normally */
1913 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1914 
1915 		pci_set_power_state(pdev, PCI_D0);
1916 		amdgpu_device_load_pci_state(pdev);
1917 		r = pci_enable_device(pdev);
1918 		if (r)
1919 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1920 		amdgpu_device_resume(dev, true);
1921 
1922 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1923 	} else {
1924 		pr_info("switched off\n");
1925 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1926 		amdgpu_device_prepare(dev);
1927 		amdgpu_device_suspend(dev, true);
1928 		amdgpu_device_cache_pci_state(pdev);
1929 		/* Shut down the device */
1930 		pci_disable_device(pdev);
1931 		pci_set_power_state(pdev, PCI_D3cold);
1932 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1933 	}
1934 }
1935 
1936 /**
1937  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1938  *
1939  * @pdev: pci dev pointer
1940  *
1941  * Callback for the switcheroo driver.  Check of the switcheroo
1942  * state can be changed.
1943  * Returns true if the state can be changed, false if not.
1944  */
1945 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1946 {
1947 	struct drm_device *dev = pci_get_drvdata(pdev);
1948 
1949        /*
1950 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1951 	* locking inversion with the driver load path. And the access here is
1952 	* completely racy anyway. So don't bother with locking for now.
1953 	*/
1954 	return atomic_read(&dev->open_count) == 0;
1955 }
1956 
1957 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1958 	.set_gpu_state = amdgpu_switcheroo_set_state,
1959 	.reprobe = NULL,
1960 	.can_switch = amdgpu_switcheroo_can_switch,
1961 };
1962 
1963 /**
1964  * amdgpu_device_ip_set_clockgating_state - set the CG state
1965  *
1966  * @dev: amdgpu_device pointer
1967  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1968  * @state: clockgating state (gate or ungate)
1969  *
1970  * Sets the requested clockgating state for all instances of
1971  * the hardware IP specified.
1972  * Returns the error code from the last instance.
1973  */
1974 int amdgpu_device_ip_set_clockgating_state(void *dev,
1975 					   enum amd_ip_block_type block_type,
1976 					   enum amd_clockgating_state state)
1977 {
1978 	struct amdgpu_device *adev = dev;
1979 	int i, r = 0;
1980 
1981 	for (i = 0; i < adev->num_ip_blocks; i++) {
1982 		if (!adev->ip_blocks[i].status.valid)
1983 			continue;
1984 		if (adev->ip_blocks[i].version->type != block_type)
1985 			continue;
1986 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1987 			continue;
1988 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1989 			(void *)adev, state);
1990 		if (r)
1991 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1992 				  adev->ip_blocks[i].version->funcs->name, r);
1993 	}
1994 	return r;
1995 }
1996 
1997 /**
1998  * amdgpu_device_ip_set_powergating_state - set the PG state
1999  *
2000  * @dev: amdgpu_device pointer
2001  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2002  * @state: powergating state (gate or ungate)
2003  *
2004  * Sets the requested powergating state for all instances of
2005  * the hardware IP specified.
2006  * Returns the error code from the last instance.
2007  */
2008 int amdgpu_device_ip_set_powergating_state(void *dev,
2009 					   enum amd_ip_block_type block_type,
2010 					   enum amd_powergating_state state)
2011 {
2012 	struct amdgpu_device *adev = dev;
2013 	int i, r = 0;
2014 
2015 	for (i = 0; i < adev->num_ip_blocks; i++) {
2016 		if (!adev->ip_blocks[i].status.valid)
2017 			continue;
2018 		if (adev->ip_blocks[i].version->type != block_type)
2019 			continue;
2020 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2021 			continue;
2022 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2023 			(void *)adev, state);
2024 		if (r)
2025 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2026 				  adev->ip_blocks[i].version->funcs->name, r);
2027 	}
2028 	return r;
2029 }
2030 
2031 /**
2032  * amdgpu_device_ip_get_clockgating_state - get the CG state
2033  *
2034  * @adev: amdgpu_device pointer
2035  * @flags: clockgating feature flags
2036  *
2037  * Walks the list of IPs on the device and updates the clockgating
2038  * flags for each IP.
2039  * Updates @flags with the feature flags for each hardware IP where
2040  * clockgating is enabled.
2041  */
2042 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2043 					    u64 *flags)
2044 {
2045 	int i;
2046 
2047 	for (i = 0; i < adev->num_ip_blocks; i++) {
2048 		if (!adev->ip_blocks[i].status.valid)
2049 			continue;
2050 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2051 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2052 	}
2053 }
2054 
2055 /**
2056  * amdgpu_device_ip_wait_for_idle - wait for idle
2057  *
2058  * @adev: amdgpu_device pointer
2059  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2060  *
2061  * Waits for the request hardware IP to be idle.
2062  * Returns 0 for success or a negative error code on failure.
2063  */
2064 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2065 				   enum amd_ip_block_type block_type)
2066 {
2067 	int i, r;
2068 
2069 	for (i = 0; i < adev->num_ip_blocks; i++) {
2070 		if (!adev->ip_blocks[i].status.valid)
2071 			continue;
2072 		if (adev->ip_blocks[i].version->type == block_type) {
2073 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2074 			if (r)
2075 				return r;
2076 			break;
2077 		}
2078 	}
2079 	return 0;
2080 
2081 }
2082 
2083 /**
2084  * amdgpu_device_ip_is_idle - is the hardware IP idle
2085  *
2086  * @adev: amdgpu_device pointer
2087  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2088  *
2089  * Check if the hardware IP is idle or not.
2090  * Returns true if it the IP is idle, false if not.
2091  */
2092 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2093 			      enum amd_ip_block_type block_type)
2094 {
2095 	int i;
2096 
2097 	for (i = 0; i < adev->num_ip_blocks; i++) {
2098 		if (!adev->ip_blocks[i].status.valid)
2099 			continue;
2100 		if (adev->ip_blocks[i].version->type == block_type)
2101 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2102 	}
2103 	return true;
2104 
2105 }
2106 
2107 /**
2108  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2109  *
2110  * @adev: amdgpu_device pointer
2111  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2112  *
2113  * Returns a pointer to the hardware IP block structure
2114  * if it exists for the asic, otherwise NULL.
2115  */
2116 struct amdgpu_ip_block *
2117 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2118 			      enum amd_ip_block_type type)
2119 {
2120 	int i;
2121 
2122 	for (i = 0; i < adev->num_ip_blocks; i++)
2123 		if (adev->ip_blocks[i].version->type == type)
2124 			return &adev->ip_blocks[i];
2125 
2126 	return NULL;
2127 }
2128 
2129 /**
2130  * amdgpu_device_ip_block_version_cmp
2131  *
2132  * @adev: amdgpu_device pointer
2133  * @type: enum amd_ip_block_type
2134  * @major: major version
2135  * @minor: minor version
2136  *
2137  * return 0 if equal or greater
2138  * return 1 if smaller or the ip_block doesn't exist
2139  */
2140 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2141 				       enum amd_ip_block_type type,
2142 				       u32 major, u32 minor)
2143 {
2144 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2145 
2146 	if (ip_block && ((ip_block->version->major > major) ||
2147 			((ip_block->version->major == major) &&
2148 			(ip_block->version->minor >= minor))))
2149 		return 0;
2150 
2151 	return 1;
2152 }
2153 
2154 /**
2155  * amdgpu_device_ip_block_add
2156  *
2157  * @adev: amdgpu_device pointer
2158  * @ip_block_version: pointer to the IP to add
2159  *
2160  * Adds the IP block driver information to the collection of IPs
2161  * on the asic.
2162  */
2163 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2164 			       const struct amdgpu_ip_block_version *ip_block_version)
2165 {
2166 	if (!ip_block_version)
2167 		return -EINVAL;
2168 
2169 	switch (ip_block_version->type) {
2170 	case AMD_IP_BLOCK_TYPE_VCN:
2171 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2172 			return 0;
2173 		break;
2174 	case AMD_IP_BLOCK_TYPE_JPEG:
2175 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2176 			return 0;
2177 		break;
2178 	default:
2179 		break;
2180 	}
2181 
2182 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2183 		  ip_block_version->funcs->name);
2184 
2185 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2186 
2187 	return 0;
2188 }
2189 
2190 /**
2191  * amdgpu_device_enable_virtual_display - enable virtual display feature
2192  *
2193  * @adev: amdgpu_device pointer
2194  *
2195  * Enabled the virtual display feature if the user has enabled it via
2196  * the module parameter virtual_display.  This feature provides a virtual
2197  * display hardware on headless boards or in virtualized environments.
2198  * This function parses and validates the configuration string specified by
2199  * the user and configues the virtual display configuration (number of
2200  * virtual connectors, crtcs, etc.) specified.
2201  */
2202 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2203 {
2204 	adev->enable_virtual_display = false;
2205 
2206 	if (amdgpu_virtual_display) {
2207 		const char *pci_address_name = pci_name(adev->pdev);
2208 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2209 
2210 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2211 		pciaddstr_tmp = pciaddstr;
2212 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2213 			pciaddname = strsep(&pciaddname_tmp, ",");
2214 			if (!strcmp("all", pciaddname)
2215 			    || !strcmp(pci_address_name, pciaddname)) {
2216 				long num_crtc;
2217 				int res = -1;
2218 
2219 				adev->enable_virtual_display = true;
2220 
2221 				if (pciaddname_tmp)
2222 					res = kstrtol(pciaddname_tmp, 10,
2223 						      &num_crtc);
2224 
2225 				if (!res) {
2226 					if (num_crtc < 1)
2227 						num_crtc = 1;
2228 					if (num_crtc > 6)
2229 						num_crtc = 6;
2230 					adev->mode_info.num_crtc = num_crtc;
2231 				} else {
2232 					adev->mode_info.num_crtc = 1;
2233 				}
2234 				break;
2235 			}
2236 		}
2237 
2238 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2239 			 amdgpu_virtual_display, pci_address_name,
2240 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2241 
2242 		kfree(pciaddstr);
2243 	}
2244 }
2245 
2246 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2247 {
2248 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2249 		adev->mode_info.num_crtc = 1;
2250 		adev->enable_virtual_display = true;
2251 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2252 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2253 	}
2254 }
2255 
2256 /**
2257  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2258  *
2259  * @adev: amdgpu_device pointer
2260  *
2261  * Parses the asic configuration parameters specified in the gpu info
2262  * firmware and makes them availale to the driver for use in configuring
2263  * the asic.
2264  * Returns 0 on success, -EINVAL on failure.
2265  */
2266 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2267 {
2268 	const char *chip_name;
2269 	char fw_name[40];
2270 	int err;
2271 	const struct gpu_info_firmware_header_v1_0 *hdr;
2272 
2273 	adev->firmware.gpu_info_fw = NULL;
2274 
2275 	if (adev->mman.discovery_bin)
2276 		return 0;
2277 
2278 	switch (adev->asic_type) {
2279 	default:
2280 		return 0;
2281 	case CHIP_VEGA10:
2282 		chip_name = "vega10";
2283 		break;
2284 	case CHIP_VEGA12:
2285 		chip_name = "vega12";
2286 		break;
2287 	case CHIP_RAVEN:
2288 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2289 			chip_name = "raven2";
2290 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2291 			chip_name = "picasso";
2292 		else
2293 			chip_name = "raven";
2294 		break;
2295 	case CHIP_ARCTURUS:
2296 		chip_name = "arcturus";
2297 		break;
2298 	case CHIP_NAVI12:
2299 		chip_name = "navi12";
2300 		break;
2301 	}
2302 
2303 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2304 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2305 	if (err) {
2306 		dev_err(adev->dev,
2307 			"Failed to get gpu_info firmware \"%s\"\n",
2308 			fw_name);
2309 		goto out;
2310 	}
2311 
2312 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2313 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2314 
2315 	switch (hdr->version_major) {
2316 	case 1:
2317 	{
2318 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2319 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2320 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2321 
2322 		/*
2323 		 * Should be droped when DAL no longer needs it.
2324 		 */
2325 		if (adev->asic_type == CHIP_NAVI12)
2326 			goto parse_soc_bounding_box;
2327 
2328 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2329 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2330 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2331 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2332 		adev->gfx.config.max_texture_channel_caches =
2333 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2334 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2335 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2336 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2337 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2338 		adev->gfx.config.double_offchip_lds_buf =
2339 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2340 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2341 		adev->gfx.cu_info.max_waves_per_simd =
2342 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2343 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2344 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2345 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2346 		if (hdr->version_minor >= 1) {
2347 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2348 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2349 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2350 			adev->gfx.config.num_sc_per_sh =
2351 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2352 			adev->gfx.config.num_packer_per_sc =
2353 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2354 		}
2355 
2356 parse_soc_bounding_box:
2357 		/*
2358 		 * soc bounding box info is not integrated in disocovery table,
2359 		 * we always need to parse it from gpu info firmware if needed.
2360 		 */
2361 		if (hdr->version_minor == 2) {
2362 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2363 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2364 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2365 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2366 		}
2367 		break;
2368 	}
2369 	default:
2370 		dev_err(adev->dev,
2371 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2372 		err = -EINVAL;
2373 		goto out;
2374 	}
2375 out:
2376 	return err;
2377 }
2378 
2379 /**
2380  * amdgpu_device_ip_early_init - run early init for hardware IPs
2381  *
2382  * @adev: amdgpu_device pointer
2383  *
2384  * Early initialization pass for hardware IPs.  The hardware IPs that make
2385  * up each asic are discovered each IP's early_init callback is run.  This
2386  * is the first stage in initializing the asic.
2387  * Returns 0 on success, negative error code on failure.
2388  */
2389 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2390 {
2391 	struct pci_dev *parent;
2392 	int i, r;
2393 	bool total;
2394 
2395 	amdgpu_device_enable_virtual_display(adev);
2396 
2397 	if (amdgpu_sriov_vf(adev)) {
2398 		r = amdgpu_virt_request_full_gpu(adev, true);
2399 		if (r)
2400 			return r;
2401 	}
2402 
2403 	switch (adev->asic_type) {
2404 #ifdef CONFIG_DRM_AMDGPU_SI
2405 	case CHIP_VERDE:
2406 	case CHIP_TAHITI:
2407 	case CHIP_PITCAIRN:
2408 	case CHIP_OLAND:
2409 	case CHIP_HAINAN:
2410 		adev->family = AMDGPU_FAMILY_SI;
2411 		r = si_set_ip_blocks(adev);
2412 		if (r)
2413 			return r;
2414 		break;
2415 #endif
2416 #ifdef CONFIG_DRM_AMDGPU_CIK
2417 	case CHIP_BONAIRE:
2418 	case CHIP_HAWAII:
2419 	case CHIP_KAVERI:
2420 	case CHIP_KABINI:
2421 	case CHIP_MULLINS:
2422 		if (adev->flags & AMD_IS_APU)
2423 			adev->family = AMDGPU_FAMILY_KV;
2424 		else
2425 			adev->family = AMDGPU_FAMILY_CI;
2426 
2427 		r = cik_set_ip_blocks(adev);
2428 		if (r)
2429 			return r;
2430 		break;
2431 #endif
2432 	case CHIP_TOPAZ:
2433 	case CHIP_TONGA:
2434 	case CHIP_FIJI:
2435 	case CHIP_POLARIS10:
2436 	case CHIP_POLARIS11:
2437 	case CHIP_POLARIS12:
2438 	case CHIP_VEGAM:
2439 	case CHIP_CARRIZO:
2440 	case CHIP_STONEY:
2441 		if (adev->flags & AMD_IS_APU)
2442 			adev->family = AMDGPU_FAMILY_CZ;
2443 		else
2444 			adev->family = AMDGPU_FAMILY_VI;
2445 
2446 		r = vi_set_ip_blocks(adev);
2447 		if (r)
2448 			return r;
2449 		break;
2450 	default:
2451 		r = amdgpu_discovery_set_ip_blocks(adev);
2452 		if (r)
2453 			return r;
2454 		break;
2455 	}
2456 
2457 	if (amdgpu_has_atpx() &&
2458 	    (amdgpu_is_atpx_hybrid() ||
2459 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2460 	    ((adev->flags & AMD_IS_APU) == 0) &&
2461 	    !dev_is_removable(&adev->pdev->dev))
2462 		adev->flags |= AMD_IS_PX;
2463 
2464 	if (!(adev->flags & AMD_IS_APU)) {
2465 		parent = pcie_find_root_port(adev->pdev);
2466 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2467 	}
2468 
2469 
2470 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2471 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2472 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2473 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2474 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2475 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2476 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2477 
2478 	total = true;
2479 	for (i = 0; i < adev->num_ip_blocks; i++) {
2480 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2481 			DRM_WARN("disabled ip block: %d <%s>\n",
2482 				  i, adev->ip_blocks[i].version->funcs->name);
2483 			adev->ip_blocks[i].status.valid = false;
2484 		} else {
2485 			if (adev->ip_blocks[i].version->funcs->early_init) {
2486 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2487 				if (r == -ENOENT) {
2488 					adev->ip_blocks[i].status.valid = false;
2489 				} else if (r) {
2490 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2491 						  adev->ip_blocks[i].version->funcs->name, r);
2492 					total = false;
2493 				} else {
2494 					adev->ip_blocks[i].status.valid = true;
2495 				}
2496 			} else {
2497 				adev->ip_blocks[i].status.valid = true;
2498 			}
2499 		}
2500 		/* get the vbios after the asic_funcs are set up */
2501 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2502 			r = amdgpu_device_parse_gpu_info_fw(adev);
2503 			if (r)
2504 				return r;
2505 
2506 			/* Read BIOS */
2507 			if (amdgpu_device_read_bios(adev)) {
2508 				if (!amdgpu_get_bios(adev))
2509 					return -EINVAL;
2510 
2511 				r = amdgpu_atombios_init(adev);
2512 				if (r) {
2513 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2514 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2515 					return r;
2516 				}
2517 			}
2518 
2519 			/*get pf2vf msg info at it's earliest time*/
2520 			if (amdgpu_sriov_vf(adev))
2521 				amdgpu_virt_init_data_exchange(adev);
2522 
2523 		}
2524 	}
2525 	if (!total)
2526 		return -ENODEV;
2527 
2528 	amdgpu_amdkfd_device_probe(adev);
2529 	adev->cg_flags &= amdgpu_cg_mask;
2530 	adev->pg_flags &= amdgpu_pg_mask;
2531 
2532 	return 0;
2533 }
2534 
2535 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2536 {
2537 	int i, r;
2538 
2539 	for (i = 0; i < adev->num_ip_blocks; i++) {
2540 		if (!adev->ip_blocks[i].status.sw)
2541 			continue;
2542 		if (adev->ip_blocks[i].status.hw)
2543 			continue;
2544 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2545 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2546 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2547 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2548 			if (r) {
2549 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2550 					  adev->ip_blocks[i].version->funcs->name, r);
2551 				return r;
2552 			}
2553 			adev->ip_blocks[i].status.hw = true;
2554 		}
2555 	}
2556 
2557 	return 0;
2558 }
2559 
2560 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2561 {
2562 	int i, r;
2563 
2564 	for (i = 0; i < adev->num_ip_blocks; i++) {
2565 		if (!adev->ip_blocks[i].status.sw)
2566 			continue;
2567 		if (adev->ip_blocks[i].status.hw)
2568 			continue;
2569 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2570 		if (r) {
2571 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2572 				  adev->ip_blocks[i].version->funcs->name, r);
2573 			return r;
2574 		}
2575 		adev->ip_blocks[i].status.hw = true;
2576 	}
2577 
2578 	return 0;
2579 }
2580 
2581 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2582 {
2583 	int r = 0;
2584 	int i;
2585 	uint32_t smu_version;
2586 
2587 	if (adev->asic_type >= CHIP_VEGA10) {
2588 		for (i = 0; i < adev->num_ip_blocks; i++) {
2589 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2590 				continue;
2591 
2592 			if (!adev->ip_blocks[i].status.sw)
2593 				continue;
2594 
2595 			/* no need to do the fw loading again if already done*/
2596 			if (adev->ip_blocks[i].status.hw == true)
2597 				break;
2598 
2599 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2600 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2601 				if (r) {
2602 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2603 							  adev->ip_blocks[i].version->funcs->name, r);
2604 					return r;
2605 				}
2606 			} else {
2607 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2608 				if (r) {
2609 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2610 							  adev->ip_blocks[i].version->funcs->name, r);
2611 					return r;
2612 				}
2613 			}
2614 
2615 			adev->ip_blocks[i].status.hw = true;
2616 			break;
2617 		}
2618 	}
2619 
2620 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2621 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2622 
2623 	return r;
2624 }
2625 
2626 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2627 {
2628 	long timeout;
2629 	int r, i;
2630 
2631 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2632 		struct amdgpu_ring *ring = adev->rings[i];
2633 
2634 		/* No need to setup the GPU scheduler for rings that don't need it */
2635 		if (!ring || ring->no_scheduler)
2636 			continue;
2637 
2638 		switch (ring->funcs->type) {
2639 		case AMDGPU_RING_TYPE_GFX:
2640 			timeout = adev->gfx_timeout;
2641 			break;
2642 		case AMDGPU_RING_TYPE_COMPUTE:
2643 			timeout = adev->compute_timeout;
2644 			break;
2645 		case AMDGPU_RING_TYPE_SDMA:
2646 			timeout = adev->sdma_timeout;
2647 			break;
2648 		default:
2649 			timeout = adev->video_timeout;
2650 			break;
2651 		}
2652 
2653 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2654 				   DRM_SCHED_PRIORITY_COUNT,
2655 				   ring->num_hw_submission, 0,
2656 				   timeout, adev->reset_domain->wq,
2657 				   ring->sched_score, ring->name,
2658 				   adev->dev);
2659 		if (r) {
2660 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2661 				  ring->name);
2662 			return r;
2663 		}
2664 		r = amdgpu_uvd_entity_init(adev, ring);
2665 		if (r) {
2666 			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2667 				  ring->name);
2668 			return r;
2669 		}
2670 		r = amdgpu_vce_entity_init(adev, ring);
2671 		if (r) {
2672 			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2673 				  ring->name);
2674 			return r;
2675 		}
2676 	}
2677 
2678 	amdgpu_xcp_update_partition_sched_list(adev);
2679 
2680 	return 0;
2681 }
2682 
2683 
2684 /**
2685  * amdgpu_device_ip_init - run init for hardware IPs
2686  *
2687  * @adev: amdgpu_device pointer
2688  *
2689  * Main initialization pass for hardware IPs.  The list of all the hardware
2690  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2691  * are run.  sw_init initializes the software state associated with each IP
2692  * and hw_init initializes the hardware associated with each IP.
2693  * Returns 0 on success, negative error code on failure.
2694  */
2695 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2696 {
2697 	int i, r;
2698 
2699 	r = amdgpu_ras_init(adev);
2700 	if (r)
2701 		return r;
2702 
2703 	for (i = 0; i < adev->num_ip_blocks; i++) {
2704 		if (!adev->ip_blocks[i].status.valid)
2705 			continue;
2706 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2707 		if (r) {
2708 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2709 				  adev->ip_blocks[i].version->funcs->name, r);
2710 			goto init_failed;
2711 		}
2712 		adev->ip_blocks[i].status.sw = true;
2713 
2714 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2715 			/* need to do common hw init early so everything is set up for gmc */
2716 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2717 			if (r) {
2718 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2719 				goto init_failed;
2720 			}
2721 			adev->ip_blocks[i].status.hw = true;
2722 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2723 			/* need to do gmc hw init early so we can allocate gpu mem */
2724 			/* Try to reserve bad pages early */
2725 			if (amdgpu_sriov_vf(adev))
2726 				amdgpu_virt_exchange_data(adev);
2727 
2728 			r = amdgpu_device_mem_scratch_init(adev);
2729 			if (r) {
2730 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2731 				goto init_failed;
2732 			}
2733 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2734 			if (r) {
2735 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2736 				goto init_failed;
2737 			}
2738 			r = amdgpu_device_wb_init(adev);
2739 			if (r) {
2740 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2741 				goto init_failed;
2742 			}
2743 			adev->ip_blocks[i].status.hw = true;
2744 
2745 			/* right after GMC hw init, we create CSA */
2746 			if (adev->gfx.mcbp) {
2747 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2748 							       AMDGPU_GEM_DOMAIN_VRAM |
2749 							       AMDGPU_GEM_DOMAIN_GTT,
2750 							       AMDGPU_CSA_SIZE);
2751 				if (r) {
2752 					DRM_ERROR("allocate CSA failed %d\n", r);
2753 					goto init_failed;
2754 				}
2755 			}
2756 
2757 			r = amdgpu_seq64_init(adev);
2758 			if (r) {
2759 				DRM_ERROR("allocate seq64 failed %d\n", r);
2760 				goto init_failed;
2761 			}
2762 		}
2763 	}
2764 
2765 	if (amdgpu_sriov_vf(adev))
2766 		amdgpu_virt_init_data_exchange(adev);
2767 
2768 	r = amdgpu_ib_pool_init(adev);
2769 	if (r) {
2770 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2771 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2772 		goto init_failed;
2773 	}
2774 
2775 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2776 	if (r)
2777 		goto init_failed;
2778 
2779 	r = amdgpu_device_ip_hw_init_phase1(adev);
2780 	if (r)
2781 		goto init_failed;
2782 
2783 	r = amdgpu_device_fw_loading(adev);
2784 	if (r)
2785 		goto init_failed;
2786 
2787 	r = amdgpu_device_ip_hw_init_phase2(adev);
2788 	if (r)
2789 		goto init_failed;
2790 
2791 	/*
2792 	 * retired pages will be loaded from eeprom and reserved here,
2793 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2794 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2795 	 * for I2C communication which only true at this point.
2796 	 *
2797 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2798 	 * failure from bad gpu situation and stop amdgpu init process
2799 	 * accordingly. For other failed cases, it will still release all
2800 	 * the resource and print error message, rather than returning one
2801 	 * negative value to upper level.
2802 	 *
2803 	 * Note: theoretically, this should be called before all vram allocations
2804 	 * to protect retired page from abusing
2805 	 */
2806 	r = amdgpu_ras_recovery_init(adev);
2807 	if (r)
2808 		goto init_failed;
2809 
2810 	/**
2811 	 * In case of XGMI grab extra reference for reset domain for this device
2812 	 */
2813 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2814 		if (amdgpu_xgmi_add_device(adev) == 0) {
2815 			if (!amdgpu_sriov_vf(adev)) {
2816 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2817 
2818 				if (WARN_ON(!hive)) {
2819 					r = -ENOENT;
2820 					goto init_failed;
2821 				}
2822 
2823 				if (!hive->reset_domain ||
2824 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2825 					r = -ENOENT;
2826 					amdgpu_put_xgmi_hive(hive);
2827 					goto init_failed;
2828 				}
2829 
2830 				/* Drop the early temporary reset domain we created for device */
2831 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2832 				adev->reset_domain = hive->reset_domain;
2833 				amdgpu_put_xgmi_hive(hive);
2834 			}
2835 		}
2836 	}
2837 
2838 	r = amdgpu_device_init_schedulers(adev);
2839 	if (r)
2840 		goto init_failed;
2841 
2842 	if (adev->mman.buffer_funcs_ring->sched.ready)
2843 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2844 
2845 	/* Don't init kfd if whole hive need to be reset during init */
2846 	if (!adev->gmc.xgmi.pending_reset) {
2847 		kgd2kfd_init_zone_device(adev);
2848 		amdgpu_amdkfd_device_init(adev);
2849 	}
2850 
2851 	amdgpu_fru_get_product_info(adev);
2852 
2853 init_failed:
2854 
2855 	return r;
2856 }
2857 
2858 /**
2859  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2860  *
2861  * @adev: amdgpu_device pointer
2862  *
2863  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2864  * this function before a GPU reset.  If the value is retained after a
2865  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2866  */
2867 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2868 {
2869 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2870 }
2871 
2872 /**
2873  * amdgpu_device_check_vram_lost - check if vram is valid
2874  *
2875  * @adev: amdgpu_device pointer
2876  *
2877  * Checks the reset magic value written to the gart pointer in VRAM.
2878  * The driver calls this after a GPU reset to see if the contents of
2879  * VRAM is lost or now.
2880  * returns true if vram is lost, false if not.
2881  */
2882 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2883 {
2884 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2885 			AMDGPU_RESET_MAGIC_NUM))
2886 		return true;
2887 
2888 	if (!amdgpu_in_reset(adev))
2889 		return false;
2890 
2891 	/*
2892 	 * For all ASICs with baco/mode1 reset, the VRAM is
2893 	 * always assumed to be lost.
2894 	 */
2895 	switch (amdgpu_asic_reset_method(adev)) {
2896 	case AMD_RESET_METHOD_BACO:
2897 	case AMD_RESET_METHOD_MODE1:
2898 		return true;
2899 	default:
2900 		return false;
2901 	}
2902 }
2903 
2904 /**
2905  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2906  *
2907  * @adev: amdgpu_device pointer
2908  * @state: clockgating state (gate or ungate)
2909  *
2910  * The list of all the hardware IPs that make up the asic is walked and the
2911  * set_clockgating_state callbacks are run.
2912  * Late initialization pass enabling clockgating for hardware IPs.
2913  * Fini or suspend, pass disabling clockgating for hardware IPs.
2914  * Returns 0 on success, negative error code on failure.
2915  */
2916 
2917 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2918 			       enum amd_clockgating_state state)
2919 {
2920 	int i, j, r;
2921 
2922 	if (amdgpu_emu_mode == 1)
2923 		return 0;
2924 
2925 	for (j = 0; j < adev->num_ip_blocks; j++) {
2926 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2927 		if (!adev->ip_blocks[i].status.late_initialized)
2928 			continue;
2929 		/* skip CG for GFX, SDMA on S0ix */
2930 		if (adev->in_s0ix &&
2931 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2932 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2933 			continue;
2934 		/* skip CG for VCE/UVD, it's handled specially */
2935 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2936 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2937 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2938 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2939 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2940 			/* enable clockgating to save power */
2941 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2942 										     state);
2943 			if (r) {
2944 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2945 					  adev->ip_blocks[i].version->funcs->name, r);
2946 				return r;
2947 			}
2948 		}
2949 	}
2950 
2951 	return 0;
2952 }
2953 
2954 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2955 			       enum amd_powergating_state state)
2956 {
2957 	int i, j, r;
2958 
2959 	if (amdgpu_emu_mode == 1)
2960 		return 0;
2961 
2962 	for (j = 0; j < adev->num_ip_blocks; j++) {
2963 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2964 		if (!adev->ip_blocks[i].status.late_initialized)
2965 			continue;
2966 		/* skip PG for GFX, SDMA on S0ix */
2967 		if (adev->in_s0ix &&
2968 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2969 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2970 			continue;
2971 		/* skip CG for VCE/UVD, it's handled specially */
2972 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2973 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2974 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2975 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2976 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2977 			/* enable powergating to save power */
2978 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2979 											state);
2980 			if (r) {
2981 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2982 					  adev->ip_blocks[i].version->funcs->name, r);
2983 				return r;
2984 			}
2985 		}
2986 	}
2987 	return 0;
2988 }
2989 
2990 static int amdgpu_device_enable_mgpu_fan_boost(void)
2991 {
2992 	struct amdgpu_gpu_instance *gpu_ins;
2993 	struct amdgpu_device *adev;
2994 	int i, ret = 0;
2995 
2996 	mutex_lock(&mgpu_info.mutex);
2997 
2998 	/*
2999 	 * MGPU fan boost feature should be enabled
3000 	 * only when there are two or more dGPUs in
3001 	 * the system
3002 	 */
3003 	if (mgpu_info.num_dgpu < 2)
3004 		goto out;
3005 
3006 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3007 		gpu_ins = &(mgpu_info.gpu_ins[i]);
3008 		adev = gpu_ins->adev;
3009 		if (!(adev->flags & AMD_IS_APU) &&
3010 		    !gpu_ins->mgpu_fan_enabled) {
3011 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3012 			if (ret)
3013 				break;
3014 
3015 			gpu_ins->mgpu_fan_enabled = 1;
3016 		}
3017 	}
3018 
3019 out:
3020 	mutex_unlock(&mgpu_info.mutex);
3021 
3022 	return ret;
3023 }
3024 
3025 /**
3026  * amdgpu_device_ip_late_init - run late init for hardware IPs
3027  *
3028  * @adev: amdgpu_device pointer
3029  *
3030  * Late initialization pass for hardware IPs.  The list of all the hardware
3031  * IPs that make up the asic is walked and the late_init callbacks are run.
3032  * late_init covers any special initialization that an IP requires
3033  * after all of the have been initialized or something that needs to happen
3034  * late in the init process.
3035  * Returns 0 on success, negative error code on failure.
3036  */
3037 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3038 {
3039 	struct amdgpu_gpu_instance *gpu_instance;
3040 	int i = 0, r;
3041 
3042 	for (i = 0; i < adev->num_ip_blocks; i++) {
3043 		if (!adev->ip_blocks[i].status.hw)
3044 			continue;
3045 		if (adev->ip_blocks[i].version->funcs->late_init) {
3046 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3047 			if (r) {
3048 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3049 					  adev->ip_blocks[i].version->funcs->name, r);
3050 				return r;
3051 			}
3052 		}
3053 		adev->ip_blocks[i].status.late_initialized = true;
3054 	}
3055 
3056 	r = amdgpu_ras_late_init(adev);
3057 	if (r) {
3058 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3059 		return r;
3060 	}
3061 
3062 	amdgpu_ras_set_error_query_ready(adev, true);
3063 
3064 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3065 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3066 
3067 	amdgpu_device_fill_reset_magic(adev);
3068 
3069 	r = amdgpu_device_enable_mgpu_fan_boost();
3070 	if (r)
3071 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3072 
3073 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3074 	if (amdgpu_passthrough(adev) &&
3075 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3076 	     adev->asic_type == CHIP_ALDEBARAN))
3077 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3078 
3079 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3080 		mutex_lock(&mgpu_info.mutex);
3081 
3082 		/*
3083 		 * Reset device p-state to low as this was booted with high.
3084 		 *
3085 		 * This should be performed only after all devices from the same
3086 		 * hive get initialized.
3087 		 *
3088 		 * However, it's unknown how many device in the hive in advance.
3089 		 * As this is counted one by one during devices initializations.
3090 		 *
3091 		 * So, we wait for all XGMI interlinked devices initialized.
3092 		 * This may bring some delays as those devices may come from
3093 		 * different hives. But that should be OK.
3094 		 */
3095 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3096 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3097 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3098 				if (gpu_instance->adev->flags & AMD_IS_APU)
3099 					continue;
3100 
3101 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3102 						AMDGPU_XGMI_PSTATE_MIN);
3103 				if (r) {
3104 					DRM_ERROR("pstate setting failed (%d).\n", r);
3105 					break;
3106 				}
3107 			}
3108 		}
3109 
3110 		mutex_unlock(&mgpu_info.mutex);
3111 	}
3112 
3113 	return 0;
3114 }
3115 
3116 /**
3117  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3118  *
3119  * @adev: amdgpu_device pointer
3120  *
3121  * For ASICs need to disable SMC first
3122  */
3123 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3124 {
3125 	int i, r;
3126 
3127 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3128 		return;
3129 
3130 	for (i = 0; i < adev->num_ip_blocks; i++) {
3131 		if (!adev->ip_blocks[i].status.hw)
3132 			continue;
3133 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3134 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3135 			/* XXX handle errors */
3136 			if (r) {
3137 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3138 					  adev->ip_blocks[i].version->funcs->name, r);
3139 			}
3140 			adev->ip_blocks[i].status.hw = false;
3141 			break;
3142 		}
3143 	}
3144 }
3145 
3146 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3147 {
3148 	int i, r;
3149 
3150 	for (i = 0; i < adev->num_ip_blocks; i++) {
3151 		if (!adev->ip_blocks[i].version->funcs->early_fini)
3152 			continue;
3153 
3154 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3155 		if (r) {
3156 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3157 				  adev->ip_blocks[i].version->funcs->name, r);
3158 		}
3159 	}
3160 
3161 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3162 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3163 
3164 	amdgpu_amdkfd_suspend(adev, false);
3165 
3166 	/* Workaroud for ASICs need to disable SMC first */
3167 	amdgpu_device_smu_fini_early(adev);
3168 
3169 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3170 		if (!adev->ip_blocks[i].status.hw)
3171 			continue;
3172 
3173 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3174 		/* XXX handle errors */
3175 		if (r) {
3176 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3177 				  adev->ip_blocks[i].version->funcs->name, r);
3178 		}
3179 
3180 		adev->ip_blocks[i].status.hw = false;
3181 	}
3182 
3183 	if (amdgpu_sriov_vf(adev)) {
3184 		if (amdgpu_virt_release_full_gpu(adev, false))
3185 			DRM_ERROR("failed to release exclusive mode on fini\n");
3186 	}
3187 
3188 	return 0;
3189 }
3190 
3191 /**
3192  * amdgpu_device_ip_fini - run fini for hardware IPs
3193  *
3194  * @adev: amdgpu_device pointer
3195  *
3196  * Main teardown pass for hardware IPs.  The list of all the hardware
3197  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3198  * are run.  hw_fini tears down the hardware associated with each IP
3199  * and sw_fini tears down any software state associated with each IP.
3200  * Returns 0 on success, negative error code on failure.
3201  */
3202 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3203 {
3204 	int i, r;
3205 
3206 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3207 		amdgpu_virt_release_ras_err_handler_data(adev);
3208 
3209 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3210 		amdgpu_xgmi_remove_device(adev);
3211 
3212 	amdgpu_amdkfd_device_fini_sw(adev);
3213 
3214 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3215 		if (!adev->ip_blocks[i].status.sw)
3216 			continue;
3217 
3218 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3219 			amdgpu_ucode_free_bo(adev);
3220 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3221 			amdgpu_device_wb_fini(adev);
3222 			amdgpu_device_mem_scratch_fini(adev);
3223 			amdgpu_ib_pool_fini(adev);
3224 			amdgpu_seq64_fini(adev);
3225 		}
3226 
3227 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3228 		/* XXX handle errors */
3229 		if (r) {
3230 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3231 				  adev->ip_blocks[i].version->funcs->name, r);
3232 		}
3233 		adev->ip_blocks[i].status.sw = false;
3234 		adev->ip_blocks[i].status.valid = false;
3235 	}
3236 
3237 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3238 		if (!adev->ip_blocks[i].status.late_initialized)
3239 			continue;
3240 		if (adev->ip_blocks[i].version->funcs->late_fini)
3241 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3242 		adev->ip_blocks[i].status.late_initialized = false;
3243 	}
3244 
3245 	amdgpu_ras_fini(adev);
3246 
3247 	return 0;
3248 }
3249 
3250 /**
3251  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3252  *
3253  * @work: work_struct.
3254  */
3255 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3256 {
3257 	struct amdgpu_device *adev =
3258 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3259 	int r;
3260 
3261 	r = amdgpu_ib_ring_tests(adev);
3262 	if (r)
3263 		DRM_ERROR("ib ring test failed (%d).\n", r);
3264 }
3265 
3266 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3267 {
3268 	struct amdgpu_device *adev =
3269 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3270 
3271 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3272 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3273 
3274 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3275 		adev->gfx.gfx_off_state = true;
3276 }
3277 
3278 /**
3279  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3280  *
3281  * @adev: amdgpu_device pointer
3282  *
3283  * Main suspend function for hardware IPs.  The list of all the hardware
3284  * IPs that make up the asic is walked, clockgating is disabled and the
3285  * suspend callbacks are run.  suspend puts the hardware and software state
3286  * in each IP into a state suitable for suspend.
3287  * Returns 0 on success, negative error code on failure.
3288  */
3289 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3290 {
3291 	int i, r;
3292 
3293 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3294 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3295 
3296 	/*
3297 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3298 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3299 	 * scenario. Add the missing df cstate disablement here.
3300 	 */
3301 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3302 		dev_warn(adev->dev, "Failed to disallow df cstate");
3303 
3304 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3305 		if (!adev->ip_blocks[i].status.valid)
3306 			continue;
3307 
3308 		/* displays are handled separately */
3309 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3310 			continue;
3311 
3312 		/* XXX handle errors */
3313 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3314 		/* XXX handle errors */
3315 		if (r) {
3316 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3317 				  adev->ip_blocks[i].version->funcs->name, r);
3318 			return r;
3319 		}
3320 
3321 		adev->ip_blocks[i].status.hw = false;
3322 	}
3323 
3324 	return 0;
3325 }
3326 
3327 /**
3328  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3329  *
3330  * @adev: amdgpu_device pointer
3331  *
3332  * Main suspend function for hardware IPs.  The list of all the hardware
3333  * IPs that make up the asic is walked, clockgating is disabled and the
3334  * suspend callbacks are run.  suspend puts the hardware and software state
3335  * in each IP into a state suitable for suspend.
3336  * Returns 0 on success, negative error code on failure.
3337  */
3338 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3339 {
3340 	int i, r;
3341 
3342 	if (adev->in_s0ix)
3343 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3344 
3345 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3346 		if (!adev->ip_blocks[i].status.valid)
3347 			continue;
3348 		/* displays are handled in phase1 */
3349 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3350 			continue;
3351 		/* PSP lost connection when err_event_athub occurs */
3352 		if (amdgpu_ras_intr_triggered() &&
3353 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3354 			adev->ip_blocks[i].status.hw = false;
3355 			continue;
3356 		}
3357 
3358 		/* skip unnecessary suspend if we do not initialize them yet */
3359 		if (adev->gmc.xgmi.pending_reset &&
3360 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3361 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3362 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3363 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3364 			adev->ip_blocks[i].status.hw = false;
3365 			continue;
3366 		}
3367 
3368 		/* skip suspend of gfx/mes and psp for S0ix
3369 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3370 		 * like at runtime. PSP is also part of the always on hardware
3371 		 * so no need to suspend it.
3372 		 */
3373 		if (adev->in_s0ix &&
3374 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3375 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3376 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3377 			continue;
3378 
3379 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3380 		if (adev->in_s0ix &&
3381 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3382 		     IP_VERSION(5, 0, 0)) &&
3383 		    (adev->ip_blocks[i].version->type ==
3384 		     AMD_IP_BLOCK_TYPE_SDMA))
3385 			continue;
3386 
3387 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3388 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3389 		 * from this location and RLC Autoload automatically also gets loaded
3390 		 * from here based on PMFW -> PSP message during re-init sequence.
3391 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3392 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3393 		 */
3394 		if (amdgpu_in_reset(adev) &&
3395 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3396 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3397 			continue;
3398 
3399 		/* XXX handle errors */
3400 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3401 		/* XXX handle errors */
3402 		if (r) {
3403 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3404 				  adev->ip_blocks[i].version->funcs->name, r);
3405 		}
3406 		adev->ip_blocks[i].status.hw = false;
3407 		/* handle putting the SMC in the appropriate state */
3408 		if (!amdgpu_sriov_vf(adev)) {
3409 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3410 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3411 				if (r) {
3412 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3413 							adev->mp1_state, r);
3414 					return r;
3415 				}
3416 			}
3417 		}
3418 	}
3419 
3420 	return 0;
3421 }
3422 
3423 /**
3424  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3425  *
3426  * @adev: amdgpu_device pointer
3427  *
3428  * Main suspend function for hardware IPs.  The list of all the hardware
3429  * IPs that make up the asic is walked, clockgating is disabled and the
3430  * suspend callbacks are run.  suspend puts the hardware and software state
3431  * in each IP into a state suitable for suspend.
3432  * Returns 0 on success, negative error code on failure.
3433  */
3434 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3435 {
3436 	int r;
3437 
3438 	if (amdgpu_sriov_vf(adev)) {
3439 		amdgpu_virt_fini_data_exchange(adev);
3440 		amdgpu_virt_request_full_gpu(adev, false);
3441 	}
3442 
3443 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3444 
3445 	r = amdgpu_device_ip_suspend_phase1(adev);
3446 	if (r)
3447 		return r;
3448 	r = amdgpu_device_ip_suspend_phase2(adev);
3449 
3450 	if (amdgpu_sriov_vf(adev))
3451 		amdgpu_virt_release_full_gpu(adev, false);
3452 
3453 	return r;
3454 }
3455 
3456 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3457 {
3458 	int i, r;
3459 
3460 	static enum amd_ip_block_type ip_order[] = {
3461 		AMD_IP_BLOCK_TYPE_COMMON,
3462 		AMD_IP_BLOCK_TYPE_GMC,
3463 		AMD_IP_BLOCK_TYPE_PSP,
3464 		AMD_IP_BLOCK_TYPE_IH,
3465 	};
3466 
3467 	for (i = 0; i < adev->num_ip_blocks; i++) {
3468 		int j;
3469 		struct amdgpu_ip_block *block;
3470 
3471 		block = &adev->ip_blocks[i];
3472 		block->status.hw = false;
3473 
3474 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3475 
3476 			if (block->version->type != ip_order[j] ||
3477 				!block->status.valid)
3478 				continue;
3479 
3480 			r = block->version->funcs->hw_init(adev);
3481 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3482 			if (r)
3483 				return r;
3484 			block->status.hw = true;
3485 		}
3486 	}
3487 
3488 	return 0;
3489 }
3490 
3491 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3492 {
3493 	int i, r;
3494 
3495 	static enum amd_ip_block_type ip_order[] = {
3496 		AMD_IP_BLOCK_TYPE_SMC,
3497 		AMD_IP_BLOCK_TYPE_DCE,
3498 		AMD_IP_BLOCK_TYPE_GFX,
3499 		AMD_IP_BLOCK_TYPE_SDMA,
3500 		AMD_IP_BLOCK_TYPE_MES,
3501 		AMD_IP_BLOCK_TYPE_UVD,
3502 		AMD_IP_BLOCK_TYPE_VCE,
3503 		AMD_IP_BLOCK_TYPE_VCN,
3504 		AMD_IP_BLOCK_TYPE_JPEG
3505 	};
3506 
3507 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3508 		int j;
3509 		struct amdgpu_ip_block *block;
3510 
3511 		for (j = 0; j < adev->num_ip_blocks; j++) {
3512 			block = &adev->ip_blocks[j];
3513 
3514 			if (block->version->type != ip_order[i] ||
3515 				!block->status.valid ||
3516 				block->status.hw)
3517 				continue;
3518 
3519 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3520 				r = block->version->funcs->resume(adev);
3521 			else
3522 				r = block->version->funcs->hw_init(adev);
3523 
3524 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3525 			if (r)
3526 				return r;
3527 			block->status.hw = true;
3528 		}
3529 	}
3530 
3531 	return 0;
3532 }
3533 
3534 /**
3535  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3536  *
3537  * @adev: amdgpu_device pointer
3538  *
3539  * First resume function for hardware IPs.  The list of all the hardware
3540  * IPs that make up the asic is walked and the resume callbacks are run for
3541  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3542  * after a suspend and updates the software state as necessary.  This
3543  * function is also used for restoring the GPU after a GPU reset.
3544  * Returns 0 on success, negative error code on failure.
3545  */
3546 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3547 {
3548 	int i, r;
3549 
3550 	for (i = 0; i < adev->num_ip_blocks; i++) {
3551 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3552 			continue;
3553 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3554 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3555 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3556 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3557 
3558 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3559 			if (r) {
3560 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3561 					  adev->ip_blocks[i].version->funcs->name, r);
3562 				return r;
3563 			}
3564 			adev->ip_blocks[i].status.hw = true;
3565 		}
3566 	}
3567 
3568 	return 0;
3569 }
3570 
3571 /**
3572  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3573  *
3574  * @adev: amdgpu_device pointer
3575  *
3576  * First resume function for hardware IPs.  The list of all the hardware
3577  * IPs that make up the asic is walked and the resume callbacks are run for
3578  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3579  * functional state after a suspend and updates the software state as
3580  * necessary.  This function is also used for restoring the GPU after a GPU
3581  * reset.
3582  * Returns 0 on success, negative error code on failure.
3583  */
3584 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3585 {
3586 	int i, r;
3587 
3588 	for (i = 0; i < adev->num_ip_blocks; i++) {
3589 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3590 			continue;
3591 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3592 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3593 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3594 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3595 			continue;
3596 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3597 		if (r) {
3598 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3599 				  adev->ip_blocks[i].version->funcs->name, r);
3600 			return r;
3601 		}
3602 		adev->ip_blocks[i].status.hw = true;
3603 	}
3604 
3605 	return 0;
3606 }
3607 
3608 /**
3609  * amdgpu_device_ip_resume - run resume for hardware IPs
3610  *
3611  * @adev: amdgpu_device pointer
3612  *
3613  * Main resume function for hardware IPs.  The hardware IPs
3614  * are split into two resume functions because they are
3615  * also used in recovering from a GPU reset and some additional
3616  * steps need to be take between them.  In this case (S3/S4) they are
3617  * run sequentially.
3618  * Returns 0 on success, negative error code on failure.
3619  */
3620 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3621 {
3622 	int r;
3623 
3624 	r = amdgpu_device_ip_resume_phase1(adev);
3625 	if (r)
3626 		return r;
3627 
3628 	r = amdgpu_device_fw_loading(adev);
3629 	if (r)
3630 		return r;
3631 
3632 	r = amdgpu_device_ip_resume_phase2(adev);
3633 
3634 	if (adev->mman.buffer_funcs_ring->sched.ready)
3635 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3636 
3637 	return r;
3638 }
3639 
3640 /**
3641  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3642  *
3643  * @adev: amdgpu_device pointer
3644  *
3645  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3646  */
3647 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3648 {
3649 	if (amdgpu_sriov_vf(adev)) {
3650 		if (adev->is_atom_fw) {
3651 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3652 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3653 		} else {
3654 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3655 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3656 		}
3657 
3658 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3659 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3660 	}
3661 }
3662 
3663 /**
3664  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3665  *
3666  * @asic_type: AMD asic type
3667  *
3668  * Check if there is DC (new modesetting infrastructre) support for an asic.
3669  * returns true if DC has support, false if not.
3670  */
3671 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3672 {
3673 	switch (asic_type) {
3674 #ifdef CONFIG_DRM_AMDGPU_SI
3675 	case CHIP_HAINAN:
3676 #endif
3677 	case CHIP_TOPAZ:
3678 		/* chips with no display hardware */
3679 		return false;
3680 #if defined(CONFIG_DRM_AMD_DC)
3681 	case CHIP_TAHITI:
3682 	case CHIP_PITCAIRN:
3683 	case CHIP_VERDE:
3684 	case CHIP_OLAND:
3685 		/*
3686 		 * We have systems in the wild with these ASICs that require
3687 		 * LVDS and VGA support which is not supported with DC.
3688 		 *
3689 		 * Fallback to the non-DC driver here by default so as not to
3690 		 * cause regressions.
3691 		 */
3692 #if defined(CONFIG_DRM_AMD_DC_SI)
3693 		return amdgpu_dc > 0;
3694 #else
3695 		return false;
3696 #endif
3697 	case CHIP_BONAIRE:
3698 	case CHIP_KAVERI:
3699 	case CHIP_KABINI:
3700 	case CHIP_MULLINS:
3701 		/*
3702 		 * We have systems in the wild with these ASICs that require
3703 		 * VGA support which is not supported with DC.
3704 		 *
3705 		 * Fallback to the non-DC driver here by default so as not to
3706 		 * cause regressions.
3707 		 */
3708 		return amdgpu_dc > 0;
3709 	default:
3710 		return amdgpu_dc != 0;
3711 #else
3712 	default:
3713 		if (amdgpu_dc > 0)
3714 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3715 		return false;
3716 #endif
3717 	}
3718 }
3719 
3720 /**
3721  * amdgpu_device_has_dc_support - check if dc is supported
3722  *
3723  * @adev: amdgpu_device pointer
3724  *
3725  * Returns true for supported, false for not supported
3726  */
3727 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3728 {
3729 	if (adev->enable_virtual_display ||
3730 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3731 		return false;
3732 
3733 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3734 }
3735 
3736 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3737 {
3738 	struct amdgpu_device *adev =
3739 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3740 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3741 
3742 	/* It's a bug to not have a hive within this function */
3743 	if (WARN_ON(!hive))
3744 		return;
3745 
3746 	/*
3747 	 * Use task barrier to synchronize all xgmi reset works across the
3748 	 * hive. task_barrier_enter and task_barrier_exit will block
3749 	 * until all the threads running the xgmi reset works reach
3750 	 * those points. task_barrier_full will do both blocks.
3751 	 */
3752 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3753 
3754 		task_barrier_enter(&hive->tb);
3755 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3756 
3757 		if (adev->asic_reset_res)
3758 			goto fail;
3759 
3760 		task_barrier_exit(&hive->tb);
3761 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3762 
3763 		if (adev->asic_reset_res)
3764 			goto fail;
3765 
3766 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3767 	} else {
3768 
3769 		task_barrier_full(&hive->tb);
3770 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3771 	}
3772 
3773 fail:
3774 	if (adev->asic_reset_res)
3775 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3776 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3777 	amdgpu_put_xgmi_hive(hive);
3778 }
3779 
3780 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3781 {
3782 	char *input = amdgpu_lockup_timeout;
3783 	char *timeout_setting = NULL;
3784 	int index = 0;
3785 	long timeout;
3786 	int ret = 0;
3787 
3788 	/*
3789 	 * By default timeout for non compute jobs is 10000
3790 	 * and 60000 for compute jobs.
3791 	 * In SR-IOV or passthrough mode, timeout for compute
3792 	 * jobs are 60000 by default.
3793 	 */
3794 	adev->gfx_timeout = msecs_to_jiffies(10000);
3795 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3796 	if (amdgpu_sriov_vf(adev))
3797 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3798 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3799 	else
3800 		adev->compute_timeout =  msecs_to_jiffies(60000);
3801 
3802 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3803 		while ((timeout_setting = strsep(&input, ",")) &&
3804 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3805 			ret = kstrtol(timeout_setting, 0, &timeout);
3806 			if (ret)
3807 				return ret;
3808 
3809 			if (timeout == 0) {
3810 				index++;
3811 				continue;
3812 			} else if (timeout < 0) {
3813 				timeout = MAX_SCHEDULE_TIMEOUT;
3814 				dev_warn(adev->dev, "lockup timeout disabled");
3815 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3816 			} else {
3817 				timeout = msecs_to_jiffies(timeout);
3818 			}
3819 
3820 			switch (index++) {
3821 			case 0:
3822 				adev->gfx_timeout = timeout;
3823 				break;
3824 			case 1:
3825 				adev->compute_timeout = timeout;
3826 				break;
3827 			case 2:
3828 				adev->sdma_timeout = timeout;
3829 				break;
3830 			case 3:
3831 				adev->video_timeout = timeout;
3832 				break;
3833 			default:
3834 				break;
3835 			}
3836 		}
3837 		/*
3838 		 * There is only one value specified and
3839 		 * it should apply to all non-compute jobs.
3840 		 */
3841 		if (index == 1) {
3842 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3843 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3844 				adev->compute_timeout = adev->gfx_timeout;
3845 		}
3846 	}
3847 
3848 	return ret;
3849 }
3850 
3851 /**
3852  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3853  *
3854  * @adev: amdgpu_device pointer
3855  *
3856  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3857  */
3858 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3859 {
3860 	struct iommu_domain *domain;
3861 
3862 	domain = iommu_get_domain_for_dev(adev->dev);
3863 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3864 		adev->ram_is_direct_mapped = true;
3865 }
3866 
3867 static const struct attribute *amdgpu_dev_attributes[] = {
3868 	&dev_attr_pcie_replay_count.attr,
3869 	NULL
3870 };
3871 
3872 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3873 {
3874 	if (amdgpu_mcbp == 1)
3875 		adev->gfx.mcbp = true;
3876 	else if (amdgpu_mcbp == 0)
3877 		adev->gfx.mcbp = false;
3878 
3879 	if (amdgpu_sriov_vf(adev))
3880 		adev->gfx.mcbp = true;
3881 
3882 	if (adev->gfx.mcbp)
3883 		DRM_INFO("MCBP is enabled\n");
3884 }
3885 
3886 /**
3887  * amdgpu_device_init - initialize the driver
3888  *
3889  * @adev: amdgpu_device pointer
3890  * @flags: driver flags
3891  *
3892  * Initializes the driver info and hw (all asics).
3893  * Returns 0 for success or an error on failure.
3894  * Called at driver startup.
3895  */
3896 int amdgpu_device_init(struct amdgpu_device *adev,
3897 		       uint32_t flags)
3898 {
3899 	struct drm_device *ddev = adev_to_drm(adev);
3900 	struct pci_dev *pdev = adev->pdev;
3901 	int r, i;
3902 	bool px = false;
3903 	u32 max_MBps;
3904 	int tmp;
3905 
3906 	adev->shutdown = false;
3907 	adev->flags = flags;
3908 
3909 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3910 		adev->asic_type = amdgpu_force_asic_type;
3911 	else
3912 		adev->asic_type = flags & AMD_ASIC_MASK;
3913 
3914 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3915 	if (amdgpu_emu_mode == 1)
3916 		adev->usec_timeout *= 10;
3917 	adev->gmc.gart_size = 512 * 1024 * 1024;
3918 	adev->accel_working = false;
3919 	adev->num_rings = 0;
3920 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3921 	adev->mman.buffer_funcs = NULL;
3922 	adev->mman.buffer_funcs_ring = NULL;
3923 	adev->vm_manager.vm_pte_funcs = NULL;
3924 	adev->vm_manager.vm_pte_num_scheds = 0;
3925 	adev->gmc.gmc_funcs = NULL;
3926 	adev->harvest_ip_mask = 0x0;
3927 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3928 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3929 
3930 	adev->smc_rreg = &amdgpu_invalid_rreg;
3931 	adev->smc_wreg = &amdgpu_invalid_wreg;
3932 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3933 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3934 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3935 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3936 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3937 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3938 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3939 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3940 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3941 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
3942 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3943 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3944 	adev->didt_rreg = &amdgpu_invalid_rreg;
3945 	adev->didt_wreg = &amdgpu_invalid_wreg;
3946 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3947 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3948 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3949 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3950 
3951 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3952 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3953 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3954 
3955 	/* mutex initialization are all done here so we
3956 	 * can recall function without having locking issues
3957 	 */
3958 	mutex_init(&adev->firmware.mutex);
3959 	mutex_init(&adev->pm.mutex);
3960 	mutex_init(&adev->gfx.gpu_clock_mutex);
3961 	mutex_init(&adev->srbm_mutex);
3962 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3963 	mutex_init(&adev->gfx.gfx_off_mutex);
3964 	mutex_init(&adev->gfx.partition_mutex);
3965 	mutex_init(&adev->grbm_idx_mutex);
3966 	mutex_init(&adev->mn_lock);
3967 	mutex_init(&adev->virt.vf_errors.lock);
3968 	hash_init(adev->mn_hash);
3969 	mutex_init(&adev->psp.mutex);
3970 	mutex_init(&adev->notifier_lock);
3971 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3972 	mutex_init(&adev->benchmark_mutex);
3973 
3974 	amdgpu_device_init_apu_flags(adev);
3975 
3976 	r = amdgpu_device_check_arguments(adev);
3977 	if (r)
3978 		return r;
3979 
3980 	spin_lock_init(&adev->mmio_idx_lock);
3981 	spin_lock_init(&adev->smc_idx_lock);
3982 	spin_lock_init(&adev->pcie_idx_lock);
3983 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3984 	spin_lock_init(&adev->didt_idx_lock);
3985 	spin_lock_init(&adev->gc_cac_idx_lock);
3986 	spin_lock_init(&adev->se_cac_idx_lock);
3987 	spin_lock_init(&adev->audio_endpt_idx_lock);
3988 	spin_lock_init(&adev->mm_stats.lock);
3989 
3990 	INIT_LIST_HEAD(&adev->shadow_list);
3991 	mutex_init(&adev->shadow_list_lock);
3992 
3993 	INIT_LIST_HEAD(&adev->reset_list);
3994 
3995 	INIT_LIST_HEAD(&adev->ras_list);
3996 
3997 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3998 
3999 	INIT_DELAYED_WORK(&adev->delayed_init_work,
4000 			  amdgpu_device_delayed_init_work_handler);
4001 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4002 			  amdgpu_device_delay_enable_gfx_off);
4003 
4004 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4005 
4006 	adev->gfx.gfx_off_req_count = 1;
4007 	adev->gfx.gfx_off_residency = 0;
4008 	adev->gfx.gfx_off_entrycount = 0;
4009 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4010 
4011 	atomic_set(&adev->throttling_logging_enabled, 1);
4012 	/*
4013 	 * If throttling continues, logging will be performed every minute
4014 	 * to avoid log flooding. "-1" is subtracted since the thermal
4015 	 * throttling interrupt comes every second. Thus, the total logging
4016 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
4017 	 * for throttling interrupt) = 60 seconds.
4018 	 */
4019 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4020 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4021 
4022 	/* Registers mapping */
4023 	/* TODO: block userspace mapping of io register */
4024 	if (adev->asic_type >= CHIP_BONAIRE) {
4025 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4026 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4027 	} else {
4028 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4029 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4030 	}
4031 
4032 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4033 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4034 
4035 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4036 	if (!adev->rmmio)
4037 		return -ENOMEM;
4038 
4039 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4040 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4041 
4042 	/*
4043 	 * Reset domain needs to be present early, before XGMI hive discovered
4044 	 * (if any) and intitialized to use reset sem and in_gpu reset flag
4045 	 * early on during init and before calling to RREG32.
4046 	 */
4047 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4048 	if (!adev->reset_domain)
4049 		return -ENOMEM;
4050 
4051 	/* detect hw virtualization here */
4052 	amdgpu_detect_virtualization(adev);
4053 
4054 	amdgpu_device_get_pcie_info(adev);
4055 
4056 	r = amdgpu_device_get_job_timeout_settings(adev);
4057 	if (r) {
4058 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4059 		return r;
4060 	}
4061 
4062 	amdgpu_device_set_mcbp(adev);
4063 
4064 	/* early init functions */
4065 	r = amdgpu_device_ip_early_init(adev);
4066 	if (r)
4067 		return r;
4068 
4069 	/* Get rid of things like offb */
4070 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4071 	if (r)
4072 		return r;
4073 
4074 	/* Enable TMZ based on IP_VERSION */
4075 	amdgpu_gmc_tmz_set(adev);
4076 
4077 	if (amdgpu_sriov_vf(adev) &&
4078 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4079 		/* VF MMIO access (except mailbox range) from CPU
4080 		 * will be blocked during sriov runtime
4081 		 */
4082 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4083 
4084 	amdgpu_gmc_noretry_set(adev);
4085 	/* Need to get xgmi info early to decide the reset behavior*/
4086 	if (adev->gmc.xgmi.supported) {
4087 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4088 		if (r)
4089 			return r;
4090 	}
4091 
4092 	/* enable PCIE atomic ops */
4093 	if (amdgpu_sriov_vf(adev)) {
4094 		if (adev->virt.fw_reserve.p_pf2vf)
4095 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4096 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4097 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4098 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
4099 	 * internal path natively support atomics, set have_atomics_support to true.
4100 	 */
4101 	} else if ((adev->flags & AMD_IS_APU) &&
4102 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4103 		    IP_VERSION(9, 0, 0))) {
4104 		adev->have_atomics_support = true;
4105 	} else {
4106 		adev->have_atomics_support =
4107 			!pci_enable_atomic_ops_to_root(adev->pdev,
4108 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4109 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4110 	}
4111 
4112 	if (!adev->have_atomics_support)
4113 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4114 
4115 	/* doorbell bar mapping and doorbell index init*/
4116 	amdgpu_doorbell_init(adev);
4117 
4118 	if (amdgpu_emu_mode == 1) {
4119 		/* post the asic on emulation mode */
4120 		emu_soc_asic_init(adev);
4121 		goto fence_driver_init;
4122 	}
4123 
4124 	amdgpu_reset_init(adev);
4125 
4126 	/* detect if we are with an SRIOV vbios */
4127 	if (adev->bios)
4128 		amdgpu_device_detect_sriov_bios(adev);
4129 
4130 	/* check if we need to reset the asic
4131 	 *  E.g., driver was not cleanly unloaded previously, etc.
4132 	 */
4133 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4134 		if (adev->gmc.xgmi.num_physical_nodes) {
4135 			dev_info(adev->dev, "Pending hive reset.\n");
4136 			adev->gmc.xgmi.pending_reset = true;
4137 			/* Only need to init necessary block for SMU to handle the reset */
4138 			for (i = 0; i < adev->num_ip_blocks; i++) {
4139 				if (!adev->ip_blocks[i].status.valid)
4140 					continue;
4141 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4142 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4143 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4144 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4145 					DRM_DEBUG("IP %s disabled for hw_init.\n",
4146 						adev->ip_blocks[i].version->funcs->name);
4147 					adev->ip_blocks[i].status.hw = true;
4148 				}
4149 			}
4150 		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4151 				   !amdgpu_device_has_display_hardware(adev)) {
4152 					r = psp_gpu_reset(adev);
4153 		} else {
4154 				tmp = amdgpu_reset_method;
4155 				/* It should do a default reset when loading or reloading the driver,
4156 				 * regardless of the module parameter reset_method.
4157 				 */
4158 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4159 				r = amdgpu_asic_reset(adev);
4160 				amdgpu_reset_method = tmp;
4161 		}
4162 
4163 		if (r) {
4164 		  dev_err(adev->dev, "asic reset on init failed\n");
4165 		  goto failed;
4166 		}
4167 	}
4168 
4169 	/* Post card if necessary */
4170 	if (amdgpu_device_need_post(adev)) {
4171 		if (!adev->bios) {
4172 			dev_err(adev->dev, "no vBIOS found\n");
4173 			r = -EINVAL;
4174 			goto failed;
4175 		}
4176 		DRM_INFO("GPU posting now...\n");
4177 		r = amdgpu_device_asic_init(adev);
4178 		if (r) {
4179 			dev_err(adev->dev, "gpu post error!\n");
4180 			goto failed;
4181 		}
4182 	}
4183 
4184 	if (adev->bios) {
4185 		if (adev->is_atom_fw) {
4186 			/* Initialize clocks */
4187 			r = amdgpu_atomfirmware_get_clock_info(adev);
4188 			if (r) {
4189 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4190 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4191 				goto failed;
4192 			}
4193 		} else {
4194 			/* Initialize clocks */
4195 			r = amdgpu_atombios_get_clock_info(adev);
4196 			if (r) {
4197 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4198 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4199 				goto failed;
4200 			}
4201 			/* init i2c buses */
4202 			if (!amdgpu_device_has_dc_support(adev))
4203 				amdgpu_atombios_i2c_init(adev);
4204 		}
4205 	}
4206 
4207 fence_driver_init:
4208 	/* Fence driver */
4209 	r = amdgpu_fence_driver_sw_init(adev);
4210 	if (r) {
4211 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4212 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4213 		goto failed;
4214 	}
4215 
4216 	/* init the mode config */
4217 	drm_mode_config_init(adev_to_drm(adev));
4218 
4219 	r = amdgpu_device_ip_init(adev);
4220 	if (r) {
4221 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4222 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4223 		goto release_ras_con;
4224 	}
4225 
4226 	amdgpu_fence_driver_hw_init(adev);
4227 
4228 	dev_info(adev->dev,
4229 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4230 			adev->gfx.config.max_shader_engines,
4231 			adev->gfx.config.max_sh_per_se,
4232 			adev->gfx.config.max_cu_per_sh,
4233 			adev->gfx.cu_info.number);
4234 
4235 	adev->accel_working = true;
4236 
4237 	amdgpu_vm_check_compute_bug(adev);
4238 
4239 	/* Initialize the buffer migration limit. */
4240 	if (amdgpu_moverate >= 0)
4241 		max_MBps = amdgpu_moverate;
4242 	else
4243 		max_MBps = 8; /* Allow 8 MB/s. */
4244 	/* Get a log2 for easy divisions. */
4245 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4246 
4247 	/*
4248 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4249 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4250 	 * gpu instance is counted less.
4251 	 */
4252 	amdgpu_register_gpu_instance(adev);
4253 
4254 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4255 	 * explicit gating rather than handling it automatically.
4256 	 */
4257 	if (!adev->gmc.xgmi.pending_reset) {
4258 		r = amdgpu_device_ip_late_init(adev);
4259 		if (r) {
4260 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4261 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4262 			goto release_ras_con;
4263 		}
4264 		/* must succeed. */
4265 		amdgpu_ras_resume(adev);
4266 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4267 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4268 	}
4269 
4270 	if (amdgpu_sriov_vf(adev)) {
4271 		amdgpu_virt_release_full_gpu(adev, true);
4272 		flush_delayed_work(&adev->delayed_init_work);
4273 	}
4274 
4275 	/*
4276 	 * Place those sysfs registering after `late_init`. As some of those
4277 	 * operations performed in `late_init` might affect the sysfs
4278 	 * interfaces creating.
4279 	 */
4280 	r = amdgpu_atombios_sysfs_init(adev);
4281 	if (r)
4282 		drm_err(&adev->ddev,
4283 			"registering atombios sysfs failed (%d).\n", r);
4284 
4285 	r = amdgpu_pm_sysfs_init(adev);
4286 	if (r)
4287 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4288 
4289 	r = amdgpu_ucode_sysfs_init(adev);
4290 	if (r) {
4291 		adev->ucode_sysfs_en = false;
4292 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4293 	} else
4294 		adev->ucode_sysfs_en = true;
4295 
4296 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4297 	if (r)
4298 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4299 
4300 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4301 	if (r)
4302 		dev_err(adev->dev,
4303 			"Could not create amdgpu board attributes\n");
4304 
4305 	amdgpu_fru_sysfs_init(adev);
4306 	amdgpu_reg_state_sysfs_init(adev);
4307 
4308 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4309 		r = amdgpu_pmu_init(adev);
4310 	if (r)
4311 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4312 
4313 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4314 	if (amdgpu_device_cache_pci_state(adev->pdev))
4315 		pci_restore_state(pdev);
4316 
4317 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4318 	/* this will fail for cards that aren't VGA class devices, just
4319 	 * ignore it
4320 	 */
4321 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4322 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4323 
4324 	px = amdgpu_device_supports_px(ddev);
4325 
4326 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4327 				apple_gmux_detect(NULL, NULL)))
4328 		vga_switcheroo_register_client(adev->pdev,
4329 					       &amdgpu_switcheroo_ops, px);
4330 
4331 	if (px)
4332 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4333 
4334 	if (adev->gmc.xgmi.pending_reset)
4335 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4336 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4337 
4338 	amdgpu_device_check_iommu_direct_map(adev);
4339 
4340 	return 0;
4341 
4342 release_ras_con:
4343 	if (amdgpu_sriov_vf(adev))
4344 		amdgpu_virt_release_full_gpu(adev, true);
4345 
4346 	/* failed in exclusive mode due to timeout */
4347 	if (amdgpu_sriov_vf(adev) &&
4348 		!amdgpu_sriov_runtime(adev) &&
4349 		amdgpu_virt_mmio_blocked(adev) &&
4350 		!amdgpu_virt_wait_reset(adev)) {
4351 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4352 		/* Don't send request since VF is inactive. */
4353 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4354 		adev->virt.ops = NULL;
4355 		r = -EAGAIN;
4356 	}
4357 	amdgpu_release_ras_context(adev);
4358 
4359 failed:
4360 	amdgpu_vf_error_trans_all(adev);
4361 
4362 	return r;
4363 }
4364 
4365 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4366 {
4367 
4368 	/* Clear all CPU mappings pointing to this device */
4369 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4370 
4371 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4372 	amdgpu_doorbell_fini(adev);
4373 
4374 	iounmap(adev->rmmio);
4375 	adev->rmmio = NULL;
4376 	if (adev->mman.aper_base_kaddr)
4377 		iounmap(adev->mman.aper_base_kaddr);
4378 	adev->mman.aper_base_kaddr = NULL;
4379 
4380 	/* Memory manager related */
4381 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4382 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4383 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4384 	}
4385 }
4386 
4387 /**
4388  * amdgpu_device_fini_hw - tear down the driver
4389  *
4390  * @adev: amdgpu_device pointer
4391  *
4392  * Tear down the driver info (all asics).
4393  * Called at driver shutdown.
4394  */
4395 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4396 {
4397 	dev_info(adev->dev, "amdgpu: finishing device.\n");
4398 	flush_delayed_work(&adev->delayed_init_work);
4399 	adev->shutdown = true;
4400 
4401 	/* make sure IB test finished before entering exclusive mode
4402 	 * to avoid preemption on IB test
4403 	 */
4404 	if (amdgpu_sriov_vf(adev)) {
4405 		amdgpu_virt_request_full_gpu(adev, false);
4406 		amdgpu_virt_fini_data_exchange(adev);
4407 	}
4408 
4409 	/* disable all interrupts */
4410 	amdgpu_irq_disable_all(adev);
4411 	if (adev->mode_info.mode_config_initialized) {
4412 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4413 			drm_helper_force_disable_all(adev_to_drm(adev));
4414 		else
4415 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4416 	}
4417 	amdgpu_fence_driver_hw_fini(adev);
4418 
4419 	if (adev->mman.initialized)
4420 		drain_workqueue(adev->mman.bdev.wq);
4421 
4422 	if (adev->pm.sysfs_initialized)
4423 		amdgpu_pm_sysfs_fini(adev);
4424 	if (adev->ucode_sysfs_en)
4425 		amdgpu_ucode_sysfs_fini(adev);
4426 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4427 	amdgpu_fru_sysfs_fini(adev);
4428 
4429 	amdgpu_reg_state_sysfs_fini(adev);
4430 
4431 	/* disable ras feature must before hw fini */
4432 	amdgpu_ras_pre_fini(adev);
4433 
4434 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4435 
4436 	amdgpu_device_ip_fini_early(adev);
4437 
4438 	amdgpu_irq_fini_hw(adev);
4439 
4440 	if (adev->mman.initialized)
4441 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4442 
4443 	amdgpu_gart_dummy_page_fini(adev);
4444 
4445 	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4446 		amdgpu_device_unmap_mmio(adev);
4447 
4448 }
4449 
4450 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4451 {
4452 	int idx;
4453 	bool px;
4454 
4455 	amdgpu_fence_driver_sw_fini(adev);
4456 	amdgpu_device_ip_fini(adev);
4457 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4458 	adev->accel_working = false;
4459 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4460 
4461 	amdgpu_reset_fini(adev);
4462 
4463 	/* free i2c buses */
4464 	if (!amdgpu_device_has_dc_support(adev))
4465 		amdgpu_i2c_fini(adev);
4466 
4467 	if (amdgpu_emu_mode != 1)
4468 		amdgpu_atombios_fini(adev);
4469 
4470 	kfree(adev->bios);
4471 	adev->bios = NULL;
4472 
4473 	kfree(adev->fru_info);
4474 	adev->fru_info = NULL;
4475 
4476 	px = amdgpu_device_supports_px(adev_to_drm(adev));
4477 
4478 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4479 				apple_gmux_detect(NULL, NULL)))
4480 		vga_switcheroo_unregister_client(adev->pdev);
4481 
4482 	if (px)
4483 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4484 
4485 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4486 		vga_client_unregister(adev->pdev);
4487 
4488 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4489 
4490 		iounmap(adev->rmmio);
4491 		adev->rmmio = NULL;
4492 		amdgpu_doorbell_fini(adev);
4493 		drm_dev_exit(idx);
4494 	}
4495 
4496 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4497 		amdgpu_pmu_fini(adev);
4498 	if (adev->mman.discovery_bin)
4499 		amdgpu_discovery_fini(adev);
4500 
4501 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4502 	adev->reset_domain = NULL;
4503 
4504 	kfree(adev->pci_state);
4505 
4506 }
4507 
4508 /**
4509  * amdgpu_device_evict_resources - evict device resources
4510  * @adev: amdgpu device object
4511  *
4512  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4513  * of the vram memory type. Mainly used for evicting device resources
4514  * at suspend time.
4515  *
4516  */
4517 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4518 {
4519 	int ret;
4520 
4521 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4522 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4523 		return 0;
4524 
4525 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4526 	if (ret)
4527 		DRM_WARN("evicting device resources failed\n");
4528 	return ret;
4529 }
4530 
4531 /*
4532  * Suspend & resume.
4533  */
4534 /**
4535  * amdgpu_device_prepare - prepare for device suspend
4536  *
4537  * @dev: drm dev pointer
4538  *
4539  * Prepare to put the hw in the suspend state (all asics).
4540  * Returns 0 for success or an error on failure.
4541  * Called at driver suspend.
4542  */
4543 int amdgpu_device_prepare(struct drm_device *dev)
4544 {
4545 	struct amdgpu_device *adev = drm_to_adev(dev);
4546 	int i, r;
4547 
4548 	amdgpu_choose_low_power_state(adev);
4549 
4550 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4551 		return 0;
4552 
4553 	/* Evict the majority of BOs before starting suspend sequence */
4554 	r = amdgpu_device_evict_resources(adev);
4555 	if (r)
4556 		goto unprepare;
4557 
4558 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4559 
4560 	for (i = 0; i < adev->num_ip_blocks; i++) {
4561 		if (!adev->ip_blocks[i].status.valid)
4562 			continue;
4563 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4564 			continue;
4565 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4566 		if (r)
4567 			goto unprepare;
4568 	}
4569 
4570 	return 0;
4571 
4572 unprepare:
4573 	adev->in_s0ix = adev->in_s3 = false;
4574 
4575 	return r;
4576 }
4577 
4578 /**
4579  * amdgpu_device_suspend - initiate device suspend
4580  *
4581  * @dev: drm dev pointer
4582  * @fbcon : notify the fbdev of suspend
4583  *
4584  * Puts the hw in the suspend state (all asics).
4585  * Returns 0 for success or an error on failure.
4586  * Called at driver suspend.
4587  */
4588 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4589 {
4590 	struct amdgpu_device *adev = drm_to_adev(dev);
4591 	int r = 0;
4592 
4593 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4594 		return 0;
4595 
4596 	adev->in_suspend = true;
4597 
4598 	if (amdgpu_sriov_vf(adev)) {
4599 		amdgpu_virt_fini_data_exchange(adev);
4600 		r = amdgpu_virt_request_full_gpu(adev, false);
4601 		if (r)
4602 			return r;
4603 	}
4604 
4605 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4606 		DRM_WARN("smart shift update failed\n");
4607 
4608 	if (fbcon)
4609 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4610 
4611 	cancel_delayed_work_sync(&adev->delayed_init_work);
4612 
4613 	amdgpu_ras_suspend(adev);
4614 
4615 	amdgpu_device_ip_suspend_phase1(adev);
4616 
4617 	if (!adev->in_s0ix)
4618 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4619 
4620 	r = amdgpu_device_evict_resources(adev);
4621 	if (r)
4622 		return r;
4623 
4624 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4625 
4626 	amdgpu_fence_driver_hw_fini(adev);
4627 
4628 	amdgpu_device_ip_suspend_phase2(adev);
4629 
4630 	if (amdgpu_sriov_vf(adev))
4631 		amdgpu_virt_release_full_gpu(adev, false);
4632 
4633 	r = amdgpu_dpm_notify_rlc_state(adev, false);
4634 	if (r)
4635 		return r;
4636 
4637 	return 0;
4638 }
4639 
4640 /**
4641  * amdgpu_device_resume - initiate device resume
4642  *
4643  * @dev: drm dev pointer
4644  * @fbcon : notify the fbdev of resume
4645  *
4646  * Bring the hw back to operating state (all asics).
4647  * Returns 0 for success or an error on failure.
4648  * Called at driver resume.
4649  */
4650 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4651 {
4652 	struct amdgpu_device *adev = drm_to_adev(dev);
4653 	int r = 0;
4654 
4655 	if (amdgpu_sriov_vf(adev)) {
4656 		r = amdgpu_virt_request_full_gpu(adev, true);
4657 		if (r)
4658 			return r;
4659 	}
4660 
4661 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4662 		return 0;
4663 
4664 	if (adev->in_s0ix)
4665 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4666 
4667 	/* post card */
4668 	if (amdgpu_device_need_post(adev)) {
4669 		r = amdgpu_device_asic_init(adev);
4670 		if (r)
4671 			dev_err(adev->dev, "amdgpu asic init failed\n");
4672 	}
4673 
4674 	r = amdgpu_device_ip_resume(adev);
4675 
4676 	if (r) {
4677 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4678 		goto exit;
4679 	}
4680 	amdgpu_fence_driver_hw_init(adev);
4681 
4682 	if (!adev->in_s0ix) {
4683 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4684 		if (r)
4685 			goto exit;
4686 	}
4687 
4688 	r = amdgpu_device_ip_late_init(adev);
4689 	if (r)
4690 		goto exit;
4691 
4692 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4693 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4694 exit:
4695 	if (amdgpu_sriov_vf(adev)) {
4696 		amdgpu_virt_init_data_exchange(adev);
4697 		amdgpu_virt_release_full_gpu(adev, true);
4698 	}
4699 
4700 	if (r)
4701 		return r;
4702 
4703 	/* Make sure IB tests flushed */
4704 	flush_delayed_work(&adev->delayed_init_work);
4705 
4706 	if (fbcon)
4707 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4708 
4709 	amdgpu_ras_resume(adev);
4710 
4711 	if (adev->mode_info.num_crtc) {
4712 		/*
4713 		 * Most of the connector probing functions try to acquire runtime pm
4714 		 * refs to ensure that the GPU is powered on when connector polling is
4715 		 * performed. Since we're calling this from a runtime PM callback,
4716 		 * trying to acquire rpm refs will cause us to deadlock.
4717 		 *
4718 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4719 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4720 		 */
4721 #ifdef CONFIG_PM
4722 		dev->dev->power.disable_depth++;
4723 #endif
4724 		if (!adev->dc_enabled)
4725 			drm_helper_hpd_irq_event(dev);
4726 		else
4727 			drm_kms_helper_hotplug_event(dev);
4728 #ifdef CONFIG_PM
4729 		dev->dev->power.disable_depth--;
4730 #endif
4731 	}
4732 	adev->in_suspend = false;
4733 
4734 	if (adev->enable_mes)
4735 		amdgpu_mes_self_test(adev);
4736 
4737 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4738 		DRM_WARN("smart shift update failed\n");
4739 
4740 	return 0;
4741 }
4742 
4743 /**
4744  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4745  *
4746  * @adev: amdgpu_device pointer
4747  *
4748  * The list of all the hardware IPs that make up the asic is walked and
4749  * the check_soft_reset callbacks are run.  check_soft_reset determines
4750  * if the asic is still hung or not.
4751  * Returns true if any of the IPs are still in a hung state, false if not.
4752  */
4753 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4754 {
4755 	int i;
4756 	bool asic_hang = false;
4757 
4758 	if (amdgpu_sriov_vf(adev))
4759 		return true;
4760 
4761 	if (amdgpu_asic_need_full_reset(adev))
4762 		return true;
4763 
4764 	for (i = 0; i < adev->num_ip_blocks; i++) {
4765 		if (!adev->ip_blocks[i].status.valid)
4766 			continue;
4767 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4768 			adev->ip_blocks[i].status.hang =
4769 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4770 		if (adev->ip_blocks[i].status.hang) {
4771 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4772 			asic_hang = true;
4773 		}
4774 	}
4775 	return asic_hang;
4776 }
4777 
4778 /**
4779  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4780  *
4781  * @adev: amdgpu_device pointer
4782  *
4783  * The list of all the hardware IPs that make up the asic is walked and the
4784  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4785  * handles any IP specific hardware or software state changes that are
4786  * necessary for a soft reset to succeed.
4787  * Returns 0 on success, negative error code on failure.
4788  */
4789 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4790 {
4791 	int i, r = 0;
4792 
4793 	for (i = 0; i < adev->num_ip_blocks; i++) {
4794 		if (!adev->ip_blocks[i].status.valid)
4795 			continue;
4796 		if (adev->ip_blocks[i].status.hang &&
4797 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4798 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4799 			if (r)
4800 				return r;
4801 		}
4802 	}
4803 
4804 	return 0;
4805 }
4806 
4807 /**
4808  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4809  *
4810  * @adev: amdgpu_device pointer
4811  *
4812  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4813  * reset is necessary to recover.
4814  * Returns true if a full asic reset is required, false if not.
4815  */
4816 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4817 {
4818 	int i;
4819 
4820 	if (amdgpu_asic_need_full_reset(adev))
4821 		return true;
4822 
4823 	for (i = 0; i < adev->num_ip_blocks; i++) {
4824 		if (!adev->ip_blocks[i].status.valid)
4825 			continue;
4826 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4827 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4828 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4829 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4830 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4831 			if (adev->ip_blocks[i].status.hang) {
4832 				dev_info(adev->dev, "Some block need full reset!\n");
4833 				return true;
4834 			}
4835 		}
4836 	}
4837 	return false;
4838 }
4839 
4840 /**
4841  * amdgpu_device_ip_soft_reset - do a soft reset
4842  *
4843  * @adev: amdgpu_device pointer
4844  *
4845  * The list of all the hardware IPs that make up the asic is walked and the
4846  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4847  * IP specific hardware or software state changes that are necessary to soft
4848  * reset the IP.
4849  * Returns 0 on success, negative error code on failure.
4850  */
4851 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4852 {
4853 	int i, r = 0;
4854 
4855 	for (i = 0; i < adev->num_ip_blocks; i++) {
4856 		if (!adev->ip_blocks[i].status.valid)
4857 			continue;
4858 		if (adev->ip_blocks[i].status.hang &&
4859 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4860 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4861 			if (r)
4862 				return r;
4863 		}
4864 	}
4865 
4866 	return 0;
4867 }
4868 
4869 /**
4870  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4871  *
4872  * @adev: amdgpu_device pointer
4873  *
4874  * The list of all the hardware IPs that make up the asic is walked and the
4875  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4876  * handles any IP specific hardware or software state changes that are
4877  * necessary after the IP has been soft reset.
4878  * Returns 0 on success, negative error code on failure.
4879  */
4880 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4881 {
4882 	int i, r = 0;
4883 
4884 	for (i = 0; i < adev->num_ip_blocks; i++) {
4885 		if (!adev->ip_blocks[i].status.valid)
4886 			continue;
4887 		if (adev->ip_blocks[i].status.hang &&
4888 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4889 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4890 		if (r)
4891 			return r;
4892 	}
4893 
4894 	return 0;
4895 }
4896 
4897 /**
4898  * amdgpu_device_recover_vram - Recover some VRAM contents
4899  *
4900  * @adev: amdgpu_device pointer
4901  *
4902  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4903  * restore things like GPUVM page tables after a GPU reset where
4904  * the contents of VRAM might be lost.
4905  *
4906  * Returns:
4907  * 0 on success, negative error code on failure.
4908  */
4909 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4910 {
4911 	struct dma_fence *fence = NULL, *next = NULL;
4912 	struct amdgpu_bo *shadow;
4913 	struct amdgpu_bo_vm *vmbo;
4914 	long r = 1, tmo;
4915 
4916 	if (amdgpu_sriov_runtime(adev))
4917 		tmo = msecs_to_jiffies(8000);
4918 	else
4919 		tmo = msecs_to_jiffies(100);
4920 
4921 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4922 	mutex_lock(&adev->shadow_list_lock);
4923 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4924 		/* If vm is compute context or adev is APU, shadow will be NULL */
4925 		if (!vmbo->shadow)
4926 			continue;
4927 		shadow = vmbo->shadow;
4928 
4929 		/* No need to recover an evicted BO */
4930 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4931 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4932 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4933 			continue;
4934 
4935 		r = amdgpu_bo_restore_shadow(shadow, &next);
4936 		if (r)
4937 			break;
4938 
4939 		if (fence) {
4940 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4941 			dma_fence_put(fence);
4942 			fence = next;
4943 			if (tmo == 0) {
4944 				r = -ETIMEDOUT;
4945 				break;
4946 			} else if (tmo < 0) {
4947 				r = tmo;
4948 				break;
4949 			}
4950 		} else {
4951 			fence = next;
4952 		}
4953 	}
4954 	mutex_unlock(&adev->shadow_list_lock);
4955 
4956 	if (fence)
4957 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4958 	dma_fence_put(fence);
4959 
4960 	if (r < 0 || tmo <= 0) {
4961 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4962 		return -EIO;
4963 	}
4964 
4965 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4966 	return 0;
4967 }
4968 
4969 
4970 /**
4971  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4972  *
4973  * @adev: amdgpu_device pointer
4974  * @from_hypervisor: request from hypervisor
4975  *
4976  * do VF FLR and reinitialize Asic
4977  * return 0 means succeeded otherwise failed
4978  */
4979 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4980 				     bool from_hypervisor)
4981 {
4982 	int r;
4983 	struct amdgpu_hive_info *hive = NULL;
4984 	int retry_limit = 0;
4985 
4986 retry:
4987 	amdgpu_amdkfd_pre_reset(adev);
4988 
4989 	amdgpu_device_stop_pending_resets(adev);
4990 
4991 	if (from_hypervisor)
4992 		r = amdgpu_virt_request_full_gpu(adev, true);
4993 	else
4994 		r = amdgpu_virt_reset_gpu(adev);
4995 	if (r)
4996 		return r;
4997 	amdgpu_ras_set_fed(adev, false);
4998 	amdgpu_irq_gpu_reset_resume_helper(adev);
4999 
5000 	/* some sw clean up VF needs to do before recover */
5001 	amdgpu_virt_post_reset(adev);
5002 
5003 	/* Resume IP prior to SMC */
5004 	r = amdgpu_device_ip_reinit_early_sriov(adev);
5005 	if (r)
5006 		goto error;
5007 
5008 	amdgpu_virt_init_data_exchange(adev);
5009 
5010 	r = amdgpu_device_fw_loading(adev);
5011 	if (r)
5012 		return r;
5013 
5014 	/* now we are okay to resume SMC/CP/SDMA */
5015 	r = amdgpu_device_ip_reinit_late_sriov(adev);
5016 	if (r)
5017 		goto error;
5018 
5019 	hive = amdgpu_get_xgmi_hive(adev);
5020 	/* Update PSP FW topology after reset */
5021 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5022 		r = amdgpu_xgmi_update_topology(hive, adev);
5023 
5024 	if (hive)
5025 		amdgpu_put_xgmi_hive(hive);
5026 
5027 	if (!r) {
5028 		r = amdgpu_ib_ring_tests(adev);
5029 
5030 		amdgpu_amdkfd_post_reset(adev);
5031 	}
5032 
5033 error:
5034 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5035 		amdgpu_inc_vram_lost(adev);
5036 		r = amdgpu_device_recover_vram(adev);
5037 	}
5038 	amdgpu_virt_release_full_gpu(adev, true);
5039 
5040 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5041 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5042 			retry_limit++;
5043 			goto retry;
5044 		} else
5045 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5046 	}
5047 
5048 	return r;
5049 }
5050 
5051 /**
5052  * amdgpu_device_has_job_running - check if there is any job in mirror list
5053  *
5054  * @adev: amdgpu_device pointer
5055  *
5056  * check if there is any job in mirror list
5057  */
5058 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5059 {
5060 	int i;
5061 	struct drm_sched_job *job;
5062 
5063 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5064 		struct amdgpu_ring *ring = adev->rings[i];
5065 
5066 		if (!amdgpu_ring_sched_ready(ring))
5067 			continue;
5068 
5069 		spin_lock(&ring->sched.job_list_lock);
5070 		job = list_first_entry_or_null(&ring->sched.pending_list,
5071 					       struct drm_sched_job, list);
5072 		spin_unlock(&ring->sched.job_list_lock);
5073 		if (job)
5074 			return true;
5075 	}
5076 	return false;
5077 }
5078 
5079 /**
5080  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5081  *
5082  * @adev: amdgpu_device pointer
5083  *
5084  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5085  * a hung GPU.
5086  */
5087 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5088 {
5089 
5090 	if (amdgpu_gpu_recovery == 0)
5091 		goto disabled;
5092 
5093 	/* Skip soft reset check in fatal error mode */
5094 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5095 		return true;
5096 
5097 	if (amdgpu_sriov_vf(adev))
5098 		return true;
5099 
5100 	if (amdgpu_gpu_recovery == -1) {
5101 		switch (adev->asic_type) {
5102 #ifdef CONFIG_DRM_AMDGPU_SI
5103 		case CHIP_VERDE:
5104 		case CHIP_TAHITI:
5105 		case CHIP_PITCAIRN:
5106 		case CHIP_OLAND:
5107 		case CHIP_HAINAN:
5108 #endif
5109 #ifdef CONFIG_DRM_AMDGPU_CIK
5110 		case CHIP_KAVERI:
5111 		case CHIP_KABINI:
5112 		case CHIP_MULLINS:
5113 #endif
5114 		case CHIP_CARRIZO:
5115 		case CHIP_STONEY:
5116 		case CHIP_CYAN_SKILLFISH:
5117 			goto disabled;
5118 		default:
5119 			break;
5120 		}
5121 	}
5122 
5123 	return true;
5124 
5125 disabled:
5126 		dev_info(adev->dev, "GPU recovery disabled.\n");
5127 		return false;
5128 }
5129 
5130 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5131 {
5132 	u32 i;
5133 	int ret = 0;
5134 
5135 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5136 
5137 	dev_info(adev->dev, "GPU mode1 reset\n");
5138 
5139 	/* disable BM */
5140 	pci_clear_master(adev->pdev);
5141 
5142 	amdgpu_device_cache_pci_state(adev->pdev);
5143 
5144 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5145 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5146 		ret = amdgpu_dpm_mode1_reset(adev);
5147 	} else {
5148 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5149 		ret = psp_gpu_reset(adev);
5150 	}
5151 
5152 	if (ret)
5153 		goto mode1_reset_failed;
5154 
5155 	amdgpu_device_load_pci_state(adev->pdev);
5156 	ret = amdgpu_psp_wait_for_bootloader(adev);
5157 	if (ret)
5158 		goto mode1_reset_failed;
5159 
5160 	/* wait for asic to come out of reset */
5161 	for (i = 0; i < adev->usec_timeout; i++) {
5162 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5163 
5164 		if (memsize != 0xffffffff)
5165 			break;
5166 		udelay(1);
5167 	}
5168 
5169 	if (i >= adev->usec_timeout) {
5170 		ret = -ETIMEDOUT;
5171 		goto mode1_reset_failed;
5172 	}
5173 
5174 	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5175 
5176 	return 0;
5177 
5178 mode1_reset_failed:
5179 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5180 	return ret;
5181 }
5182 
5183 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5184 				 struct amdgpu_reset_context *reset_context)
5185 {
5186 	int i, r = 0;
5187 	struct amdgpu_job *job = NULL;
5188 	bool need_full_reset =
5189 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5190 
5191 	if (reset_context->reset_req_dev == adev)
5192 		job = reset_context->job;
5193 
5194 	if (amdgpu_sriov_vf(adev)) {
5195 		/* stop the data exchange thread */
5196 		amdgpu_virt_fini_data_exchange(adev);
5197 	}
5198 
5199 	amdgpu_fence_driver_isr_toggle(adev, true);
5200 
5201 	/* block all schedulers and reset given job's ring */
5202 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5203 		struct amdgpu_ring *ring = adev->rings[i];
5204 
5205 		if (!amdgpu_ring_sched_ready(ring))
5206 			continue;
5207 
5208 		/* Clear job fence from fence drv to avoid force_completion
5209 		 * leave NULL and vm flush fence in fence drv
5210 		 */
5211 		amdgpu_fence_driver_clear_job_fences(ring);
5212 
5213 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5214 		amdgpu_fence_driver_force_completion(ring);
5215 	}
5216 
5217 	amdgpu_fence_driver_isr_toggle(adev, false);
5218 
5219 	if (job && job->vm)
5220 		drm_sched_increase_karma(&job->base);
5221 
5222 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5223 	/* If reset handler not implemented, continue; otherwise return */
5224 	if (r == -EOPNOTSUPP)
5225 		r = 0;
5226 	else
5227 		return r;
5228 
5229 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5230 	if (!amdgpu_sriov_vf(adev)) {
5231 
5232 		if (!need_full_reset)
5233 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5234 
5235 		if (!need_full_reset && amdgpu_gpu_recovery &&
5236 		    amdgpu_device_ip_check_soft_reset(adev)) {
5237 			amdgpu_device_ip_pre_soft_reset(adev);
5238 			r = amdgpu_device_ip_soft_reset(adev);
5239 			amdgpu_device_ip_post_soft_reset(adev);
5240 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5241 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5242 				need_full_reset = true;
5243 			}
5244 		}
5245 
5246 		if (need_full_reset)
5247 			r = amdgpu_device_ip_suspend(adev);
5248 		if (need_full_reset)
5249 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5250 		else
5251 			clear_bit(AMDGPU_NEED_FULL_RESET,
5252 				  &reset_context->flags);
5253 	}
5254 
5255 	return r;
5256 }
5257 
5258 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5259 {
5260 	int i;
5261 
5262 	lockdep_assert_held(&adev->reset_domain->sem);
5263 
5264 	for (i = 0; i < adev->reset_info.num_regs; i++) {
5265 		adev->reset_info.reset_dump_reg_value[i] =
5266 			RREG32(adev->reset_info.reset_dump_reg_list[i]);
5267 
5268 		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5269 					     adev->reset_info.reset_dump_reg_value[i]);
5270 	}
5271 
5272 	return 0;
5273 }
5274 
5275 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5276 			 struct amdgpu_reset_context *reset_context)
5277 {
5278 	struct amdgpu_device *tmp_adev = NULL;
5279 	bool need_full_reset, skip_hw_reset, vram_lost = false;
5280 	int r = 0;
5281 
5282 	/* Try reset handler method first */
5283 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5284 				    reset_list);
5285 	amdgpu_reset_reg_dumps(tmp_adev);
5286 
5287 	reset_context->reset_device_list = device_list_handle;
5288 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5289 	/* If reset handler not implemented, continue; otherwise return */
5290 	if (r == -EOPNOTSUPP)
5291 		r = 0;
5292 	else
5293 		return r;
5294 
5295 	/* Reset handler not implemented, use the default method */
5296 	need_full_reset =
5297 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5298 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5299 
5300 	/*
5301 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5302 	 * to allow proper links negotiation in FW (within 1 sec)
5303 	 */
5304 	if (!skip_hw_reset && need_full_reset) {
5305 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5306 			/* For XGMI run all resets in parallel to speed up the process */
5307 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5308 				tmp_adev->gmc.xgmi.pending_reset = false;
5309 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5310 					r = -EALREADY;
5311 			} else
5312 				r = amdgpu_asic_reset(tmp_adev);
5313 
5314 			if (r) {
5315 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
5316 					 r, adev_to_drm(tmp_adev)->unique);
5317 				goto out;
5318 			}
5319 		}
5320 
5321 		/* For XGMI wait for all resets to complete before proceed */
5322 		if (!r) {
5323 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5324 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5325 					flush_work(&tmp_adev->xgmi_reset_work);
5326 					r = tmp_adev->asic_reset_res;
5327 					if (r)
5328 						break;
5329 				}
5330 			}
5331 		}
5332 	}
5333 
5334 	if (!r && amdgpu_ras_intr_triggered()) {
5335 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5336 			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5337 		}
5338 
5339 		amdgpu_ras_intr_cleared();
5340 	}
5341 
5342 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5343 		if (need_full_reset) {
5344 			/* post card */
5345 			amdgpu_ras_set_fed(tmp_adev, false);
5346 			r = amdgpu_device_asic_init(tmp_adev);
5347 			if (r) {
5348 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5349 			} else {
5350 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5351 
5352 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5353 				if (r)
5354 					goto out;
5355 
5356 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5357 
5358 				amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5359 
5360 				if (vram_lost) {
5361 					DRM_INFO("VRAM is lost due to GPU reset!\n");
5362 					amdgpu_inc_vram_lost(tmp_adev);
5363 				}
5364 
5365 				r = amdgpu_device_fw_loading(tmp_adev);
5366 				if (r)
5367 					return r;
5368 
5369 				r = amdgpu_xcp_restore_partition_mode(
5370 					tmp_adev->xcp_mgr);
5371 				if (r)
5372 					goto out;
5373 
5374 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5375 				if (r)
5376 					goto out;
5377 
5378 				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5379 					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5380 
5381 				if (vram_lost)
5382 					amdgpu_device_fill_reset_magic(tmp_adev);
5383 
5384 				/*
5385 				 * Add this ASIC as tracked as reset was already
5386 				 * complete successfully.
5387 				 */
5388 				amdgpu_register_gpu_instance(tmp_adev);
5389 
5390 				if (!reset_context->hive &&
5391 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5392 					amdgpu_xgmi_add_device(tmp_adev);
5393 
5394 				r = amdgpu_device_ip_late_init(tmp_adev);
5395 				if (r)
5396 					goto out;
5397 
5398 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5399 
5400 				/*
5401 				 * The GPU enters bad state once faulty pages
5402 				 * by ECC has reached the threshold, and ras
5403 				 * recovery is scheduled next. So add one check
5404 				 * here to break recovery if it indeed exceeds
5405 				 * bad page threshold, and remind user to
5406 				 * retire this GPU or setting one bigger
5407 				 * bad_page_threshold value to fix this once
5408 				 * probing driver again.
5409 				 */
5410 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5411 					/* must succeed. */
5412 					amdgpu_ras_resume(tmp_adev);
5413 				} else {
5414 					r = -EINVAL;
5415 					goto out;
5416 				}
5417 
5418 				/* Update PSP FW topology after reset */
5419 				if (reset_context->hive &&
5420 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5421 					r = amdgpu_xgmi_update_topology(
5422 						reset_context->hive, tmp_adev);
5423 			}
5424 		}
5425 
5426 out:
5427 		if (!r) {
5428 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5429 			r = amdgpu_ib_ring_tests(tmp_adev);
5430 			if (r) {
5431 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5432 				need_full_reset = true;
5433 				r = -EAGAIN;
5434 				goto end;
5435 			}
5436 		}
5437 
5438 		if (!r)
5439 			r = amdgpu_device_recover_vram(tmp_adev);
5440 		else
5441 			tmp_adev->asic_reset_res = r;
5442 	}
5443 
5444 end:
5445 	if (need_full_reset)
5446 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5447 	else
5448 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5449 	return r;
5450 }
5451 
5452 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5453 {
5454 
5455 	switch (amdgpu_asic_reset_method(adev)) {
5456 	case AMD_RESET_METHOD_MODE1:
5457 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5458 		break;
5459 	case AMD_RESET_METHOD_MODE2:
5460 		adev->mp1_state = PP_MP1_STATE_RESET;
5461 		break;
5462 	default:
5463 		adev->mp1_state = PP_MP1_STATE_NONE;
5464 		break;
5465 	}
5466 }
5467 
5468 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5469 {
5470 	amdgpu_vf_error_trans_all(adev);
5471 	adev->mp1_state = PP_MP1_STATE_NONE;
5472 }
5473 
5474 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5475 {
5476 	struct pci_dev *p = NULL;
5477 
5478 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5479 			adev->pdev->bus->number, 1);
5480 	if (p) {
5481 		pm_runtime_enable(&(p->dev));
5482 		pm_runtime_resume(&(p->dev));
5483 	}
5484 
5485 	pci_dev_put(p);
5486 }
5487 
5488 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5489 {
5490 	enum amd_reset_method reset_method;
5491 	struct pci_dev *p = NULL;
5492 	u64 expires;
5493 
5494 	/*
5495 	 * For now, only BACO and mode1 reset are confirmed
5496 	 * to suffer the audio issue without proper suspended.
5497 	 */
5498 	reset_method = amdgpu_asic_reset_method(adev);
5499 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5500 	     (reset_method != AMD_RESET_METHOD_MODE1))
5501 		return -EINVAL;
5502 
5503 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5504 			adev->pdev->bus->number, 1);
5505 	if (!p)
5506 		return -ENODEV;
5507 
5508 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5509 	if (!expires)
5510 		/*
5511 		 * If we cannot get the audio device autosuspend delay,
5512 		 * a fixed 4S interval will be used. Considering 3S is
5513 		 * the audio controller default autosuspend delay setting.
5514 		 * 4S used here is guaranteed to cover that.
5515 		 */
5516 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5517 
5518 	while (!pm_runtime_status_suspended(&(p->dev))) {
5519 		if (!pm_runtime_suspend(&(p->dev)))
5520 			break;
5521 
5522 		if (expires < ktime_get_mono_fast_ns()) {
5523 			dev_warn(adev->dev, "failed to suspend display audio\n");
5524 			pci_dev_put(p);
5525 			/* TODO: abort the succeeding gpu reset? */
5526 			return -ETIMEDOUT;
5527 		}
5528 	}
5529 
5530 	pm_runtime_disable(&(p->dev));
5531 
5532 	pci_dev_put(p);
5533 	return 0;
5534 }
5535 
5536 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5537 {
5538 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5539 
5540 #if defined(CONFIG_DEBUG_FS)
5541 	if (!amdgpu_sriov_vf(adev))
5542 		cancel_work(&adev->reset_work);
5543 #endif
5544 
5545 	if (adev->kfd.dev)
5546 		cancel_work(&adev->kfd.reset_work);
5547 
5548 	if (amdgpu_sriov_vf(adev))
5549 		cancel_work(&adev->virt.flr_work);
5550 
5551 	if (con && adev->ras_enabled)
5552 		cancel_work(&con->recovery_work);
5553 
5554 }
5555 
5556 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5557 {
5558 	struct amdgpu_device *tmp_adev;
5559 	int ret = 0;
5560 	u32 status;
5561 
5562 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5563 		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5564 		if (PCI_POSSIBLE_ERROR(status)) {
5565 			dev_err(tmp_adev->dev, "device lost from bus!");
5566 			ret = -ENODEV;
5567 		}
5568 	}
5569 
5570 	return ret;
5571 }
5572 
5573 /**
5574  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5575  *
5576  * @adev: amdgpu_device pointer
5577  * @job: which job trigger hang
5578  * @reset_context: amdgpu reset context pointer
5579  *
5580  * Attempt to reset the GPU if it has hung (all asics).
5581  * Attempt to do soft-reset or full-reset and reinitialize Asic
5582  * Returns 0 for success or an error on failure.
5583  */
5584 
5585 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5586 			      struct amdgpu_job *job,
5587 			      struct amdgpu_reset_context *reset_context)
5588 {
5589 	struct list_head device_list, *device_list_handle =  NULL;
5590 	bool job_signaled = false;
5591 	struct amdgpu_hive_info *hive = NULL;
5592 	struct amdgpu_device *tmp_adev = NULL;
5593 	int i, r = 0;
5594 	bool need_emergency_restart = false;
5595 	bool audio_suspended = false;
5596 
5597 	/*
5598 	 * Special case: RAS triggered and full reset isn't supported
5599 	 */
5600 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5601 
5602 	/*
5603 	 * Flush RAM to disk so that after reboot
5604 	 * the user can read log and see why the system rebooted.
5605 	 */
5606 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5607 		amdgpu_ras_get_context(adev)->reboot) {
5608 		DRM_WARN("Emergency reboot.");
5609 
5610 		ksys_sync_helper();
5611 		emergency_restart();
5612 	}
5613 
5614 	dev_info(adev->dev, "GPU %s begin!\n",
5615 		need_emergency_restart ? "jobs stop":"reset");
5616 
5617 	if (!amdgpu_sriov_vf(adev))
5618 		hive = amdgpu_get_xgmi_hive(adev);
5619 	if (hive)
5620 		mutex_lock(&hive->hive_lock);
5621 
5622 	reset_context->job = job;
5623 	reset_context->hive = hive;
5624 	/*
5625 	 * Build list of devices to reset.
5626 	 * In case we are in XGMI hive mode, resort the device list
5627 	 * to put adev in the 1st position.
5628 	 */
5629 	INIT_LIST_HEAD(&device_list);
5630 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5631 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5632 			list_add_tail(&tmp_adev->reset_list, &device_list);
5633 			if (adev->shutdown)
5634 				tmp_adev->shutdown = true;
5635 		}
5636 		if (!list_is_first(&adev->reset_list, &device_list))
5637 			list_rotate_to_front(&adev->reset_list, &device_list);
5638 		device_list_handle = &device_list;
5639 	} else {
5640 		list_add_tail(&adev->reset_list, &device_list);
5641 		device_list_handle = &device_list;
5642 	}
5643 
5644 	if (!amdgpu_sriov_vf(adev)) {
5645 		r = amdgpu_device_health_check(device_list_handle);
5646 		if (r)
5647 			goto end_reset;
5648 	}
5649 
5650 	/* We need to lock reset domain only once both for XGMI and single device */
5651 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5652 				    reset_list);
5653 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5654 
5655 	/* block all schedulers and reset given job's ring */
5656 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5657 
5658 		amdgpu_device_set_mp1_state(tmp_adev);
5659 
5660 		/*
5661 		 * Try to put the audio codec into suspend state
5662 		 * before gpu reset started.
5663 		 *
5664 		 * Due to the power domain of the graphics device
5665 		 * is shared with AZ power domain. Without this,
5666 		 * we may change the audio hardware from behind
5667 		 * the audio driver's back. That will trigger
5668 		 * some audio codec errors.
5669 		 */
5670 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5671 			audio_suspended = true;
5672 
5673 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5674 
5675 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5676 
5677 		if (!amdgpu_sriov_vf(tmp_adev))
5678 			amdgpu_amdkfd_pre_reset(tmp_adev);
5679 
5680 		/*
5681 		 * Mark these ASICs to be reseted as untracked first
5682 		 * And add them back after reset completed
5683 		 */
5684 		amdgpu_unregister_gpu_instance(tmp_adev);
5685 
5686 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5687 
5688 		/* disable ras on ALL IPs */
5689 		if (!need_emergency_restart &&
5690 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5691 			amdgpu_ras_suspend(tmp_adev);
5692 
5693 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5694 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5695 
5696 			if (!amdgpu_ring_sched_ready(ring))
5697 				continue;
5698 
5699 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5700 
5701 			if (need_emergency_restart)
5702 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5703 		}
5704 		atomic_inc(&tmp_adev->gpu_reset_counter);
5705 	}
5706 
5707 	if (need_emergency_restart)
5708 		goto skip_sched_resume;
5709 
5710 	/*
5711 	 * Must check guilty signal here since after this point all old
5712 	 * HW fences are force signaled.
5713 	 *
5714 	 * job->base holds a reference to parent fence
5715 	 */
5716 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5717 		job_signaled = true;
5718 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5719 		goto skip_hw_reset;
5720 	}
5721 
5722 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5723 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5724 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5725 		/*TODO Should we stop ?*/
5726 		if (r) {
5727 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5728 				  r, adev_to_drm(tmp_adev)->unique);
5729 			tmp_adev->asic_reset_res = r;
5730 		}
5731 
5732 		if (!amdgpu_sriov_vf(tmp_adev))
5733 			/*
5734 			* Drop all pending non scheduler resets. Scheduler resets
5735 			* were already dropped during drm_sched_stop
5736 			*/
5737 			amdgpu_device_stop_pending_resets(tmp_adev);
5738 	}
5739 
5740 	/* Actual ASIC resets if needed.*/
5741 	/* Host driver will handle XGMI hive reset for SRIOV */
5742 	if (amdgpu_sriov_vf(adev)) {
5743 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
5744 		if (r)
5745 			adev->asic_reset_res = r;
5746 
5747 		/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5748 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5749 			    IP_VERSION(9, 4, 2) ||
5750 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5751 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5752 			amdgpu_ras_resume(adev);
5753 	} else {
5754 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5755 		if (r && r == -EAGAIN)
5756 			goto retry;
5757 	}
5758 
5759 skip_hw_reset:
5760 
5761 	/* Post ASIC reset for all devs .*/
5762 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5763 
5764 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5765 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5766 
5767 			if (!amdgpu_ring_sched_ready(ring))
5768 				continue;
5769 
5770 			drm_sched_start(&ring->sched, true);
5771 		}
5772 
5773 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5774 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5775 
5776 		if (tmp_adev->asic_reset_res)
5777 			r = tmp_adev->asic_reset_res;
5778 
5779 		tmp_adev->asic_reset_res = 0;
5780 
5781 		if (r) {
5782 			/* bad news, how to tell it to userspace ? */
5783 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5784 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5785 		} else {
5786 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5787 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5788 				DRM_WARN("smart shift update failed\n");
5789 		}
5790 	}
5791 
5792 skip_sched_resume:
5793 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5794 		/* unlock kfd: SRIOV would do it separately */
5795 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5796 			amdgpu_amdkfd_post_reset(tmp_adev);
5797 
5798 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5799 		 * need to bring up kfd here if it's not be initialized before
5800 		 */
5801 		if (!adev->kfd.init_complete)
5802 			amdgpu_amdkfd_device_init(adev);
5803 
5804 		if (audio_suspended)
5805 			amdgpu_device_resume_display_audio(tmp_adev);
5806 
5807 		amdgpu_device_unset_mp1_state(tmp_adev);
5808 
5809 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5810 	}
5811 
5812 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5813 					    reset_list);
5814 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5815 
5816 end_reset:
5817 	if (hive) {
5818 		mutex_unlock(&hive->hive_lock);
5819 		amdgpu_put_xgmi_hive(hive);
5820 	}
5821 
5822 	if (r)
5823 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5824 
5825 	atomic_set(&adev->reset_domain->reset_res, r);
5826 	return r;
5827 }
5828 
5829 /**
5830  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5831  *
5832  * @adev: amdgpu_device pointer
5833  * @speed: pointer to the speed of the link
5834  * @width: pointer to the width of the link
5835  *
5836  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5837  * first physical partner to an AMD dGPU.
5838  * This will exclude any virtual switches and links.
5839  */
5840 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5841 					    enum pci_bus_speed *speed,
5842 					    enum pcie_link_width *width)
5843 {
5844 	struct pci_dev *parent = adev->pdev;
5845 
5846 	if (!speed || !width)
5847 		return;
5848 
5849 	*speed = PCI_SPEED_UNKNOWN;
5850 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5851 
5852 	while ((parent = pci_upstream_bridge(parent))) {
5853 		/* skip upstream/downstream switches internal to dGPU*/
5854 		if (parent->vendor == PCI_VENDOR_ID_ATI)
5855 			continue;
5856 		*speed = pcie_get_speed_cap(parent);
5857 		*width = pcie_get_width_cap(parent);
5858 		break;
5859 	}
5860 }
5861 
5862 /**
5863  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5864  *
5865  * @adev: amdgpu_device pointer
5866  *
5867  * Fetchs and stores in the driver the PCIE capabilities (gen speed
5868  * and lanes) of the slot the device is in. Handles APUs and
5869  * virtualized environments where PCIE config space may not be available.
5870  */
5871 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5872 {
5873 	struct pci_dev *pdev;
5874 	enum pci_bus_speed speed_cap, platform_speed_cap;
5875 	enum pcie_link_width platform_link_width;
5876 
5877 	if (amdgpu_pcie_gen_cap)
5878 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5879 
5880 	if (amdgpu_pcie_lane_cap)
5881 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5882 
5883 	/* covers APUs as well */
5884 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5885 		if (adev->pm.pcie_gen_mask == 0)
5886 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5887 		if (adev->pm.pcie_mlw_mask == 0)
5888 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5889 		return;
5890 	}
5891 
5892 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5893 		return;
5894 
5895 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5896 					&platform_link_width);
5897 
5898 	if (adev->pm.pcie_gen_mask == 0) {
5899 		/* asic caps */
5900 		pdev = adev->pdev;
5901 		speed_cap = pcie_get_speed_cap(pdev);
5902 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5903 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5904 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5905 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5906 		} else {
5907 			if (speed_cap == PCIE_SPEED_32_0GT)
5908 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5909 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5910 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5911 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5912 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5913 			else if (speed_cap == PCIE_SPEED_16_0GT)
5914 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5915 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5916 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5917 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5918 			else if (speed_cap == PCIE_SPEED_8_0GT)
5919 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5920 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5921 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5922 			else if (speed_cap == PCIE_SPEED_5_0GT)
5923 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5924 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5925 			else
5926 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5927 		}
5928 		/* platform caps */
5929 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5930 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5931 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5932 		} else {
5933 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5934 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5935 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5936 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5937 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5938 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5939 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5940 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5941 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5942 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5943 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5944 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5945 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5946 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5947 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5948 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5949 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5950 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5951 			else
5952 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5953 
5954 		}
5955 	}
5956 	if (adev->pm.pcie_mlw_mask == 0) {
5957 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5958 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5959 		} else {
5960 			switch (platform_link_width) {
5961 			case PCIE_LNK_X32:
5962 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5963 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5964 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5965 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5966 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5967 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5968 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5969 				break;
5970 			case PCIE_LNK_X16:
5971 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5972 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5973 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5974 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5975 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5976 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5977 				break;
5978 			case PCIE_LNK_X12:
5979 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5980 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5981 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5982 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5983 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5984 				break;
5985 			case PCIE_LNK_X8:
5986 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5987 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5988 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5989 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5990 				break;
5991 			case PCIE_LNK_X4:
5992 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5993 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5994 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5995 				break;
5996 			case PCIE_LNK_X2:
5997 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5998 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5999 				break;
6000 			case PCIE_LNK_X1:
6001 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6002 				break;
6003 			default:
6004 				break;
6005 			}
6006 		}
6007 	}
6008 }
6009 
6010 /**
6011  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6012  *
6013  * @adev: amdgpu_device pointer
6014  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6015  *
6016  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6017  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6018  * @peer_adev.
6019  */
6020 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6021 				      struct amdgpu_device *peer_adev)
6022 {
6023 #ifdef CONFIG_HSA_AMD_P2P
6024 	uint64_t address_mask = peer_adev->dev->dma_mask ?
6025 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6026 	resource_size_t aper_limit =
6027 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
6028 	bool p2p_access =
6029 		!adev->gmc.xgmi.connected_to_cpu &&
6030 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6031 
6032 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6033 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6034 		!(adev->gmc.aper_base & address_mask ||
6035 		  aper_limit & address_mask));
6036 #else
6037 	return false;
6038 #endif
6039 }
6040 
6041 int amdgpu_device_baco_enter(struct drm_device *dev)
6042 {
6043 	struct amdgpu_device *adev = drm_to_adev(dev);
6044 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6045 
6046 	if (!amdgpu_device_supports_baco(dev))
6047 		return -ENOTSUPP;
6048 
6049 	if (ras && adev->ras_enabled &&
6050 	    adev->nbio.funcs->enable_doorbell_interrupt)
6051 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6052 
6053 	return amdgpu_dpm_baco_enter(adev);
6054 }
6055 
6056 int amdgpu_device_baco_exit(struct drm_device *dev)
6057 {
6058 	struct amdgpu_device *adev = drm_to_adev(dev);
6059 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6060 	int ret = 0;
6061 
6062 	if (!amdgpu_device_supports_baco(dev))
6063 		return -ENOTSUPP;
6064 
6065 	ret = amdgpu_dpm_baco_exit(adev);
6066 	if (ret)
6067 		return ret;
6068 
6069 	if (ras && adev->ras_enabled &&
6070 	    adev->nbio.funcs->enable_doorbell_interrupt)
6071 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6072 
6073 	if (amdgpu_passthrough(adev) &&
6074 	    adev->nbio.funcs->clear_doorbell_interrupt)
6075 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6076 
6077 	return 0;
6078 }
6079 
6080 /**
6081  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6082  * @pdev: PCI device struct
6083  * @state: PCI channel state
6084  *
6085  * Description: Called when a PCI error is detected.
6086  *
6087  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6088  */
6089 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6090 {
6091 	struct drm_device *dev = pci_get_drvdata(pdev);
6092 	struct amdgpu_device *adev = drm_to_adev(dev);
6093 	int i;
6094 
6095 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6096 
6097 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6098 		DRM_WARN("No support for XGMI hive yet...");
6099 		return PCI_ERS_RESULT_DISCONNECT;
6100 	}
6101 
6102 	adev->pci_channel_state = state;
6103 
6104 	switch (state) {
6105 	case pci_channel_io_normal:
6106 		return PCI_ERS_RESULT_CAN_RECOVER;
6107 	/* Fatal error, prepare for slot reset */
6108 	case pci_channel_io_frozen:
6109 		/*
6110 		 * Locking adev->reset_domain->sem will prevent any external access
6111 		 * to GPU during PCI error recovery
6112 		 */
6113 		amdgpu_device_lock_reset_domain(adev->reset_domain);
6114 		amdgpu_device_set_mp1_state(adev);
6115 
6116 		/*
6117 		 * Block any work scheduling as we do for regular GPU reset
6118 		 * for the duration of the recovery
6119 		 */
6120 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6121 			struct amdgpu_ring *ring = adev->rings[i];
6122 
6123 			if (!amdgpu_ring_sched_ready(ring))
6124 				continue;
6125 
6126 			drm_sched_stop(&ring->sched, NULL);
6127 		}
6128 		atomic_inc(&adev->gpu_reset_counter);
6129 		return PCI_ERS_RESULT_NEED_RESET;
6130 	case pci_channel_io_perm_failure:
6131 		/* Permanent error, prepare for device removal */
6132 		return PCI_ERS_RESULT_DISCONNECT;
6133 	}
6134 
6135 	return PCI_ERS_RESULT_NEED_RESET;
6136 }
6137 
6138 /**
6139  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6140  * @pdev: pointer to PCI device
6141  */
6142 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6143 {
6144 
6145 	DRM_INFO("PCI error: mmio enabled callback!!\n");
6146 
6147 	/* TODO - dump whatever for debugging purposes */
6148 
6149 	/* This called only if amdgpu_pci_error_detected returns
6150 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6151 	 * works, no need to reset slot.
6152 	 */
6153 
6154 	return PCI_ERS_RESULT_RECOVERED;
6155 }
6156 
6157 /**
6158  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6159  * @pdev: PCI device struct
6160  *
6161  * Description: This routine is called by the pci error recovery
6162  * code after the PCI slot has been reset, just before we
6163  * should resume normal operations.
6164  */
6165 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6166 {
6167 	struct drm_device *dev = pci_get_drvdata(pdev);
6168 	struct amdgpu_device *adev = drm_to_adev(dev);
6169 	int r, i;
6170 	struct amdgpu_reset_context reset_context;
6171 	u32 memsize;
6172 	struct list_head device_list;
6173 	struct amdgpu_hive_info *hive;
6174 	int hive_ras_recovery = 0;
6175 	struct amdgpu_ras *ras;
6176 
6177 	/* PCI error slot reset should be skipped During RAS recovery */
6178 	hive = amdgpu_get_xgmi_hive(adev);
6179 	if (hive) {
6180 		hive_ras_recovery = atomic_read(&hive->ras_recovery);
6181 		amdgpu_put_xgmi_hive(hive);
6182 	}
6183 	ras = amdgpu_ras_get_context(adev);
6184 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
6185 		 ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
6186 		return PCI_ERS_RESULT_RECOVERED;
6187 
6188 	DRM_INFO("PCI error: slot reset callback!!\n");
6189 
6190 	memset(&reset_context, 0, sizeof(reset_context));
6191 
6192 	INIT_LIST_HEAD(&device_list);
6193 	list_add_tail(&adev->reset_list, &device_list);
6194 
6195 	/* wait for asic to come out of reset */
6196 	msleep(500);
6197 
6198 	/* Restore PCI confspace */
6199 	amdgpu_device_load_pci_state(pdev);
6200 
6201 	/* confirm  ASIC came out of reset */
6202 	for (i = 0; i < adev->usec_timeout; i++) {
6203 		memsize = amdgpu_asic_get_config_memsize(adev);
6204 
6205 		if (memsize != 0xffffffff)
6206 			break;
6207 		udelay(1);
6208 	}
6209 	if (memsize == 0xffffffff) {
6210 		r = -ETIME;
6211 		goto out;
6212 	}
6213 
6214 	reset_context.method = AMD_RESET_METHOD_NONE;
6215 	reset_context.reset_req_dev = adev;
6216 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6217 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6218 
6219 	adev->no_hw_access = true;
6220 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6221 	adev->no_hw_access = false;
6222 	if (r)
6223 		goto out;
6224 
6225 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6226 
6227 out:
6228 	if (!r) {
6229 		if (amdgpu_device_cache_pci_state(adev->pdev))
6230 			pci_restore_state(adev->pdev);
6231 
6232 		DRM_INFO("PCIe error recovery succeeded\n");
6233 	} else {
6234 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
6235 		amdgpu_device_unset_mp1_state(adev);
6236 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6237 	}
6238 
6239 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6240 }
6241 
6242 /**
6243  * amdgpu_pci_resume() - resume normal ops after PCI reset
6244  * @pdev: pointer to PCI device
6245  *
6246  * Called when the error recovery driver tells us that its
6247  * OK to resume normal operation.
6248  */
6249 void amdgpu_pci_resume(struct pci_dev *pdev)
6250 {
6251 	struct drm_device *dev = pci_get_drvdata(pdev);
6252 	struct amdgpu_device *adev = drm_to_adev(dev);
6253 	int i;
6254 
6255 
6256 	DRM_INFO("PCI error: resume callback!!\n");
6257 
6258 	/* Only continue execution for the case of pci_channel_io_frozen */
6259 	if (adev->pci_channel_state != pci_channel_io_frozen)
6260 		return;
6261 
6262 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6263 		struct amdgpu_ring *ring = adev->rings[i];
6264 
6265 		if (!amdgpu_ring_sched_ready(ring))
6266 			continue;
6267 
6268 		drm_sched_start(&ring->sched, true);
6269 	}
6270 
6271 	amdgpu_device_unset_mp1_state(adev);
6272 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6273 }
6274 
6275 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6276 {
6277 	struct drm_device *dev = pci_get_drvdata(pdev);
6278 	struct amdgpu_device *adev = drm_to_adev(dev);
6279 	int r;
6280 
6281 	r = pci_save_state(pdev);
6282 	if (!r) {
6283 		kfree(adev->pci_state);
6284 
6285 		adev->pci_state = pci_store_saved_state(pdev);
6286 
6287 		if (!adev->pci_state) {
6288 			DRM_ERROR("Failed to store PCI saved state");
6289 			return false;
6290 		}
6291 	} else {
6292 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6293 		return false;
6294 	}
6295 
6296 	return true;
6297 }
6298 
6299 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6300 {
6301 	struct drm_device *dev = pci_get_drvdata(pdev);
6302 	struct amdgpu_device *adev = drm_to_adev(dev);
6303 	int r;
6304 
6305 	if (!adev->pci_state)
6306 		return false;
6307 
6308 	r = pci_load_saved_state(pdev, adev->pci_state);
6309 
6310 	if (!r) {
6311 		pci_restore_state(pdev);
6312 	} else {
6313 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6314 		return false;
6315 	}
6316 
6317 	return true;
6318 }
6319 
6320 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6321 		struct amdgpu_ring *ring)
6322 {
6323 #ifdef CONFIG_X86_64
6324 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6325 		return;
6326 #endif
6327 	if (adev->gmc.xgmi.connected_to_cpu)
6328 		return;
6329 
6330 	if (ring && ring->funcs->emit_hdp_flush)
6331 		amdgpu_ring_emit_hdp_flush(ring);
6332 	else
6333 		amdgpu_asic_flush_hdp(adev, ring);
6334 }
6335 
6336 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6337 		struct amdgpu_ring *ring)
6338 {
6339 #ifdef CONFIG_X86_64
6340 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6341 		return;
6342 #endif
6343 	if (adev->gmc.xgmi.connected_to_cpu)
6344 		return;
6345 
6346 	amdgpu_asic_invalidate_hdp(adev, ring);
6347 }
6348 
6349 int amdgpu_in_reset(struct amdgpu_device *adev)
6350 {
6351 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6352 }
6353 
6354 /**
6355  * amdgpu_device_halt() - bring hardware to some kind of halt state
6356  *
6357  * @adev: amdgpu_device pointer
6358  *
6359  * Bring hardware to some kind of halt state so that no one can touch it
6360  * any more. It will help to maintain error context when error occurred.
6361  * Compare to a simple hang, the system will keep stable at least for SSH
6362  * access. Then it should be trivial to inspect the hardware state and
6363  * see what's going on. Implemented as following:
6364  *
6365  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6366  *    clears all CPU mappings to device, disallows remappings through page faults
6367  * 2. amdgpu_irq_disable_all() disables all interrupts
6368  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6369  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6370  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6371  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6372  *    flush any in flight DMA operations
6373  */
6374 void amdgpu_device_halt(struct amdgpu_device *adev)
6375 {
6376 	struct pci_dev *pdev = adev->pdev;
6377 	struct drm_device *ddev = adev_to_drm(adev);
6378 
6379 	amdgpu_xcp_dev_unplug(adev);
6380 	drm_dev_unplug(ddev);
6381 
6382 	amdgpu_irq_disable_all(adev);
6383 
6384 	amdgpu_fence_driver_hw_fini(adev);
6385 
6386 	adev->no_hw_access = true;
6387 
6388 	amdgpu_device_unmap_mmio(adev);
6389 
6390 	pci_disable_device(pdev);
6391 	pci_wait_for_pending_transaction(pdev);
6392 }
6393 
6394 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6395 				u32 reg)
6396 {
6397 	unsigned long flags, address, data;
6398 	u32 r;
6399 
6400 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6401 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6402 
6403 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6404 	WREG32(address, reg * 4);
6405 	(void)RREG32(address);
6406 	r = RREG32(data);
6407 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6408 	return r;
6409 }
6410 
6411 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6412 				u32 reg, u32 v)
6413 {
6414 	unsigned long flags, address, data;
6415 
6416 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6417 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6418 
6419 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6420 	WREG32(address, reg * 4);
6421 	(void)RREG32(address);
6422 	WREG32(data, v);
6423 	(void)RREG32(data);
6424 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6425 }
6426 
6427 /**
6428  * amdgpu_device_switch_gang - switch to a new gang
6429  * @adev: amdgpu_device pointer
6430  * @gang: the gang to switch to
6431  *
6432  * Try to switch to a new gang.
6433  * Returns: NULL if we switched to the new gang or a reference to the current
6434  * gang leader.
6435  */
6436 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6437 					    struct dma_fence *gang)
6438 {
6439 	struct dma_fence *old = NULL;
6440 
6441 	do {
6442 		dma_fence_put(old);
6443 		rcu_read_lock();
6444 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
6445 		rcu_read_unlock();
6446 
6447 		if (old == gang)
6448 			break;
6449 
6450 		if (!dma_fence_is_signaled(old))
6451 			return old;
6452 
6453 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6454 			 old, gang) != old);
6455 
6456 	dma_fence_put(old);
6457 	return NULL;
6458 }
6459 
6460 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6461 {
6462 	switch (adev->asic_type) {
6463 #ifdef CONFIG_DRM_AMDGPU_SI
6464 	case CHIP_HAINAN:
6465 #endif
6466 	case CHIP_TOPAZ:
6467 		/* chips with no display hardware */
6468 		return false;
6469 #ifdef CONFIG_DRM_AMDGPU_SI
6470 	case CHIP_TAHITI:
6471 	case CHIP_PITCAIRN:
6472 	case CHIP_VERDE:
6473 	case CHIP_OLAND:
6474 #endif
6475 #ifdef CONFIG_DRM_AMDGPU_CIK
6476 	case CHIP_BONAIRE:
6477 	case CHIP_HAWAII:
6478 	case CHIP_KAVERI:
6479 	case CHIP_KABINI:
6480 	case CHIP_MULLINS:
6481 #endif
6482 	case CHIP_TONGA:
6483 	case CHIP_FIJI:
6484 	case CHIP_POLARIS10:
6485 	case CHIP_POLARIS11:
6486 	case CHIP_POLARIS12:
6487 	case CHIP_VEGAM:
6488 	case CHIP_CARRIZO:
6489 	case CHIP_STONEY:
6490 		/* chips with display hardware */
6491 		return true;
6492 	default:
6493 		/* IP discovery */
6494 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6495 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6496 			return false;
6497 		return true;
6498 	}
6499 }
6500 
6501 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6502 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6503 		uint32_t expected_value, uint32_t mask)
6504 {
6505 	uint32_t ret = 0;
6506 	uint32_t old_ = 0;
6507 	uint32_t tmp_ = RREG32(reg_addr);
6508 	uint32_t loop = adev->usec_timeout;
6509 
6510 	while ((tmp_ & (mask)) != (expected_value)) {
6511 		if (old_ != tmp_) {
6512 			loop = adev->usec_timeout;
6513 			old_ = tmp_;
6514 		} else
6515 			udelay(1);
6516 		tmp_ = RREG32(reg_addr);
6517 		loop--;
6518 		if (!loop) {
6519 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6520 				  inst, reg_name, (uint32_t)expected_value,
6521 				  (uint32_t)(tmp_ & (mask)));
6522 			ret = -ETIMEDOUT;
6523 			break;
6524 		}
6525 	}
6526 	return ret;
6527 }
6528