xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision e50a6ecebe0841d3dfa4d9415d4fae80bb5d91e8)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/aperture.h>
30 #include <linux/power_supply.h>
31 #include <linux/kthread.h>
32 #include <linux/module.h>
33 #include <linux/console.h>
34 #include <linux/slab.h>
35 #include <linux/iommu.h>
36 #include <linux/pci.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39 
40 #include <drm/drm_atomic_helper.h>
41 #include <drm/drm_client_event.h>
42 #include <drm/drm_crtc_helper.h>
43 #include <drm/drm_probe_helper.h>
44 #include <drm/amdgpu_drm.h>
45 #include <linux/device.h>
46 #include <linux/vgaarb.h>
47 #include <linux/vga_switcheroo.h>
48 #include <linux/efi.h>
49 #include "amdgpu.h"
50 #include "amdgpu_trace.h"
51 #include "amdgpu_i2c.h"
52 #include "atom.h"
53 #include "amdgpu_atombios.h"
54 #include "amdgpu_atomfirmware.h"
55 #include "amd_pcie.h"
56 #ifdef CONFIG_DRM_AMDGPU_SI
57 #include "si.h"
58 #endif
59 #ifdef CONFIG_DRM_AMDGPU_CIK
60 #include "cik.h"
61 #endif
62 #include "vi.h"
63 #include "soc15.h"
64 #include "nv.h"
65 #include "bif/bif_4_1_d.h"
66 #include <linux/firmware.h>
67 #include "amdgpu_vf_error.h"
68 
69 #include "amdgpu_amdkfd.h"
70 #include "amdgpu_pm.h"
71 
72 #include "amdgpu_xgmi.h"
73 #include "amdgpu_ras.h"
74 #include "amdgpu_ras_mgr.h"
75 #include "amdgpu_pmu.h"
76 #include "amdgpu_fru_eeprom.h"
77 #include "amdgpu_reset.h"
78 #include "amdgpu_virt.h"
79 #include "amdgpu_dev_coredump.h"
80 
81 #include <linux/suspend.h>
82 #include <drm/task_barrier.h>
83 #include <linux/pm_runtime.h>
84 
85 #include <drm/drm_drv.h>
86 
87 #if IS_ENABLED(CONFIG_X86)
88 #include <asm/intel-family.h>
89 #include <asm/cpu_device_id.h>
90 #endif
91 
92 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
96 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
97 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
98 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
99 MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
100 
#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
/*
 * Fixed PCIE index/data register offsets (in dwords) used by the indirect
 * access helpers before adev->nbio.funcs has been installed.
 */
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

#define AMDGPU_VBIOS_SKIP (1U << 0)
#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
110 
111 static const struct drm_driver amdgpu_kms_driver;
112 
/* Human-readable names for the supported ASICs, used in log messages. */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
153 
/* Mask with one bit set for every known IP block type. */
#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM  - 1, 0)
/*
 * Default init level where all blocks are expected to be initialized. This is
 * the level of initialization expected by default and also after a full reset
 * of the device.
 */
struct amdgpu_init_level amdgpu_init_default = {
	.level = AMDGPU_INIT_LEVEL_DEFAULT,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Init level used while recovering from a device reset; all blocks are still
 * hardware-initialized, only the level id differs from the default.
 */
struct amdgpu_init_level amdgpu_init_recovery = {
	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
 * is used for cases like reset on initialization where the entire hive needs to
 * be reset before first use.
 */
struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
	.hwini_ip_block_mask =
		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
		BIT(AMD_IP_BLOCK_TYPE_PSP)
};
182 
183 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
184 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
185 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
186 
187 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
188 
189 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
190 					     enum amd_ip_block_type block)
191 {
192 	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
193 }
194 
195 void amdgpu_set_init_level(struct amdgpu_device *adev,
196 			   enum amdgpu_init_lvl_id lvl)
197 {
198 	switch (lvl) {
199 	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
200 		adev->init_lvl = &amdgpu_init_minimal_xgmi;
201 		break;
202 	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
203 		adev->init_lvl = &amdgpu_init_recovery;
204 		break;
205 	case AMDGPU_INIT_LEVEL_DEFAULT:
206 		fallthrough;
207 	default:
208 		adev->init_lvl = &amdgpu_init_default;
209 		break;
210 	}
211 }
212 
213 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
214 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
215 				     void *data);
216 
217 /**
218  * DOC: pcie_replay_count
219  *
220  * The amdgpu driver provides a sysfs API for reporting the total number
221  * of PCIe replays (NAKs).
222  * The file pcie_replay_count is used for this and returns the total
223  * number of replays as a sum of the NAKs generated and NAKs received.
224  */
225 
226 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
227 		struct device_attribute *attr, char *buf)
228 {
229 	struct drm_device *ddev = dev_get_drvdata(dev);
230 	struct amdgpu_device *adev = drm_to_adev(ddev);
231 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
232 
233 	return sysfs_emit(buf, "%llu\n", cnt);
234 }
235 
236 static DEVICE_ATTR(pcie_replay_count, 0444,
237 		amdgpu_device_get_pcie_replay_count, NULL);
238 
239 static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
240 {
241 	int ret = 0;
242 
243 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
244 		ret = sysfs_create_file(&adev->dev->kobj,
245 					&dev_attr_pcie_replay_count.attr);
246 
247 	return ret;
248 }
249 
250 static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
251 {
252 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
253 		sysfs_remove_file(&adev->dev->kobj,
254 				  &dev_attr_pcie_replay_count.attr);
255 }
256 
/*
 * Sysfs bin-attribute read handler for "reg_state". The file offset @ppos is
 * used as a partition selector (not a byte position): each AMDGPU_SYS_REG_STATE_*
 * value maps to the matching AMDGPU_REG_STATE_TYPE_* request forwarded to the
 * ASIC callback. Returns bytes written to @buf or -EINVAL for unknown selectors.
 */
static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
					  const struct bin_attribute *attr, char *buf,
					  loff_t ppos, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	ssize_t bytes_read;

	switch (ppos) {
	case AMDGPU_SYS_REG_STATE_XGMI:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_WAFL:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_PCIE:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_USR:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_USR_1:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
		break;
	default:
		return -EINVAL;
	}

	return bytes_read;
}
293 
294 static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
295 		      AMDGPU_SYS_REG_STATE_END);
296 
297 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
298 {
299 	int ret;
300 
301 	if (!amdgpu_asic_get_reg_state_supported(adev))
302 		return 0;
303 
304 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
305 
306 	return ret;
307 }
308 
309 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
310 {
311 	if (!amdgpu_asic_get_reg_state_supported(adev))
312 		return;
313 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
314 }
315 
316 /**
317  * DOC: board_info
318  *
319  * The amdgpu driver provides a sysfs API for giving board related information.
320  * It provides the form factor information in the format
321  *
322  *   type : form factor
323  *
324  * Possible form factor values
325  *
326  * - "cem"		- PCIE CEM card
327  * - "oam"		- Open Compute Accelerator Module
328  * - "unknown"	- Not known
329  *
330  */
331 
332 static ssize_t amdgpu_device_get_board_info(struct device *dev,
333 					    struct device_attribute *attr,
334 					    char *buf)
335 {
336 	struct drm_device *ddev = dev_get_drvdata(dev);
337 	struct amdgpu_device *adev = drm_to_adev(ddev);
338 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
339 	const char *pkg;
340 
341 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
342 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
343 
344 	switch (pkg_type) {
345 	case AMDGPU_PKG_TYPE_CEM:
346 		pkg = "cem";
347 		break;
348 	case AMDGPU_PKG_TYPE_OAM:
349 		pkg = "oam";
350 		break;
351 	default:
352 		pkg = "unknown";
353 		break;
354 	}
355 
356 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
357 }
358 
359 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
360 
361 static struct attribute *amdgpu_board_attrs[] = {
362 	&dev_attr_board_info.attr,
363 	NULL,
364 };
365 
366 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
367 					     struct attribute *attr, int n)
368 {
369 	struct device *dev = kobj_to_dev(kobj);
370 	struct drm_device *ddev = dev_get_drvdata(dev);
371 	struct amdgpu_device *adev = drm_to_adev(ddev);
372 
373 	if (adev->flags & AMD_IS_APU)
374 		return 0;
375 
376 	return attr->mode;
377 }
378 
379 static const struct attribute_group amdgpu_board_attrs_group = {
380 	.attrs = amdgpu_board_attrs,
381 	.is_visible = amdgpu_board_attrs_is_visible
382 };
383 
384 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
385 
386 /**
387  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
388  *
389  * @adev: amdgpu device pointer
390  *
391  * Returns true if the device is a dGPU with ATPX power control,
392  * otherwise return false.
393  */
394 bool amdgpu_device_supports_px(struct amdgpu_device *adev)
395 {
396 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
397 		return true;
398 	return false;
399 }
400 
401 /**
402  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
403  *
404  * @adev: amdgpu device pointer
405  *
406  * Returns true if the device is a dGPU with ACPI power control,
407  * otherwise return false.
408  */
409 bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
410 {
411 	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
412 		return false;
413 
414 	if (adev->has_pr3 ||
415 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
416 		return true;
417 	return false;
418 }
419 
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @adev: amdgpu device pointer
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported)
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
	int baco_flags = amdgpu_asic_supports_baco(adev);

	return baco_flags;
}
434 
/**
 * amdgpu_device_detect_runtime_pm_mode - choose the runtime PM mechanism
 *
 * @adev: amdgpu device pointer
 *
 * Sets adev->pm.rpm_mode based on the amdgpu_runtime_pm module parameter
 * and on what the platform/ASIC supports (PX/ATPX, BOCO, BACO, BAMACO).
 * Leaves the mode at AMDGPU_RUNPM_NONE when nothing is available or when
 * runtime PM was disabled explicitly.
 */
void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
{
	int bamaco_support;	/* BACO_SUPPORT / MACO_SUPPORT flag bits */

	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
	bamaco_support = amdgpu_device_supports_baco(adev);

	switch (amdgpu_runtime_pm) {
	case 2:
		/* User asked for BAMACO; fall back to BACO if MACO is absent. */
		if (bamaco_support & MACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
		} else if (bamaco_support == BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
		}
		break;
	case 1:
		/* User asked for plain BACO. */
		if (bamaco_support & BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
		}
		break;
	case -1:
	case -2:
		/* Auto-detect: prefer PX, then BOCO, then BACO/BAMACO. */
		if (amdgpu_device_supports_px(adev)) {
			/* enable PX as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
			dev_info(adev->dev, "Using ATPX for runtime pm\n");
		} else if (amdgpu_device_supports_boco(adev)) {
			/* enable boco as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
			dev_info(adev->dev, "Using BOCO for runtime pm\n");
		} else {
			if (!bamaco_support)
				goto no_runtime_pm;

			switch (adev->asic_type) {
			case CHIP_VEGA20:
			case CHIP_ARCTURUS:
				/* BACO are not supported on vega20 and arcturus */
				break;
			case CHIP_VEGA10:
				/* enable BACO as runpm mode if noretry=0 */
				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			default:
				/* enable BACO as runpm mode on CI+ */
				if (!amdgpu_passthrough(adev))
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			}

			/* Upgrade BACO to BAMACO when MACO is also supported. */
			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
				if (bamaco_support & MACO_SUPPORT) {
					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
				} else {
					dev_info(adev->dev, "Using BACO for runtime pm\n");
				}
			}
		}
		break;
	case 0:
		dev_info(adev->dev, "runtime pm is manually disabled\n");
		break;
	default:
		break;
	}

no_runtime_pm:
	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
		dev_info(adev->dev, "Runtime PM not available\n");
}
510 /**
511  * amdgpu_device_supports_smart_shift - Is the device dGPU with
512  * smart shift support
513  *
514  * @adev: amdgpu device pointer
515  *
516  * Returns true if the device is a dGPU with Smart Shift support,
517  * otherwise returns false.
518  */
519 bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
520 {
521 	return (amdgpu_device_supports_boco(adev) &&
522 		amdgpu_acpi_is_power_shift_control_supported());
523 }
524 
525 /*
526  * VRAM access helper functions
527  */
528 
/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * Transfers @size bytes one dword at a time through the indirect
 * MM_INDEX/MM_DATA register pair, holding the mmio index lock so the
 * index/data sequence cannot be interleaved by another user.
 * Both @pos and @size must be dword aligned.
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	/* Bail out if the device has already been unplugged. */
	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;	/* upper address bits for MM_INDEX_HI */

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		/* Only rewrite MM_INDEX_HI when the upper bits change. */
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}
570 
/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * Copies through the CPU-visible VRAM aperture mapping; only the part of
 * [@pos, @pos + @size) that lies inside visible VRAM is transferred.
 * Only implemented on 64-bit kernels; otherwise returns 0.
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	/* No CPU mapping of the aperture available. */
	if (!adev->mman.aper_base_kaddr)
		return 0;

	/* Clamp the transfer to the CPU-visible part of VRAM. */
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}
621 
622 /**
623  * amdgpu_device_vram_access - read/write a buffer in vram
624  *
625  * @adev: amdgpu_device pointer
626  * @pos: offset of the buffer in vram
627  * @buf: virtual address of the buffer in system memory
628  * @size: read/write size, sizeof(@buf) must > @size
629  * @write: true - write to vram, otherwise - read from vram
630  */
631 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
632 			       void *buf, size_t size, bool write)
633 {
634 	size_t count;
635 
636 	/* try to using vram apreature to access vram first */
637 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
638 	size -= count;
639 	if (size) {
640 		/* using MM to access rest vram */
641 		pos += count;
642 		buf += count;
643 		amdgpu_device_mm_access(adev, pos, buf, size, write);
644 	}
645 }
646 
647 /*
648  * register access helper functions.
649  */
650 
/*
 * Check if hw access should be skipped because of hotplug or device error.
 * Returns true when all register access must be suppressed; as a side effect
 * (under CONFIG_LOCKDEP) asserts that no GPU reset races with this access.
 */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	/* Set when the device is gone or in an unrecoverable error state. */
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}
678 
/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Registers inside the MMIO BAR are read directly, or via the KIQ when
 * running as an SRIOV VF in runtime mode; offsets beyond the BAR go
 * through the indirect PCIE read callback.
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		/*
		 * Use the KIQ path for SRIOV runtime unless NO_KIQ was
		 * requested; the trylock fails while a reset holds the
		 * write side, in which case we fall back to direct MMIO.
		 */
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		/* Register lies outside the MMIO BAR: indirect access. */
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}
713 
714 /*
715  * MMIO register read with bytes helper functions
716  * @offset:bytes offset from MMIO start
717  */
718 
719 /**
720  * amdgpu_mm_rreg8 - read a memory mapped IO register
721  *
722  * @adev: amdgpu_device pointer
723  * @offset: byte aligned register offset
724  *
725  * Returns the 8 bit value from the offset specified.
726  */
727 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
728 {
729 	if (amdgpu_device_skip_hw_access(adev))
730 		return 0;
731 
732 	if (offset < adev->rmmio_size)
733 		return (readb(adev->rmmio + offset));
734 	BUG();
735 }
736 
737 
/**
 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
				uint32_t reg, uint32_t acc_flags,
				uint32_t xcc_id)
{
	uint32_t ret, rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		/*
		 * VFs outside runtime mode go through the RLCG interface
		 * when the register is in an RLCG-accessible range.
		 */
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    adev->gfx.rlc.rlcg_reg_access_supported &&
		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
							 GC_HWIP, false,
							 &rlcg_flag)) {
			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			/* KIQ path; skipped while a reset holds the write lock. */
			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		/* Register lies outside the MMIO BAR: indirect access. */
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	return ret;
}
779 
780 /*
781  * MMIO register write with bytes helper functions
782  * @offset:bytes offset from MMIO start
783  * @value: the value want to be written to the register
784  */
785 
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	/* Byte writes are only valid inside the MMIO BAR. */
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}
805 
/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified. Registers inside
 * the MMIO BAR are written directly, or via the KIQ when running as an
 * SRIOV VF in runtime mode; offsets beyond the BAR go through the
 * indirect PCIE write callback.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		/*
		 * KIQ path for SRIOV runtime unless NO_KIQ was requested;
		 * the trylock fails while a reset holds the write side.
		 */
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		/* Register lies outside the MMIO BAR: indirect access. */
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}
838 
/**
 * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 * @xcc_id: xcc accelerated compute core id
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v,
			     uint32_t xcc_id)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	/* Full-access SRIOV routes RLCG-range registers through the RLC. */
	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
	} else if ((reg * 4) >= adev->rmmio_size) {
		/* Beyond the MMIO BAR: indirect PCIE write. */
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}
867 
/**
 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags, uint32_t xcc_id)
{
	uint32_t rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		/*
		 * VFs outside runtime mode go through the RLCG interface
		 * when the register is in an RLCG-accessible range.
		 */
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    adev->gfx.rlc.rlcg_reg_access_supported &&
		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
							 GC_HWIP, true,
							 &rlcg_flag)) {
			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			/* KIQ path; skipped while a reset holds the write lock. */
			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		/* Register lies outside the MMIO BAR: indirect access. */
		adev->pcie_wreg(adev, reg * 4, v);
	}
}
908 
/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Reads through the PCIE index/data register pair while holding the
 * pcie index lock so the sequence cannot be interleaved with another
 * indirect access.
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u32 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	/* Read back to post the index write before the data access. */
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
939 
/**
 * amdgpu_device_indirect_rreg_ext - read an indirect register with extended address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from; address bits above
 * 32 are routed through the PCIE index-hi register when needed
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
				    u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	/* Fall back to fixed offsets before the NBIO callbacks are installed. */
	if (unlikely(!adev->nbio.funcs)) {
		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
	} else {
		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	}

	/* Only use the hi index register when the address actually needs it. */
	if (reg_addr >> 32) {
		if (unlikely(!adev->nbio.funcs))
			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
		else
			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	} else {
		pcie_index_hi = 0;
	}

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	/* Read back to post each index write before the data access. */
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
991 
/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Reads the low and high dwords back to back through the PCIE
 * index/data pair while holding the pcie index lock, so the two
 * halves cannot be interleaved with another indirect access.
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
1027 
/**
 * amdgpu_device_indirect_rreg64_ext - read a 64bits indirect register with extended address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from; address bits above
 * 32 are routed through the PCIE index-hi register when available
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
				  u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	unsigned long pcie_index_hi = 0;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	/* Only use the hi index register when the address needs it and
	 * the NBIO implementation provides one.
	 */
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
			pcie_index_hi * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r |= ((u64)readl(pcie_data_offset) << 32);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
1077 
1078 /**
1079  * amdgpu_device_indirect_wreg - write an indirect register address
1080  *
1081  * @adev: amdgpu_device pointer
1082  * @reg_addr: indirect register offset
1083  * @reg_data: indirect register data
1084  *
1085  */
1086 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1087 				 u32 reg_addr, u32 reg_data)
1088 {
1089 	unsigned long flags, pcie_index, pcie_data;
1090 	void __iomem *pcie_index_offset;
1091 	void __iomem *pcie_data_offset;
1092 
1093 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1094 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1095 
1096 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1097 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1098 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1099 
1100 	writel(reg_addr, pcie_index_offset);
1101 	readl(pcie_index_offset);
1102 	writel(reg_data, pcie_data_offset);
1103 	readl(pcie_data_offset);
1104 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1105 }
1106 
/**
 * amdgpu_device_indirect_wreg_ext - write an indirect register with an
 * extended (64-bit) register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset; bits above 31 are programmed into
 * a separate "index hi" register when the NBIO block provides one
 * @reg_data: indirect register data
 */
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	/* only consult the hi index when the address actually needs it */
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	/* the index/data pair is shared; serialize all users */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);	/* read back to flush the index write */
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);	/* read back to flush the data write */

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
1146 
1147 /**
1148  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
1149  *
1150  * @adev: amdgpu_device pointer
1151  * @reg_addr: indirect register offset
1152  * @reg_data: indirect register data
1153  *
1154  */
1155 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1156 				   u32 reg_addr, u64 reg_data)
1157 {
1158 	unsigned long flags, pcie_index, pcie_data;
1159 	void __iomem *pcie_index_offset;
1160 	void __iomem *pcie_data_offset;
1161 
1162 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1163 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1164 
1165 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1166 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1167 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1168 
1169 	/* write low 32 bits */
1170 	writel(reg_addr, pcie_index_offset);
1171 	readl(pcie_index_offset);
1172 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1173 	readl(pcie_data_offset);
1174 	/* write high 32 bits */
1175 	writel(reg_addr + 4, pcie_index_offset);
1176 	readl(pcie_index_offset);
1177 	writel((u32)(reg_data >> 32), pcie_data_offset);
1178 	readl(pcie_data_offset);
1179 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1180 }
1181 
/**
 * amdgpu_device_indirect_wreg64_ext - write a 64bits indirect register
 * with an extended (64-bit) register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset; bits above 31 are programmed into
 * a separate "index hi" register when the NBIO block provides one
 * @reg_data: indirect register data
 */
void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
				   u64 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	unsigned long pcie_index_hi = 0;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	/* only consult the hi index when the address actually needs it */
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	/* the index/data pair is shared; serialize all users */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);	/* read back to flush the index write */
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);	/* read back to flush the index write */
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
1230 
1231 /**
1232  * amdgpu_device_get_rev_id - query device rev_id
1233  *
1234  * @adev: amdgpu_device pointer
1235  *
1236  * Return device rev_id
1237  */
1238 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1239 {
1240 	return adev->nbio.funcs->get_rev_id(adev);
1241 }
1242 
1243 /**
1244  * amdgpu_invalid_rreg - dummy reg read function
1245  *
1246  * @adev: amdgpu_device pointer
1247  * @reg: offset of register
1248  *
1249  * Dummy register read function.  Used for register blocks
1250  * that certain asics don't have (all asics).
1251  * Returns the value in the register.
1252  */
1253 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1254 {
1255 	dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
1256 	BUG();
1257 	return 0;
1258 }
1259 
/**
 * amdgpu_invalid_rreg_ext - dummy reg read function (64-bit address)
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
	dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
	/* reaching this callback is a driver bug, so crash loudly */
	BUG();
	return 0;
}
1266 
1267 /**
1268  * amdgpu_invalid_wreg - dummy reg write function
1269  *
1270  * @adev: amdgpu_device pointer
1271  * @reg: offset of register
1272  * @v: value to write to the register
1273  *
1274  * Dummy register read function.  Used for register blocks
1275  * that certain asics don't have (all asics).
1276  */
1277 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1278 {
1279 	dev_err(adev->dev,
1280 		"Invalid callback to write register 0x%04X with 0x%08X\n", reg,
1281 		v);
1282 	BUG();
1283 }
1284 
/**
 * amdgpu_invalid_wreg_ext - dummy reg write function (64-bit address)
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
	dev_err(adev->dev,
		"Invalid callback to write register 0x%llX with 0x%08X\n", reg,
		v);
	/* reaching this callback is a driver bug, so crash loudly */
	BUG();
}
1292 
1293 /**
1294  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1295  *
1296  * @adev: amdgpu_device pointer
1297  * @reg: offset of register
1298  *
1299  * Dummy register read function.  Used for register blocks
1300  * that certain asics don't have (all asics).
1301  * Returns the value in the register.
1302  */
1303 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1304 {
1305 	dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
1306 		reg);
1307 	BUG();
1308 	return 0;
1309 }
1310 
/**
 * amdgpu_invalid_rreg64_ext - dummy 64 bit reg read function (64-bit address)
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
{
	dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
	/* reaching this callback is a driver bug, so crash loudly */
	BUG();
	return 0;
}
1317 
1318 /**
1319  * amdgpu_invalid_wreg64 - dummy reg write function
1320  *
1321  * @adev: amdgpu_device pointer
1322  * @reg: offset of register
1323  * @v: value to write to the register
1324  *
1325  * Dummy register read function.  Used for register blocks
1326  * that certain asics don't have (all asics).
1327  */
1328 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1329 {
1330 	dev_err(adev->dev,
1331 		"Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1332 		reg, v);
1333 	BUG();
1334 }
1335 
/**
 * amdgpu_invalid_wreg64_ext - dummy reg write function (64-bit address)
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
{
	dev_err(adev->dev,
		"Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
		reg, v);
	/* reaching this callback is a driver bug, so crash loudly */
	BUG();
}
1343 
1344 /**
1345  * amdgpu_block_invalid_rreg - dummy reg read function
1346  *
1347  * @adev: amdgpu_device pointer
1348  * @block: offset of instance
1349  * @reg: offset of register
1350  *
1351  * Dummy register read function.  Used for register blocks
1352  * that certain asics don't have (all asics).
1353  * Returns the value in the register.
1354  */
1355 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1356 					  uint32_t block, uint32_t reg)
1357 {
1358 	dev_err(adev->dev,
1359 		"Invalid callback to read register 0x%04X in block 0x%04X\n",
1360 		reg, block);
1361 	BUG();
1362 	return 0;
1363 }
1364 
1365 /**
1366  * amdgpu_block_invalid_wreg - dummy reg write function
1367  *
1368  * @adev: amdgpu_device pointer
1369  * @block: offset of instance
1370  * @reg: offset of register
1371  * @v: value to write to the register
1372  *
1373  * Dummy register read function.  Used for register blocks
1374  * that certain asics don't have (all asics).
1375  */
1376 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1377 				      uint32_t block,
1378 				      uint32_t reg, uint32_t v)
1379 {
1380 	dev_err(adev->dev,
1381 		"Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1382 		reg, block, v);
1383 	BUG();
1384 }
1385 
1386 static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
1387 {
1388 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1389 		return AMDGPU_VBIOS_SKIP;
1390 
1391 	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
1392 		return AMDGPU_VBIOS_OPTIONAL;
1393 
1394 	return 0;
1395 }
1396 
1397 /**
1398  * amdgpu_device_asic_init - Wrapper for atom asic_init
1399  *
1400  * @adev: amdgpu_device pointer
1401  *
1402  * Does any asic specific work and then calls atom asic init.
1403  */
1404 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1405 {
1406 	uint32_t flags;
1407 	bool optional;
1408 	int ret;
1409 
1410 	amdgpu_asic_pre_asic_init(adev);
1411 	flags = amdgpu_device_get_vbios_flags(adev);
1412 	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
1413 
1414 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1415 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
1416 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
1417 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1418 		amdgpu_psp_wait_for_bootloader(adev);
1419 		if (optional && !adev->bios)
1420 			return 0;
1421 
1422 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1423 		return ret;
1424 	} else {
1425 		if (optional && !adev->bios)
1426 			return 0;
1427 
1428 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1429 	}
1430 
1431 	return 0;
1432 }
1433 
1434 /**
1435  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1436  *
1437  * @adev: amdgpu_device pointer
1438  *
1439  * Allocates a scratch page of VRAM for use by various things in the
1440  * driver.
1441  */
1442 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1443 {
1444 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1445 				       AMDGPU_GEM_DOMAIN_VRAM |
1446 				       AMDGPU_GEM_DOMAIN_GTT,
1447 				       &adev->mem_scratch.robj,
1448 				       &adev->mem_scratch.gpu_addr,
1449 				       (void **)&adev->mem_scratch.ptr);
1450 }
1451 
1452 /**
1453  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1454  *
1455  * @adev: amdgpu_device pointer
1456  *
1457  * Frees the VRAM scratch page.
1458  */
1459 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1460 {
1461 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1462 }
1463 
1464 /**
1465  * amdgpu_device_program_register_sequence - program an array of registers.
1466  *
1467  * @adev: amdgpu_device pointer
1468  * @registers: pointer to the register array
1469  * @array_size: size of the register array
1470  *
1471  * Programs an array or registers with and or masks.
1472  * This is a helper for setting golden registers.
1473  */
1474 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1475 					     const u32 *registers,
1476 					     const u32 array_size)
1477 {
1478 	u32 tmp, reg, and_mask, or_mask;
1479 	int i;
1480 
1481 	if (array_size % 3)
1482 		return;
1483 
1484 	for (i = 0; i < array_size; i += 3) {
1485 		reg = registers[i + 0];
1486 		and_mask = registers[i + 1];
1487 		or_mask = registers[i + 2];
1488 
1489 		if (and_mask == 0xffffffff) {
1490 			tmp = or_mask;
1491 		} else {
1492 			tmp = RREG32(reg);
1493 			tmp &= ~and_mask;
1494 			if (adev->family >= AMDGPU_FAMILY_AI)
1495 				tmp |= (or_mask & and_mask);
1496 			else
1497 				tmp |= or_mask;
1498 		}
1499 		WREG32(reg, tmp);
1500 	}
1501 }
1502 
1503 /**
1504  * amdgpu_device_pci_config_reset - reset the GPU
1505  *
1506  * @adev: amdgpu_device pointer
1507  *
1508  * Resets the GPU using the pci config reset sequence.
1509  * Only applicable to asics prior to vega10.
1510  */
1511 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1512 {
1513 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1514 }
1515 
1516 /**
1517  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1518  *
1519  * @adev: amdgpu_device pointer
1520  *
1521  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1522  */
1523 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1524 {
1525 	return pci_reset_function(adev->pdev);
1526 }
1527 
1528 /*
1529  * amdgpu_device_wb_*()
1530  * Writeback is the method by which the GPU updates special pages in memory
1531  * with the status of certain GPU events (fences, ring pointers,etc.).
1532  */
1533 
1534 /**
1535  * amdgpu_device_wb_fini - Disable Writeback and free memory
1536  *
1537  * @adev: amdgpu_device pointer
1538  *
1539  * Disables Writeback and frees the Writeback memory (all asics).
1540  * Used at driver shutdown.
1541  */
1542 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1543 {
1544 	if (adev->wb.wb_obj) {
1545 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1546 				      &adev->wb.gpu_addr,
1547 				      (void **)&adev->wb.wb);
1548 		adev->wb.wb_obj = NULL;
1549 	}
1550 }
1551 
1552 /**
1553  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1554  *
1555  * @adev: amdgpu_device pointer
1556  *
1557  * Initializes writeback and allocates writeback memory (all asics).
1558  * Used at driver startup.
1559  * Returns 0 on success or an -error on failure.
1560  */
1561 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1562 {
1563 	int r;
1564 
1565 	if (adev->wb.wb_obj == NULL) {
1566 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1567 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1568 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1569 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1570 					    (void **)&adev->wb.wb);
1571 		if (r) {
1572 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1573 			return r;
1574 		}
1575 
1576 		adev->wb.num_wb = AMDGPU_MAX_WB;
1577 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1578 
1579 		/* clear wb memory */
1580 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1581 	}
1582 
1583 	return 0;
1584 }
1585 
1586 /**
1587  * amdgpu_device_wb_get - Allocate a wb entry
1588  *
1589  * @adev: amdgpu_device pointer
1590  * @wb: wb index
1591  *
1592  * Allocate a wb slot for use by the driver (all asics).
1593  * Returns 0 on success or -EINVAL on failure.
1594  */
1595 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1596 {
1597 	unsigned long flags, offset;
1598 
1599 	spin_lock_irqsave(&adev->wb.lock, flags);
1600 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1601 	if (offset < adev->wb.num_wb) {
1602 		__set_bit(offset, adev->wb.used);
1603 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1604 		*wb = offset << 3; /* convert to dw offset */
1605 		return 0;
1606 	} else {
1607 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1608 		return -EINVAL;
1609 	}
1610 }
1611 
1612 /**
1613  * amdgpu_device_wb_free - Free a wb entry
1614  *
1615  * @adev: amdgpu_device pointer
1616  * @wb: wb index
1617  *
1618  * Free a wb slot allocated for use by the driver (all asics)
1619  */
1620 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1621 {
1622 	unsigned long flags;
1623 
1624 	wb >>= 3;
1625 	spin_lock_irqsave(&adev->wb.lock, flags);
1626 	if (wb < adev->wb.num_wb)
1627 		__clear_bit(wb, adev->wb.used);
1628 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1629 }
1630 
1631 /**
1632  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1633  *
1634  * @adev: amdgpu_device pointer
1635  *
1636  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1637  * to fail, but if any of the BARs is not accessible after the size we abort
1638  * driver loading by returning -ENODEV.
1639  */
1640 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1641 {
1642 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1643 	struct pci_bus *root;
1644 	struct resource *res;
1645 	unsigned int i;
1646 	u16 cmd;
1647 	int r;
1648 
1649 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1650 		return 0;
1651 
1652 	/* Bypass for VF */
1653 	if (amdgpu_sriov_vf(adev))
1654 		return 0;
1655 
1656 	if (!amdgpu_rebar)
1657 		return 0;
1658 
1659 	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1660 	if ((amdgpu_runtime_pm != 0) &&
1661 	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1662 	    adev->pdev->device == 0x731f &&
1663 	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1664 		return 0;
1665 
1666 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1667 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1668 		dev_warn(
1669 			adev->dev,
1670 			"System can't access extended configuration space, please check!!\n");
1671 
1672 	/* skip if the bios has already enabled large BAR */
1673 	if (adev->gmc.real_vram_size &&
1674 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1675 		return 0;
1676 
1677 	/* Check if the root BUS has 64bit memory resources */
1678 	root = adev->pdev->bus;
1679 	while (root->parent)
1680 		root = root->parent;
1681 
1682 	pci_bus_for_each_resource(root, res, i) {
1683 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1684 		    res->start > 0x100000000ull)
1685 			break;
1686 	}
1687 
1688 	/* Trying to resize is pointless without a root hub window above 4GB */
1689 	if (!res)
1690 		return 0;
1691 
1692 	/* Limit the BAR size to what is available */
1693 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1694 			rbar_size);
1695 
1696 	/* Disable memory decoding while we change the BAR addresses and size */
1697 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1698 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1699 			      cmd & ~PCI_COMMAND_MEMORY);
1700 
1701 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1702 	amdgpu_doorbell_fini(adev);
1703 	if (adev->asic_type >= CHIP_BONAIRE)
1704 		pci_release_resource(adev->pdev, 2);
1705 
1706 	pci_release_resource(adev->pdev, 0);
1707 
1708 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1709 	if (r == -ENOSPC)
1710 		dev_info(adev->dev,
1711 			 "Not enough PCI address space for a large BAR.");
1712 	else if (r && r != -ENOTSUPP)
1713 		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1714 
1715 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1716 
1717 	/* When the doorbell or fb BAR isn't available we have no chance of
1718 	 * using the device.
1719 	 */
1720 	r = amdgpu_doorbell_init(adev);
1721 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1722 		return -ENODEV;
1723 
1724 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1725 
1726 	return 0;
1727 }
1728 
1729 /*
1730  * GPU helpers function.
1731  */
1732 /**
1733  * amdgpu_device_need_post - check if the hw need post or not
1734  *
1735  * @adev: amdgpu_device pointer
1736  *
1737  * Check if the asic has been initialized (all asics) at driver startup
1738  * or post is needed if  hw reset is performed.
1739  * Returns true if need or false if not.
1740  */
1741 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1742 {
1743 	uint32_t reg, flags;
1744 
1745 	if (amdgpu_sriov_vf(adev))
1746 		return false;
1747 
1748 	flags = amdgpu_device_get_vbios_flags(adev);
1749 	if (flags & AMDGPU_VBIOS_SKIP)
1750 		return false;
1751 	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1752 		return false;
1753 
1754 	if (amdgpu_passthrough(adev)) {
1755 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1756 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1757 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1758 		 * vpost executed for smc version below 22.15
1759 		 */
1760 		if (adev->asic_type == CHIP_FIJI) {
1761 			int err;
1762 			uint32_t fw_ver;
1763 
1764 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1765 			/* force vPost if error occurred */
1766 			if (err)
1767 				return true;
1768 
1769 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1770 			release_firmware(adev->pm.fw);
1771 			if (fw_ver < 0x00160e00)
1772 				return true;
1773 		}
1774 	}
1775 
1776 	/* Don't post if we need to reset whole hive on init */
1777 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1778 		return false;
1779 
1780 	if (adev->has_hw_reset) {
1781 		adev->has_hw_reset = false;
1782 		return true;
1783 	}
1784 
1785 	/* bios scratch used on CIK+ */
1786 	if (adev->asic_type >= CHIP_BONAIRE)
1787 		return amdgpu_atombios_scratch_need_asic_init(adev);
1788 
1789 	/* check MEM_SIZE for older asics */
1790 	reg = amdgpu_asic_get_config_memsize(adev);
1791 
1792 	if ((reg != 0) && (reg != 0xffffffff))
1793 		return false;
1794 
1795 	return true;
1796 }
1797 
1798 /*
1799  * Check whether seamless boot is supported.
1800  *
1801  * So far we only support seamless boot on DCE 3.0 or later.
1802  * If users report that it works on older ASICS as well, we may
1803  * loosen this.
1804  */
1805 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1806 {
1807 	switch (amdgpu_seamless) {
1808 	case -1:
1809 		break;
1810 	case 1:
1811 		return true;
1812 	case 0:
1813 		return false;
1814 	default:
1815 		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1816 			amdgpu_seamless);
1817 		return false;
1818 	}
1819 
1820 	if (!(adev->flags & AMD_IS_APU))
1821 		return false;
1822 
1823 	if (adev->mman.keep_stolen_vga_memory)
1824 		return false;
1825 
1826 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1827 }
1828 
1829 /*
1830  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1831  * don't support dynamic speed switching. Until we have confirmation from Intel
1832  * that a specific host supports it, it's safer that we keep it disabled for all.
1833  *
1834  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1835  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1836  */
1837 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1838 {
1839 #if IS_ENABLED(CONFIG_X86)
1840 	struct cpuinfo_x86 *c = &cpu_data(0);
1841 
1842 	/* eGPU change speeds based on USB4 fabric conditions */
1843 	if (dev_is_removable(adev->dev))
1844 		return true;
1845 
1846 	if (c->x86_vendor == X86_VENDOR_INTEL)
1847 		return false;
1848 #endif
1849 	return true;
1850 }
1851 
/*
 * amdgpu_device_aspm_support_quirk - check for platforms where ASPM must
 * stay off
 *
 * Returns true when ASPM should be disabled for this device/host
 * combination: all SI parts, and GC 12.0.x parts running at Gen5 on
 * Alder Lake / Raptor Lake hosts.
 */
static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
{
	/* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
	 * It's unclear if this is a platform-specific or GPU-specific issue.
	 * Disable ASPM on SI for the time being.
	 */
	if (adev->family == AMDGPU_FAMILY_SI)
		return true;

#if IS_ENABLED(CONFIG_X86)
	struct cpuinfo_x86 *c = &cpu_data(0);

	/* quirk only applies to GC 12.0.0 / 12.0.1 */
	if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
		  amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
		return false;

	/* only when the link can train to Gen5 on these Intel hosts */
	if (c->x86 == 6 &&
		adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
		switch (c->x86_model) {
		case VFM_MODEL(INTEL_ALDERLAKE):
		case VFM_MODEL(INTEL_ALDERLAKE_L):
		case VFM_MODEL(INTEL_RAPTORLAKE):
		case VFM_MODEL(INTEL_RAPTORLAKE_P):
		case VFM_MODEL(INTEL_RAPTORLAKE_S):
			return true;
		default:
			return false;
		}
	} else {
		return false;
	}
#else
	return false;
#endif
}
1887 
1888 /**
1889  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1890  *
1891  * @adev: amdgpu_device pointer
1892  *
1893  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1894  * be set for this device.
1895  *
1896  * Returns true if it should be used or false if not.
1897  */
1898 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1899 {
1900 	switch (amdgpu_aspm) {
1901 	case -1:
1902 		break;
1903 	case 0:
1904 		return false;
1905 	case 1:
1906 		return true;
1907 	default:
1908 		return false;
1909 	}
1910 	if (adev->flags & AMD_IS_APU)
1911 		return false;
1912 	if (amdgpu_device_aspm_support_quirk(adev))
1913 		return false;
1914 	return pcie_aspm_enabled(adev->pdev);
1915 }
1916 
1917 /* if we get transitioned to only one device, take VGA back */
1918 /**
1919  * amdgpu_device_vga_set_decode - enable/disable vga decode
1920  *
1921  * @pdev: PCI device pointer
1922  * @state: enable/disable vga decode
1923  *
1924  * Enable/disable vga decode (all asics).
1925  * Returns VGA resource flags.
1926  */
1927 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1928 		bool state)
1929 {
1930 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1931 
1932 	amdgpu_asic_set_vga_state(adev, state);
1933 	if (state)
1934 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1935 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1936 	else
1937 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1938 }
1939 
1940 /**
1941  * amdgpu_device_check_block_size - validate the vm block size
1942  *
1943  * @adev: amdgpu_device pointer
1944  *
1945  * Validates the vm block size specified via module parameter.
1946  * The vm block size defines number of bits in page table versus page directory,
1947  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1948  * page table and the remaining bits are in the page directory.
1949  */
1950 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1951 {
1952 	/* defines number of bits in page table versus page directory,
1953 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1954 	 * page table and the remaining bits are in the page directory
1955 	 */
1956 	if (amdgpu_vm_block_size == -1)
1957 		return;
1958 
1959 	if (amdgpu_vm_block_size < 9) {
1960 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1961 			 amdgpu_vm_block_size);
1962 		amdgpu_vm_block_size = -1;
1963 	}
1964 }
1965 
1966 /**
1967  * amdgpu_device_check_vm_size - validate the vm size
1968  *
1969  * @adev: amdgpu_device pointer
1970  *
1971  * Validates the vm size in GB specified via module parameter.
1972  * The VM size is the size of the GPU virtual memory space in GB.
1973  */
1974 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1975 {
1976 	/* no need to check the default value */
1977 	if (amdgpu_vm_size == -1)
1978 		return;
1979 
1980 	if (amdgpu_vm_size < 1) {
1981 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1982 			 amdgpu_vm_size);
1983 		amdgpu_vm_size = -1;
1984 	}
1985 }
1986 
1987 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1988 {
1989 	struct sysinfo si;
1990 	bool is_os_64 = (sizeof(void *) == 8);
1991 	uint64_t total_memory;
1992 	uint64_t dram_size_seven_GB = 0x1B8000000;
1993 	uint64_t dram_size_three_GB = 0xB8000000;
1994 
1995 	if (amdgpu_smu_memory_pool_size == 0)
1996 		return;
1997 
1998 	if (!is_os_64) {
1999 		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
2000 		goto def_value;
2001 	}
2002 	si_meminfo(&si);
2003 	total_memory = (uint64_t)si.totalram * si.mem_unit;
2004 
2005 	if ((amdgpu_smu_memory_pool_size == 1) ||
2006 		(amdgpu_smu_memory_pool_size == 2)) {
2007 		if (total_memory < dram_size_three_GB)
2008 			goto def_value1;
2009 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
2010 		(amdgpu_smu_memory_pool_size == 8)) {
2011 		if (total_memory < dram_size_seven_GB)
2012 			goto def_value1;
2013 	} else {
2014 		dev_warn(adev->dev, "Smu memory pool size not supported\n");
2015 		goto def_value;
2016 	}
2017 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
2018 
2019 	return;
2020 
2021 def_value1:
2022 	dev_warn(adev->dev, "No enough system memory\n");
2023 def_value:
2024 	adev->pm.smu_prv_buffer_size = 0;
2025 }
2026 
2027 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
2028 {
2029 	if (!(adev->flags & AMD_IS_APU) ||
2030 	    adev->asic_type < CHIP_RAVEN)
2031 		return 0;
2032 
2033 	switch (adev->asic_type) {
2034 	case CHIP_RAVEN:
2035 		if (adev->pdev->device == 0x15dd)
2036 			adev->apu_flags |= AMD_APU_IS_RAVEN;
2037 		if (adev->pdev->device == 0x15d8)
2038 			adev->apu_flags |= AMD_APU_IS_PICASSO;
2039 		break;
2040 	case CHIP_RENOIR:
2041 		if ((adev->pdev->device == 0x1636) ||
2042 		    (adev->pdev->device == 0x164c))
2043 			adev->apu_flags |= AMD_APU_IS_RENOIR;
2044 		else
2045 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
2046 		break;
2047 	case CHIP_VANGOGH:
2048 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
2049 		break;
2050 	case CHIP_YELLOW_CARP:
2051 		break;
2052 	case CHIP_CYAN_SKILLFISH:
2053 		if ((adev->pdev->device == 0x13FE) ||
2054 		    (adev->pdev->device == 0x143F))
2055 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
2056 		break;
2057 	default:
2058 		break;
2059 	}
2060 
2061 	return 0;
2062 }
2063 
2064 /**
2065  * amdgpu_device_check_arguments - validate module params
2066  *
2067  * @adev: amdgpu_device pointer
2068  *
2069  * Validates certain module parameters and updates
2070  * the associated values used by the driver (all asics).
2071  */
2072 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
2073 {
2074 	int i;
2075 
2076 	if (amdgpu_sched_jobs < 4) {
2077 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
2078 			 amdgpu_sched_jobs);
2079 		amdgpu_sched_jobs = 4;
2080 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
2081 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
2082 			 amdgpu_sched_jobs);
2083 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
2084 	}
2085 
2086 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
2087 		/* gart size must be greater or equal to 32M */
2088 		dev_warn(adev->dev, "gart size (%d) too small\n",
2089 			 amdgpu_gart_size);
2090 		amdgpu_gart_size = -1;
2091 	}
2092 
2093 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
2094 		/* gtt size must be greater or equal to 32M */
2095 		dev_warn(adev->dev, "gtt size (%d) too small\n",
2096 				 amdgpu_gtt_size);
2097 		amdgpu_gtt_size = -1;
2098 	}
2099 
2100 	/* valid range is between 4 and 9 inclusive */
2101 	if (amdgpu_vm_fragment_size != -1 &&
2102 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
2103 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
2104 		amdgpu_vm_fragment_size = -1;
2105 	}
2106 
2107 	if (amdgpu_sched_hw_submission < 2) {
2108 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
2109 			 amdgpu_sched_hw_submission);
2110 		amdgpu_sched_hw_submission = 2;
2111 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
2112 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
2113 			 amdgpu_sched_hw_submission);
2114 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
2115 	}
2116 
2117 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
2118 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
2119 		amdgpu_reset_method = -1;
2120 	}
2121 
2122 	amdgpu_device_check_smu_prv_buffer_size(adev);
2123 
2124 	amdgpu_device_check_vm_size(adev);
2125 
2126 	amdgpu_device_check_block_size(adev);
2127 
2128 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
2129 
2130 	for (i = 0; i < MAX_XCP; i++) {
2131 		switch (amdgpu_enforce_isolation) {
2132 		case -1:
2133 		case 0:
2134 		default:
2135 			/* disable */
2136 			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
2137 			break;
2138 		case 1:
2139 			/* enable */
2140 			adev->enforce_isolation[i] =
2141 				AMDGPU_ENFORCE_ISOLATION_ENABLE;
2142 			break;
2143 		case 2:
2144 			/* enable legacy mode */
2145 			adev->enforce_isolation[i] =
2146 				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
2147 			break;
2148 		case 3:
2149 			/* enable only process isolation without submitting cleaner shader */
2150 			adev->enforce_isolation[i] =
2151 				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
2152 			break;
2153 		}
2154 	}
2155 
2156 	return 0;
2157 }
2158 
2159 /**
2160  * amdgpu_switcheroo_set_state - set switcheroo state
2161  *
2162  * @pdev: pci dev pointer
2163  * @state: vga_switcheroo state
2164  *
2165  * Callback for the switcheroo driver.  Suspends or resumes
2166  * the asics before or after it is powered up using ACPI methods.
2167  */
2168 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2169 					enum vga_switcheroo_state state)
2170 {
2171 	struct drm_device *dev = pci_get_drvdata(pdev);
2172 	int r;
2173 
2174 	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
2175 	    state == VGA_SWITCHEROO_OFF)
2176 		return;
2177 
2178 	if (state == VGA_SWITCHEROO_ON) {
2179 		pr_info("switched on\n");
2180 		/* don't suspend or resume card normally */
2181 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2182 
2183 		pci_set_power_state(pdev, PCI_D0);
2184 		amdgpu_device_load_pci_state(pdev);
2185 		r = pci_enable_device(pdev);
2186 		if (r)
2187 			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
2188 				 r);
2189 		amdgpu_device_resume(dev, true);
2190 
2191 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
2192 	} else {
2193 		dev_info(&pdev->dev, "switched off\n");
2194 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2195 		amdgpu_device_prepare(dev);
2196 		amdgpu_device_suspend(dev, true);
2197 		amdgpu_device_cache_pci_state(pdev);
2198 		/* Shut down the device */
2199 		pci_disable_device(pdev);
2200 		pci_set_power_state(pdev, PCI_D3cold);
2201 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2202 	}
2203 }
2204 
2205 /**
2206  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2207  *
2208  * @pdev: pci dev pointer
2209  *
2210  * Callback for the switcheroo driver.  Check of the switcheroo
2211  * state can be changed.
2212  * Returns true if the state can be changed, false if not.
2213  */
2214 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2215 {
2216 	struct drm_device *dev = pci_get_drvdata(pdev);
2217 
2218        /*
2219 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
2220 	* locking inversion with the driver load path. And the access here is
2221 	* completely racy anyway. So don't bother with locking for now.
2222 	*/
2223 	return atomic_read(&dev->open_count) == 0;
2224 }
2225 
/* vga_switcheroo client callbacks; no reprobe hook is needed for amdgpu */
static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};
2231 
2232 /**
2233  * amdgpu_device_enable_virtual_display - enable virtual display feature
2234  *
2235  * @adev: amdgpu_device pointer
2236  *
2237  * Enabled the virtual display feature if the user has enabled it via
2238  * the module parameter virtual_display.  This feature provides a virtual
2239  * display hardware on headless boards or in virtualized environments.
2240  * This function parses and validates the configuration string specified by
2241  * the user and configures the virtual display configuration (number of
2242  * virtual connectors, crtcs, etc.) specified.
2243  */
2244 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2245 {
2246 	adev->enable_virtual_display = false;
2247 
2248 	if (amdgpu_virtual_display) {
2249 		const char *pci_address_name = pci_name(adev->pdev);
2250 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2251 
2252 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2253 		pciaddstr_tmp = pciaddstr;
2254 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2255 			pciaddname = strsep(&pciaddname_tmp, ",");
2256 			if (!strcmp("all", pciaddname)
2257 			    || !strcmp(pci_address_name, pciaddname)) {
2258 				long num_crtc;
2259 				int res = -1;
2260 
2261 				adev->enable_virtual_display = true;
2262 
2263 				if (pciaddname_tmp)
2264 					res = kstrtol(pciaddname_tmp, 10,
2265 						      &num_crtc);
2266 
2267 				if (!res) {
2268 					if (num_crtc < 1)
2269 						num_crtc = 1;
2270 					if (num_crtc > 6)
2271 						num_crtc = 6;
2272 					adev->mode_info.num_crtc = num_crtc;
2273 				} else {
2274 					adev->mode_info.num_crtc = 1;
2275 				}
2276 				break;
2277 			}
2278 		}
2279 
2280 		dev_info(
2281 			adev->dev,
2282 			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2283 			amdgpu_virtual_display, pci_address_name,
2284 			adev->enable_virtual_display, adev->mode_info.num_crtc);
2285 
2286 		kfree(pciaddstr);
2287 	}
2288 }
2289 
2290 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2291 {
2292 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2293 		adev->mode_info.num_crtc = 1;
2294 		adev->enable_virtual_display = true;
2295 		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
2296 			 adev->enable_virtual_display,
2297 			 adev->mode_info.num_crtc);
2298 	}
2299 }
2300 
2301 /**
2302  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2303  *
2304  * @adev: amdgpu_device pointer
2305  *
2306  * Parses the asic configuration parameters specified in the gpu info
2307  * firmware and makes them available to the driver for use in configuring
2308  * the asic.
2309  * Returns 0 on success, -EINVAL on failure.
2310  */
2311 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2312 {
2313 	const char *chip_name;
2314 	int err;
2315 	const struct gpu_info_firmware_header_v1_0 *hdr;
2316 
2317 	adev->firmware.gpu_info_fw = NULL;
2318 
2319 	switch (adev->asic_type) {
2320 	default:
2321 		return 0;
2322 	case CHIP_VEGA10:
2323 		chip_name = "vega10";
2324 		break;
2325 	case CHIP_VEGA12:
2326 		chip_name = "vega12";
2327 		break;
2328 	case CHIP_RAVEN:
2329 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2330 			chip_name = "raven2";
2331 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2332 			chip_name = "picasso";
2333 		else
2334 			chip_name = "raven";
2335 		break;
2336 	case CHIP_ARCTURUS:
2337 		chip_name = "arcturus";
2338 		break;
2339 	case CHIP_NAVI12:
2340 		if (adev->discovery.bin)
2341 			return 0;
2342 		chip_name = "navi12";
2343 		break;
2344 	case CHIP_CYAN_SKILLFISH:
2345 		if (adev->discovery.bin)
2346 			return 0;
2347 		chip_name = "cyan_skillfish";
2348 		break;
2349 	}
2350 
2351 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
2352 				   AMDGPU_UCODE_OPTIONAL,
2353 				   "amdgpu/%s_gpu_info.bin", chip_name);
2354 	if (err) {
2355 		dev_err(adev->dev,
2356 			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
2357 			chip_name);
2358 		goto out;
2359 	}
2360 
2361 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2362 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2363 
2364 	switch (hdr->version_major) {
2365 	case 1:
2366 	{
2367 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2368 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2369 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2370 
2371 		/*
2372 		 * Should be dropped when DAL no longer needs it.
2373 		 */
2374 		if (adev->asic_type == CHIP_NAVI12)
2375 			goto parse_soc_bounding_box;
2376 
2377 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2378 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2379 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2380 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2381 		adev->gfx.config.max_texture_channel_caches =
2382 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2383 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2384 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2385 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2386 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2387 		adev->gfx.config.double_offchip_lds_buf =
2388 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2389 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2390 		adev->gfx.cu_info.max_waves_per_simd =
2391 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2392 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2393 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2394 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2395 		if (hdr->version_minor >= 1) {
2396 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2397 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2398 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2399 			adev->gfx.config.num_sc_per_sh =
2400 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2401 			adev->gfx.config.num_packer_per_sc =
2402 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2403 		}
2404 
2405 parse_soc_bounding_box:
2406 		/*
2407 		 * soc bounding box info is not integrated in disocovery table,
2408 		 * we always need to parse it from gpu info firmware if needed.
2409 		 */
2410 		if (hdr->version_minor == 2) {
2411 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2412 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2413 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2414 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2415 		}
2416 		break;
2417 	}
2418 	default:
2419 		dev_err(adev->dev,
2420 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2421 		err = -EINVAL;
2422 		goto out;
2423 	}
2424 out:
2425 	return err;
2426 }
2427 
2428 static void amdgpu_uid_init(struct amdgpu_device *adev)
2429 {
2430 	/* Initialize the UID for the device */
2431 	adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL);
2432 	if (!adev->uid_info) {
2433 		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
2434 		return;
2435 	}
2436 	adev->uid_info->adev = adev;
2437 }
2438 
static void amdgpu_uid_fini(struct amdgpu_device *adev)
{
	/* Free the UID memory; kfree(NULL) is a no-op if init failed */
	kfree(adev->uid_info);
	adev->uid_info = NULL;
}
2445 
2446 /**
2447  * amdgpu_device_ip_early_init - run early init for hardware IPs
2448  *
2449  * @adev: amdgpu_device pointer
2450  *
2451  * Early initialization pass for hardware IPs.  The hardware IPs that make
2452  * up each asic are discovered each IP's early_init callback is run.  This
2453  * is the first stage in initializing the asic.
2454  * Returns 0 on success, negative error code on failure.
2455  */
2456 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2457 {
2458 	struct amdgpu_ip_block *ip_block;
2459 	struct pci_dev *parent;
2460 	bool total, skip_bios;
2461 	uint32_t bios_flags;
2462 	int i, r;
2463 
2464 	amdgpu_device_enable_virtual_display(adev);
2465 
2466 	if (amdgpu_sriov_vf(adev)) {
2467 		r = amdgpu_virt_request_full_gpu(adev, true);
2468 		if (r)
2469 			return r;
2470 
2471 		r = amdgpu_virt_init_critical_region(adev);
2472 		if (r)
2473 			return r;
2474 	}
2475 
2476 	switch (adev->asic_type) {
2477 #ifdef CONFIG_DRM_AMDGPU_SI
2478 	case CHIP_VERDE:
2479 	case CHIP_TAHITI:
2480 	case CHIP_PITCAIRN:
2481 	case CHIP_OLAND:
2482 	case CHIP_HAINAN:
2483 		adev->family = AMDGPU_FAMILY_SI;
2484 		r = si_set_ip_blocks(adev);
2485 		if (r)
2486 			return r;
2487 		break;
2488 #endif
2489 #ifdef CONFIG_DRM_AMDGPU_CIK
2490 	case CHIP_BONAIRE:
2491 	case CHIP_HAWAII:
2492 	case CHIP_KAVERI:
2493 	case CHIP_KABINI:
2494 	case CHIP_MULLINS:
2495 		if (adev->flags & AMD_IS_APU)
2496 			adev->family = AMDGPU_FAMILY_KV;
2497 		else
2498 			adev->family = AMDGPU_FAMILY_CI;
2499 
2500 		r = cik_set_ip_blocks(adev);
2501 		if (r)
2502 			return r;
2503 		break;
2504 #endif
2505 	case CHIP_TOPAZ:
2506 	case CHIP_TONGA:
2507 	case CHIP_FIJI:
2508 	case CHIP_POLARIS10:
2509 	case CHIP_POLARIS11:
2510 	case CHIP_POLARIS12:
2511 	case CHIP_VEGAM:
2512 	case CHIP_CARRIZO:
2513 	case CHIP_STONEY:
2514 		if (adev->flags & AMD_IS_APU)
2515 			adev->family = AMDGPU_FAMILY_CZ;
2516 		else
2517 			adev->family = AMDGPU_FAMILY_VI;
2518 
2519 		r = vi_set_ip_blocks(adev);
2520 		if (r)
2521 			return r;
2522 		break;
2523 	default:
2524 		r = amdgpu_discovery_set_ip_blocks(adev);
2525 		if (r)
2526 			return r;
2527 		break;
2528 	}
2529 
2530 	/* Check for IP version 9.4.3 with A0 hardware */
2531 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2532 	    !amdgpu_device_get_rev_id(adev)) {
2533 		dev_err(adev->dev, "Unsupported A0 hardware\n");
2534 		return -ENODEV;	/* device unsupported - no device error */
2535 	}
2536 
2537 	if (amdgpu_has_atpx() &&
2538 	    (amdgpu_is_atpx_hybrid() ||
2539 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2540 	    ((adev->flags & AMD_IS_APU) == 0) &&
2541 	    !dev_is_removable(&adev->pdev->dev))
2542 		adev->flags |= AMD_IS_PX;
2543 
2544 	if (!(adev->flags & AMD_IS_APU)) {
2545 		parent = pcie_find_root_port(adev->pdev);
2546 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2547 	}
2548 
2549 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2550 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2551 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2552 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2553 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2554 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2555 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2556 
2557 	adev->virt.is_xgmi_node_migrate_enabled = false;
2558 	if (amdgpu_sriov_vf(adev)) {
2559 		adev->virt.is_xgmi_node_migrate_enabled =
2560 			amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2561 	}
2562 
2563 	total = true;
2564 	for (i = 0; i < adev->num_ip_blocks; i++) {
2565 		ip_block = &adev->ip_blocks[i];
2566 
2567 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2568 			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2569 				 adev->ip_blocks[i].version->funcs->name);
2570 			adev->ip_blocks[i].status.valid = false;
2571 		} else if (ip_block->version->funcs->early_init) {
2572 			r = ip_block->version->funcs->early_init(ip_block);
2573 			if (r == -ENOENT) {
2574 				adev->ip_blocks[i].status.valid = false;
2575 			} else if (r) {
2576 				dev_err(adev->dev,
2577 					"early_init of IP block <%s> failed %d\n",
2578 					adev->ip_blocks[i].version->funcs->name,
2579 					r);
2580 				total = false;
2581 			} else {
2582 				adev->ip_blocks[i].status.valid = true;
2583 			}
2584 		} else {
2585 			adev->ip_blocks[i].status.valid = true;
2586 		}
2587 		/* get the vbios after the asic_funcs are set up */
2588 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2589 			r = amdgpu_device_parse_gpu_info_fw(adev);
2590 			if (r)
2591 				return r;
2592 
2593 			bios_flags = amdgpu_device_get_vbios_flags(adev);
2594 			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2595 			/* Read BIOS */
2596 			if (!skip_bios) {
2597 				bool optional =
2598 					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2599 				if (!amdgpu_get_bios(adev) && !optional)
2600 					return -EINVAL;
2601 
2602 				if (optional && !adev->bios)
2603 					dev_info(
2604 						adev->dev,
2605 						"VBIOS image optional, proceeding without VBIOS image");
2606 
2607 				if (adev->bios) {
2608 					r = amdgpu_atombios_init(adev);
2609 					if (r) {
2610 						dev_err(adev->dev,
2611 							"amdgpu_atombios_init failed\n");
2612 						amdgpu_vf_error_put(
2613 							adev,
2614 							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2615 							0, 0);
2616 						return r;
2617 					}
2618 				}
2619 			}
2620 
2621 			/*get pf2vf msg info at it's earliest time*/
2622 			if (amdgpu_sriov_vf(adev))
2623 				amdgpu_virt_init_data_exchange(adev);
2624 
2625 		}
2626 	}
2627 	if (!total)
2628 		return -ENODEV;
2629 
2630 	if (adev->gmc.xgmi.supported)
2631 		amdgpu_xgmi_early_init(adev);
2632 
2633 	if (amdgpu_is_multi_aid(adev))
2634 		amdgpu_uid_init(adev);
2635 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2636 	if (ip_block->status.valid != false)
2637 		amdgpu_amdkfd_device_probe(adev);
2638 
2639 	adev->cg_flags &= amdgpu_cg_mask;
2640 	adev->pg_flags &= amdgpu_pg_mask;
2641 
2642 	return 0;
2643 }
2644 
2645 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2646 {
2647 	int i, r;
2648 
2649 	for (i = 0; i < adev->num_ip_blocks; i++) {
2650 		if (!adev->ip_blocks[i].status.sw)
2651 			continue;
2652 		if (adev->ip_blocks[i].status.hw)
2653 			continue;
2654 		if (!amdgpu_ip_member_of_hwini(
2655 			    adev, adev->ip_blocks[i].version->type))
2656 			continue;
2657 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2658 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2659 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2660 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2661 			if (r) {
2662 				dev_err(adev->dev,
2663 					"hw_init of IP block <%s> failed %d\n",
2664 					adev->ip_blocks[i].version->funcs->name,
2665 					r);
2666 				return r;
2667 			}
2668 			adev->ip_blocks[i].status.hw = true;
2669 		}
2670 	}
2671 
2672 	return 0;
2673 }
2674 
2675 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2676 {
2677 	int i, r;
2678 
2679 	for (i = 0; i < adev->num_ip_blocks; i++) {
2680 		if (!adev->ip_blocks[i].status.sw)
2681 			continue;
2682 		if (adev->ip_blocks[i].status.hw)
2683 			continue;
2684 		if (!amdgpu_ip_member_of_hwini(
2685 			    adev, adev->ip_blocks[i].version->type))
2686 			continue;
2687 		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2688 		if (r) {
2689 			dev_err(adev->dev,
2690 				"hw_init of IP block <%s> failed %d\n",
2691 				adev->ip_blocks[i].version->funcs->name, r);
2692 			return r;
2693 		}
2694 		adev->ip_blocks[i].status.hw = true;
2695 	}
2696 
2697 	return 0;
2698 }
2699 
/*
 * amdgpu_device_fw_loading - bring up the PSP block and load SMU firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes (or resumes) the PSP IP block so firmware loading can proceed,
 * then loads the SMU firmware where applicable.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	/* PSP-based firmware loading only applies to VEGA10 and newer */
	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			if (!amdgpu_ip_member_of_hwini(adev,
						       AMD_IP_BLOCK_TYPE_PSP))
				break;

			if (!adev->ip_blocks[i].status.sw)
				continue;

			/* no need to do the fw loading again if already done*/
			if (adev->ip_blocks[i].status.hw == true)
				break;

			/* resume path reuses saved state; fresh init runs hw_init */
			if (amdgpu_in_reset(adev) || adev->in_suspend) {
				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
				if (r)
					return r;
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
				if (r) {
					dev_err(adev->dev,
						"hw_init of IP block <%s> failed %d\n",
						adev->ip_blocks[i]
							.version->funcs->name,
						r);
					return r;
				}
				adev->ip_blocks[i].status.hw = true;
			}
			/* stop after the first PSP block has been handled */
			break;
		}
	}

	/* SMU firmware load is skipped on SR-IOV VFs, except on TONGA */
	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}
2747 
/*
 * amdgpu_device_init_schedulers - create a GPU scheduler per ring
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes a drm_sched instance for every ring that wants one, using a
 * per-ring-type timeout, and creates the UVD/VCE scheduling entities.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
	/* common args; per-ring fields are filled in inside the loop */
	struct drm_sched_init_args args = {
		.ops = &amdgpu_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.timeout_wq = adev->reset_domain->wq,
		.dev = adev->dev,
	};
	long timeout;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		/* No need to setup the GPU scheduler for rings that don't need it */
		if (!ring || ring->no_scheduler)
			continue;

		/* pick the job timeout matching the ring type */
		switch (ring->funcs->type) {
		case AMDGPU_RING_TYPE_GFX:
			timeout = adev->gfx_timeout;
			break;
		case AMDGPU_RING_TYPE_COMPUTE:
			timeout = adev->compute_timeout;
			break;
		case AMDGPU_RING_TYPE_SDMA:
			timeout = adev->sdma_timeout;
			break;
		default:
			timeout = adev->video_timeout;
			break;
		}

		args.timeout = timeout;
		args.credit_limit = ring->num_hw_submission;
		args.score = ring->sched_score;
		args.name = ring->name;

		r = drm_sched_init(&ring->sched, &args);
		if (r) {
			dev_err(adev->dev,
				"Failed to create scheduler on ring %s.\n",
				ring->name);
			return r;
		}
		r = amdgpu_uvd_entity_init(adev, ring);
		if (r) {
			dev_err(adev->dev,
				"Failed to create UVD scheduling entity on ring %s.\n",
				ring->name);
			return r;
		}
		r = amdgpu_vce_entity_init(adev, ring);
		if (r) {
			dev_err(adev->dev,
				"Failed to create VCE scheduling entity on ring %s.\n",
				ring->name);
			return r;
		}
	}

	/* partitioned devices need their per-XCP scheduler lists refreshed */
	if (adev->xcp_mgr)
		amdgpu_xcp_update_partition_sched_list(adev);

	return 0;
}
2814 
2815 
2816 /**
2817  * amdgpu_device_ip_init - run init for hardware IPs
2818  *
2819  * @adev: amdgpu_device pointer
2820  *
2821  * Main initialization pass for hardware IPs.  The list of all the hardware
2822  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2823  * are run.  sw_init initializes the software state associated with each IP
2824  * and hw_init initializes the hardware associated with each IP.
2825  * Returns 0 on success, negative error code on failure.
2826  */
2827 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2828 {
2829 	bool init_badpage;
2830 	int i, r;
2831 
2832 	r = amdgpu_ras_init(adev);
2833 	if (r)
2834 		return r;
2835 
2836 	for (i = 0; i < adev->num_ip_blocks; i++) {
2837 		if (!adev->ip_blocks[i].status.valid)
2838 			continue;
2839 		if (adev->ip_blocks[i].version->funcs->sw_init) {
2840 			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2841 			if (r) {
2842 				dev_err(adev->dev,
2843 					"sw_init of IP block <%s> failed %d\n",
2844 					adev->ip_blocks[i].version->funcs->name,
2845 					r);
2846 				goto init_failed;
2847 			}
2848 		}
2849 		adev->ip_blocks[i].status.sw = true;
2850 
2851 		if (!amdgpu_ip_member_of_hwini(
2852 			    adev, adev->ip_blocks[i].version->type))
2853 			continue;
2854 
2855 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2856 			/* need to do common hw init early so everything is set up for gmc */
2857 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2858 			if (r) {
2859 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2860 					r);
2861 				goto init_failed;
2862 			}
2863 			adev->ip_blocks[i].status.hw = true;
2864 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2865 			/* need to do gmc hw init early so we can allocate gpu mem */
2866 			/* Try to reserve bad pages early */
2867 			if (amdgpu_sriov_vf(adev))
2868 				amdgpu_virt_exchange_data(adev);
2869 
2870 			r = amdgpu_device_mem_scratch_init(adev);
2871 			if (r) {
2872 				dev_err(adev->dev,
2873 					"amdgpu_mem_scratch_init failed %d\n",
2874 					r);
2875 				goto init_failed;
2876 			}
2877 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2878 			if (r) {
2879 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2880 					r);
2881 				goto init_failed;
2882 			}
2883 			r = amdgpu_device_wb_init(adev);
2884 			if (r) {
2885 				dev_err(adev->dev,
2886 					"amdgpu_device_wb_init failed %d\n", r);
2887 				goto init_failed;
2888 			}
2889 			adev->ip_blocks[i].status.hw = true;
2890 
2891 			/* right after GMC hw init, we create CSA */
2892 			if (adev->gfx.mcbp) {
2893 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2894 							       AMDGPU_GEM_DOMAIN_VRAM |
2895 							       AMDGPU_GEM_DOMAIN_GTT,
2896 							       AMDGPU_CSA_SIZE);
2897 				if (r) {
2898 					dev_err(adev->dev,
2899 						"allocate CSA failed %d\n", r);
2900 					goto init_failed;
2901 				}
2902 			}
2903 
2904 			r = amdgpu_seq64_init(adev);
2905 			if (r) {
2906 				dev_err(adev->dev, "allocate seq64 failed %d\n",
2907 					r);
2908 				goto init_failed;
2909 			}
2910 		}
2911 	}
2912 
2913 	if (amdgpu_sriov_vf(adev))
2914 		amdgpu_virt_init_data_exchange(adev);
2915 
2916 	r = amdgpu_ib_pool_init(adev);
2917 	if (r) {
2918 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2919 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2920 		goto init_failed;
2921 	}
2922 
2923 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2924 	if (r)
2925 		goto init_failed;
2926 
2927 	r = amdgpu_device_ip_hw_init_phase1(adev);
2928 	if (r)
2929 		goto init_failed;
2930 
2931 	r = amdgpu_device_fw_loading(adev);
2932 	if (r)
2933 		goto init_failed;
2934 
2935 	r = amdgpu_device_ip_hw_init_phase2(adev);
2936 	if (r)
2937 		goto init_failed;
2938 
2939 	/*
2940 	 * retired pages will be loaded from eeprom and reserved here,
2941 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2942 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2943 	 * for I2C communication which only true at this point.
2944 	 *
2945 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2946 	 * failure from bad gpu situation and stop amdgpu init process
2947 	 * accordingly. For other failed cases, it will still release all
2948 	 * the resource and print error message, rather than returning one
2949 	 * negative value to upper level.
2950 	 *
2951 	 * Note: theoretically, this should be called before all vram allocations
2952 	 * to protect retired page from abusing
2953 	 */
2954 	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2955 	r = amdgpu_ras_recovery_init(adev, init_badpage);
2956 	if (r)
2957 		goto init_failed;
2958 
2959 	/**
2960 	 * In case of XGMI grab extra reference for reset domain for this device
2961 	 */
2962 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2963 		if (amdgpu_xgmi_add_device(adev) == 0) {
2964 			if (!amdgpu_sriov_vf(adev)) {
2965 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2966 
2967 				if (WARN_ON(!hive)) {
2968 					r = -ENOENT;
2969 					goto init_failed;
2970 				}
2971 
2972 				if (!hive->reset_domain ||
2973 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2974 					r = -ENOENT;
2975 					amdgpu_put_xgmi_hive(hive);
2976 					goto init_failed;
2977 				}
2978 
2979 				/* Drop the early temporary reset domain we created for device */
2980 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2981 				adev->reset_domain = hive->reset_domain;
2982 				amdgpu_put_xgmi_hive(hive);
2983 			}
2984 		}
2985 	}
2986 
2987 	r = amdgpu_device_init_schedulers(adev);
2988 	if (r)
2989 		goto init_failed;
2990 
2991 	if (adev->mman.buffer_funcs_ring &&
2992 	    adev->mman.buffer_funcs_ring->sched.ready)
2993 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2994 
2995 	/* Don't init kfd if whole hive need to be reset during init */
2996 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
2997 		amdgpu_amdkfd_device_init(adev);
2998 	}
2999 
3000 	amdgpu_fru_get_product_info(adev);
3001 
3002 	r = amdgpu_cper_init(adev);
3003 
3004 init_failed:
3005 
3006 	return r;
3007 }
3008 
3009 /**
3010  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
3011  *
3012  * @adev: amdgpu_device pointer
3013  *
3014  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
3015  * this function before a GPU reset.  If the value is retained after a
3016  * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
3017  */
3018 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
3019 {
3020 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
3021 }
3022 
3023 /**
3024  * amdgpu_device_check_vram_lost - check if vram is valid
3025  *
3026  * @adev: amdgpu_device pointer
3027  *
3028  * Checks the reset magic value written to the gart pointer in VRAM.
3029  * The driver calls this after a GPU reset to see if the contents of
3030  * VRAM is lost or now.
3031  * returns true if vram is lost, false if not.
3032  */
3033 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
3034 {
3035 	if (memcmp(adev->gart.ptr, adev->reset_magic,
3036 			AMDGPU_RESET_MAGIC_NUM))
3037 		return true;
3038 
3039 	if (!amdgpu_in_reset(adev))
3040 		return false;
3041 
3042 	/*
3043 	 * For all ASICs with baco/mode1 reset, the VRAM is
3044 	 * always assumed to be lost.
3045 	 */
3046 	switch (amdgpu_asic_reset_method(adev)) {
3047 	case AMD_RESET_METHOD_LEGACY:
3048 	case AMD_RESET_METHOD_LINK:
3049 	case AMD_RESET_METHOD_BACO:
3050 	case AMD_RESET_METHOD_MODE1:
3051 		return true;
3052 	default:
3053 		return false;
3054 	}
3055 }
3056 
3057 /**
3058  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
3059  *
3060  * @adev: amdgpu_device pointer
3061  * @state: clockgating state (gate or ungate)
3062  *
3063  * The list of all the hardware IPs that make up the asic is walked and the
3064  * set_clockgating_state callbacks are run.
3065  * Late initialization pass enabling clockgating for hardware IPs.
3066  * Fini or suspend, pass disabling clockgating for hardware IPs.
3067  * Returns 0 on success, negative error code on failure.
3068  */
3069 
3070 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3071 			       enum amd_clockgating_state state)
3072 {
3073 	int i, j, r;
3074 
3075 	if (amdgpu_emu_mode == 1)
3076 		return 0;
3077 
3078 	for (j = 0; j < adev->num_ip_blocks; j++) {
3079 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3080 		if (!adev->ip_blocks[i].status.late_initialized)
3081 			continue;
3082 		/* skip CG for GFX, SDMA on S0ix */
3083 		if (adev->in_s0ix &&
3084 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3085 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3086 			continue;
3087 		/* skip CG for VCE/UVD, it's handled specially */
3088 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3089 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3090 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3091 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3092 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3093 			/* enable clockgating to save power */
3094 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
3095 										     state);
3096 			if (r) {
3097 				dev_err(adev->dev,
3098 					"set_clockgating_state(gate) of IP block <%s> failed %d\n",
3099 					adev->ip_blocks[i].version->funcs->name,
3100 					r);
3101 				return r;
3102 			}
3103 		}
3104 	}
3105 
3106 	return 0;
3107 }
3108 
3109 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3110 			       enum amd_powergating_state state)
3111 {
3112 	int i, j, r;
3113 
3114 	if (amdgpu_emu_mode == 1)
3115 		return 0;
3116 
3117 	for (j = 0; j < adev->num_ip_blocks; j++) {
3118 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3119 		if (!adev->ip_blocks[i].status.late_initialized)
3120 			continue;
3121 		/* skip PG for GFX, SDMA on S0ix */
3122 		if (adev->in_s0ix &&
3123 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3124 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3125 			continue;
3126 		/* skip CG for VCE/UVD, it's handled specially */
3127 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3128 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3129 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3130 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3131 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
3132 			/* enable powergating to save power */
3133 			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
3134 											state);
3135 			if (r) {
3136 				dev_err(adev->dev,
3137 					"set_powergating_state(gate) of IP block <%s> failed %d\n",
3138 					adev->ip_blocks[i].version->funcs->name,
3139 					r);
3140 				return r;
3141 			}
3142 		}
3143 	}
3144 	return 0;
3145 }
3146 
3147 static int amdgpu_device_enable_mgpu_fan_boost(void)
3148 {
3149 	struct amdgpu_gpu_instance *gpu_ins;
3150 	struct amdgpu_device *adev;
3151 	int i, ret = 0;
3152 
3153 	mutex_lock(&mgpu_info.mutex);
3154 
3155 	/*
3156 	 * MGPU fan boost feature should be enabled
3157 	 * only when there are two or more dGPUs in
3158 	 * the system
3159 	 */
3160 	if (mgpu_info.num_dgpu < 2)
3161 		goto out;
3162 
3163 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3164 		gpu_ins = &(mgpu_info.gpu_ins[i]);
3165 		adev = gpu_ins->adev;
3166 		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
3167 		    !gpu_ins->mgpu_fan_enabled) {
3168 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3169 			if (ret)
3170 				break;
3171 
3172 			gpu_ins->mgpu_fan_enabled = 1;
3173 		}
3174 	}
3175 
3176 out:
3177 	mutex_unlock(&mgpu_info.mutex);
3178 
3179 	return ret;
3180 }
3181 
3182 /**
3183  * amdgpu_device_ip_late_init - run late init for hardware IPs
3184  *
3185  * @adev: amdgpu_device pointer
3186  *
3187  * Late initialization pass for hardware IPs.  The list of all the hardware
3188  * IPs that make up the asic is walked and the late_init callbacks are run.
3189  * late_init covers any special initialization that an IP requires
3190  * after all of the have been initialized or something that needs to happen
3191  * late in the init process.
3192  * Returns 0 on success, negative error code on failure.
3193  */
3194 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3195 {
3196 	struct amdgpu_gpu_instance *gpu_instance;
3197 	int i = 0, r;
3198 
3199 	for (i = 0; i < adev->num_ip_blocks; i++) {
3200 		if (!adev->ip_blocks[i].status.hw)
3201 			continue;
3202 		if (adev->ip_blocks[i].version->funcs->late_init) {
3203 			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
3204 			if (r) {
3205 				dev_err(adev->dev,
3206 					"late_init of IP block <%s> failed %d\n",
3207 					adev->ip_blocks[i].version->funcs->name,
3208 					r);
3209 				return r;
3210 			}
3211 		}
3212 		adev->ip_blocks[i].status.late_initialized = true;
3213 	}
3214 
3215 	r = amdgpu_ras_late_init(adev);
3216 	if (r) {
3217 		dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
3218 		return r;
3219 	}
3220 
3221 	if (!amdgpu_reset_in_recovery(adev))
3222 		amdgpu_ras_set_error_query_ready(adev, true);
3223 
3224 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3225 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3226 
3227 	amdgpu_device_fill_reset_magic(adev);
3228 
3229 	r = amdgpu_device_enable_mgpu_fan_boost();
3230 	if (r)
3231 		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
3232 
3233 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3234 	if (amdgpu_passthrough(adev) &&
3235 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3236 	     adev->asic_type == CHIP_ALDEBARAN))
3237 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3238 
3239 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3240 		mutex_lock(&mgpu_info.mutex);
3241 
3242 		/*
3243 		 * Reset device p-state to low as this was booted with high.
3244 		 *
3245 		 * This should be performed only after all devices from the same
3246 		 * hive get initialized.
3247 		 *
3248 		 * However, it's unknown how many device in the hive in advance.
3249 		 * As this is counted one by one during devices initializations.
3250 		 *
3251 		 * So, we wait for all XGMI interlinked devices initialized.
3252 		 * This may bring some delays as those devices may come from
3253 		 * different hives. But that should be OK.
3254 		 */
3255 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3256 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3257 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3258 				if (gpu_instance->adev->flags & AMD_IS_APU)
3259 					continue;
3260 
3261 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3262 						AMDGPU_XGMI_PSTATE_MIN);
3263 				if (r) {
3264 					dev_err(adev->dev,
3265 						"pstate setting failed (%d).\n",
3266 						r);
3267 					break;
3268 				}
3269 			}
3270 		}
3271 
3272 		mutex_unlock(&mgpu_info.mutex);
3273 	}
3274 
3275 	return 0;
3276 }
3277 
3278 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
3279 {
3280 	struct amdgpu_device *adev = ip_block->adev;
3281 	int r;
3282 
3283 	if (!ip_block->version->funcs->hw_fini) {
3284 		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
3285 			ip_block->version->funcs->name);
3286 	} else {
3287 		r = ip_block->version->funcs->hw_fini(ip_block);
3288 		/* XXX handle errors */
3289 		if (r) {
3290 			dev_dbg(adev->dev,
3291 				"hw_fini of IP block <%s> failed %d\n",
3292 				ip_block->version->funcs->name, r);
3293 		}
3294 	}
3295 
3296 	ip_block->status.hw = false;
3297 }
3298 
3299 /**
3300  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3301  *
3302  * @adev: amdgpu_device pointer
3303  *
3304  * For ASICs need to disable SMC first
3305  */
3306 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3307 {
3308 	int i;
3309 
3310 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3311 		return;
3312 
3313 	for (i = 0; i < adev->num_ip_blocks; i++) {
3314 		if (!adev->ip_blocks[i].status.hw)
3315 			continue;
3316 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3317 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
3318 			break;
3319 		}
3320 	}
3321 }
3322 
/**
 * amdgpu_device_ip_fini_early - run early fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early teardown pass: runs the per-IP early_fini callbacks, ungates
 * power/clock gating, suspends KFD and user queues, then tears down the
 * hardware state of every IP block in reverse init order.  For SR-IOV VFs
 * exclusive GPU access is released at the end; APUs additionally get an
 * ASIC reset to work around PSP firmware-validation issues on reload.
 * Always returns 0 (errors along the way are only reported).
 */
static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
{
	int i, r;

	/* early_fini callbacks run in init order; failures are non-fatal */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].version->funcs->early_fini)
			continue;

		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
		if (r) {
			dev_dbg(adev->dev,
				"early_fini of IP block <%s> failed %d\n",
				adev->ip_blocks[i].version->funcs->name, r);
		}
	}

	/* ungate everything before touching the hardware */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	amdgpu_amdkfd_suspend(adev, true);
	amdgpu_userq_suspend(adev);

	/* Workaround for ASICs need to disable SMC first */
	amdgpu_device_smu_fini_early(adev);

	/* hw_fini in reverse init order */
	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.hw)
			continue;

		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
	}

	if (amdgpu_sriov_vf(adev)) {
		if (amdgpu_virt_release_full_gpu(adev, false))
			dev_err(adev->dev,
				"failed to release exclusive mode on fini\n");
	}

	/*
	 * Driver reload on the APU can fail due to firmware validation because
	 * the PSP is always running, as it is shared across the whole SoC.
	 * This same issue does not occur on dGPU because it has a mechanism
	 * that checks whether the PSP is running. A solution for those issues
	 * in the APU is to trigger a GPU reset, but this should be done during
	 * the unload phase to avoid adding boot latency and screen flicker.
	 */
	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
		r = amdgpu_asic_reset(adev);
		if (r)
			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
	}

	return 0;
}
3377 
3378 /**
3379  * amdgpu_device_ip_fini - run fini for hardware IPs
3380  *
3381  * @adev: amdgpu_device pointer
3382  *
3383  * Main teardown pass for hardware IPs.  The list of all the hardware
3384  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3385  * are run.  hw_fini tears down the hardware associated with each IP
3386  * and sw_fini tears down any software state associated with each IP.
3387  * Returns 0 on success, negative error code on failure.
3388  */
3389 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3390 {
3391 	int i, r;
3392 
3393 	amdgpu_cper_fini(adev);
3394 
3395 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3396 		amdgpu_virt_release_ras_err_handler_data(adev);
3397 
3398 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3399 		amdgpu_xgmi_remove_device(adev);
3400 
3401 	amdgpu_amdkfd_device_fini_sw(adev);
3402 
3403 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3404 		if (!adev->ip_blocks[i].status.sw)
3405 			continue;
3406 
3407 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3408 			amdgpu_ucode_free_bo(adev);
3409 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3410 			amdgpu_device_wb_fini(adev);
3411 			amdgpu_device_mem_scratch_fini(adev);
3412 			amdgpu_ib_pool_fini(adev);
3413 			amdgpu_seq64_fini(adev);
3414 			amdgpu_doorbell_fini(adev);
3415 		}
3416 		if (adev->ip_blocks[i].version->funcs->sw_fini) {
3417 			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
3418 			/* XXX handle errors */
3419 			if (r) {
3420 				dev_dbg(adev->dev,
3421 					"sw_fini of IP block <%s> failed %d\n",
3422 					adev->ip_blocks[i].version->funcs->name,
3423 					r);
3424 			}
3425 		}
3426 		adev->ip_blocks[i].status.sw = false;
3427 		adev->ip_blocks[i].status.valid = false;
3428 	}
3429 
3430 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3431 		if (!adev->ip_blocks[i].status.late_initialized)
3432 			continue;
3433 		if (adev->ip_blocks[i].version->funcs->late_fini)
3434 			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
3435 		adev->ip_blocks[i].status.late_initialized = false;
3436 	}
3437 
3438 	amdgpu_ras_fini(adev);
3439 	amdgpu_uid_fini(adev);
3440 
3441 	return 0;
3442 }
3443 
3444 /**
3445  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3446  *
3447  * @work: work_struct.
3448  */
3449 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3450 {
3451 	struct amdgpu_device *adev =
3452 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3453 	int r;
3454 
3455 	r = amdgpu_ib_ring_tests(adev);
3456 	if (r)
3457 		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
3458 }
3459 
3460 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3461 {
3462 	struct amdgpu_device *adev =
3463 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3464 
3465 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3466 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3467 
3468 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
3469 		adev->gfx.gfx_off_state = true;
3470 }
3471 
3472 /**
3473  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3474  *
3475  * @adev: amdgpu_device pointer
3476  *
3477  * Main suspend function for hardware IPs.  The list of all the hardware
3478  * IPs that make up the asic is walked, clockgating is disabled and the
3479  * suspend callbacks are run.  suspend puts the hardware and software state
3480  * in each IP into a state suitable for suspend.
3481  * Returns 0 on success, negative error code on failure.
3482  */
3483 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3484 {
3485 	int i, r, rec;
3486 
3487 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3488 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3489 
3490 	/*
3491 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3492 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3493 	 * scenario. Add the missing df cstate disablement here.
3494 	 */
3495 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3496 		dev_warn(adev->dev, "Failed to disallow df cstate");
3497 
3498 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3499 		if (!adev->ip_blocks[i].status.valid)
3500 			continue;
3501 
3502 		/* displays are handled separately */
3503 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3504 			continue;
3505 
3506 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3507 		if (r)
3508 			goto unwind;
3509 	}
3510 
3511 	return 0;
3512 unwind:
3513 	rec = amdgpu_device_ip_resume_phase3(adev);
3514 	if (rec)
3515 		dev_err(adev->dev,
3516 			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
3517 			rec);
3518 
3519 	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
3520 
3521 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3522 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3523 
3524 	return r;
3525 }
3526 
3527 /**
3528  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3529  *
3530  * @adev: amdgpu_device pointer
3531  *
3532  * Main suspend function for hardware IPs.  The list of all the hardware
3533  * IPs that make up the asic is walked, clockgating is disabled and the
3534  * suspend callbacks are run.  suspend puts the hardware and software state
3535  * in each IP into a state suitable for suspend.
3536  * Returns 0 on success, negative error code on failure.
3537  */
3538 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3539 {
3540 	int i, r, rec;
3541 
3542 	if (adev->in_s0ix)
3543 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3544 
3545 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3546 		if (!adev->ip_blocks[i].status.valid)
3547 			continue;
3548 		/* displays are handled in phase1 */
3549 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3550 			continue;
3551 		/* PSP lost connection when err_event_athub occurs */
3552 		if (amdgpu_ras_intr_triggered() &&
3553 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3554 			adev->ip_blocks[i].status.hw = false;
3555 			continue;
3556 		}
3557 
3558 		/* skip unnecessary suspend if we do not initialize them yet */
3559 		if (!amdgpu_ip_member_of_hwini(
3560 			    adev, adev->ip_blocks[i].version->type))
3561 			continue;
3562 
3563 		/* Since we skip suspend for S0i3, we need to cancel the delayed
3564 		 * idle work here as the suspend callback never gets called.
3565 		 */
3566 		if (adev->in_s0ix &&
3567 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3568 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3569 			cancel_delayed_work_sync(&adev->gfx.idle_work);
3570 		/* skip suspend of gfx/mes and psp for S0ix
3571 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3572 		 * like at runtime. PSP is also part of the always on hardware
3573 		 * so no need to suspend it.
3574 		 */
3575 		if (adev->in_s0ix &&
3576 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3577 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3578 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3579 			continue;
3580 
3581 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3582 		if (adev->in_s0ix &&
3583 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3584 		     IP_VERSION(5, 0, 0)) &&
3585 		    (adev->ip_blocks[i].version->type ==
3586 		     AMD_IP_BLOCK_TYPE_SDMA))
3587 			continue;
3588 
3589 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3590 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3591 		 * from this location and RLC Autoload automatically also gets loaded
3592 		 * from here based on PMFW -> PSP message during re-init sequence.
3593 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3594 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3595 		 */
3596 		if (amdgpu_in_reset(adev) &&
3597 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3598 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3599 			continue;
3600 
3601 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3602 		if (r)
3603 			goto unwind;
3604 
3605 		/* handle putting the SMC in the appropriate state */
3606 		if (!amdgpu_sriov_vf(adev)) {
3607 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3608 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3609 				if (r) {
3610 					dev_err(adev->dev,
3611 						"SMC failed to set mp1 state %d, %d\n",
3612 						adev->mp1_state, r);
3613 					goto unwind;
3614 				}
3615 			}
3616 		}
3617 	}
3618 
3619 	return 0;
3620 unwind:
3621 	/* suspend phase 2 = resume phase 1 + resume phase 2 */
3622 	rec = amdgpu_device_ip_resume_phase1(adev);
3623 	if (rec) {
3624 		dev_err(adev->dev,
3625 			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3626 			rec);
3627 		return r;
3628 	}
3629 
3630 	rec = amdgpu_device_fw_loading(adev);
3631 	if (rec) {
3632 		dev_err(adev->dev,
3633 			"amdgpu_device_fw_loading failed during unwind: %d\n",
3634 			rec);
3635 		return r;
3636 	}
3637 
3638 	rec = amdgpu_device_ip_resume_phase2(adev);
3639 	if (rec) {
3640 		dev_err(adev->dev,
3641 			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3642 			rec);
3643 		return r;
3644 	}
3645 
3646 	return r;
3647 }
3648 
3649 /**
3650  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3651  *
3652  * @adev: amdgpu_device pointer
3653  *
3654  * Main suspend function for hardware IPs.  The list of all the hardware
3655  * IPs that make up the asic is walked, clockgating is disabled and the
3656  * suspend callbacks are run.  suspend puts the hardware and software state
3657  * in each IP into a state suitable for suspend.
3658  * Returns 0 on success, negative error code on failure.
3659  */
3660 static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3661 {
3662 	int r;
3663 
3664 	if (amdgpu_sriov_vf(adev)) {
3665 		amdgpu_virt_fini_data_exchange(adev);
3666 		amdgpu_virt_request_full_gpu(adev, false);
3667 	}
3668 
3669 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3670 
3671 	r = amdgpu_device_ip_suspend_phase1(adev);
3672 	if (r)
3673 		return r;
3674 	r = amdgpu_device_ip_suspend_phase2(adev);
3675 
3676 	if (amdgpu_sriov_vf(adev))
3677 		amdgpu_virt_release_full_gpu(adev, false);
3678 
3679 	return r;
3680 }
3681 
3682 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3683 {
3684 	int i, r;
3685 
3686 	static enum amd_ip_block_type ip_order[] = {
3687 		AMD_IP_BLOCK_TYPE_COMMON,
3688 		AMD_IP_BLOCK_TYPE_GMC,
3689 		AMD_IP_BLOCK_TYPE_PSP,
3690 		AMD_IP_BLOCK_TYPE_IH,
3691 	};
3692 
3693 	for (i = 0; i < adev->num_ip_blocks; i++) {
3694 		int j;
3695 		struct amdgpu_ip_block *block;
3696 
3697 		block = &adev->ip_blocks[i];
3698 		block->status.hw = false;
3699 
3700 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3701 
3702 			if (block->version->type != ip_order[j] ||
3703 				!block->status.valid)
3704 				continue;
3705 
3706 			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3707 			if (r) {
3708 				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3709 					 block->version->funcs->name);
3710 				return r;
3711 			}
3712 			block->status.hw = true;
3713 		}
3714 	}
3715 
3716 	return 0;
3717 }
3718 
3719 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3720 {
3721 	struct amdgpu_ip_block *block;
3722 	int i, r = 0;
3723 
3724 	static enum amd_ip_block_type ip_order[] = {
3725 		AMD_IP_BLOCK_TYPE_SMC,
3726 		AMD_IP_BLOCK_TYPE_DCE,
3727 		AMD_IP_BLOCK_TYPE_GFX,
3728 		AMD_IP_BLOCK_TYPE_SDMA,
3729 		AMD_IP_BLOCK_TYPE_MES,
3730 		AMD_IP_BLOCK_TYPE_UVD,
3731 		AMD_IP_BLOCK_TYPE_VCE,
3732 		AMD_IP_BLOCK_TYPE_VCN,
3733 		AMD_IP_BLOCK_TYPE_JPEG
3734 	};
3735 
3736 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3737 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
3738 
3739 		if (!block)
3740 			continue;
3741 
3742 		if (block->status.valid && !block->status.hw) {
3743 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3744 				r = amdgpu_ip_block_resume(block);
3745 			} else {
3746 				r = block->version->funcs->hw_init(block);
3747 			}
3748 
3749 			if (r) {
3750 				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3751 					 block->version->funcs->name);
3752 				break;
3753 			}
3754 			block->status.hw = true;
3755 		}
3756 	}
3757 
3758 	return r;
3759 }
3760 
3761 /**
3762  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3763  *
3764  * @adev: amdgpu_device pointer
3765  *
3766  * First resume function for hardware IPs.  The list of all the hardware
3767  * IPs that make up the asic is walked and the resume callbacks are run for
3768  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3769  * after a suspend and updates the software state as necessary.  This
3770  * function is also used for restoring the GPU after a GPU reset.
3771  * Returns 0 on success, negative error code on failure.
3772  */
3773 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3774 {
3775 	int i, r;
3776 
3777 	for (i = 0; i < adev->num_ip_blocks; i++) {
3778 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3779 			continue;
3780 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3781 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3782 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3783 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3784 
3785 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3786 			if (r)
3787 				return r;
3788 		}
3789 	}
3790 
3791 	return 0;
3792 }
3793 
3794 /**
3795  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3796  *
3797  * @adev: amdgpu_device pointer
3798  *
3799  * Second resume function for hardware IPs.  The list of all the hardware
3800  * IPs that make up the asic is walked and the resume callbacks are run for
3801  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3802  * functional state after a suspend and updates the software state as
3803  * necessary.  This function is also used for restoring the GPU after a GPU
3804  * reset.
3805  * Returns 0 on success, negative error code on failure.
3806  */
3807 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3808 {
3809 	int i, r;
3810 
3811 	for (i = 0; i < adev->num_ip_blocks; i++) {
3812 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3813 			continue;
3814 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3815 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3816 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3817 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3818 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3819 			continue;
3820 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3821 		if (r)
3822 			return r;
3823 	}
3824 
3825 	return 0;
3826 }
3827 
3828 /**
3829  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3830  *
3831  * @adev: amdgpu_device pointer
3832  *
3833  * Third resume function for hardware IPs.  The list of all the hardware
3834  * IPs that make up the asic is walked and the resume callbacks are run for
3835  * all DCE.  resume puts the hardware into a functional state after a suspend
3836  * and updates the software state as necessary.  This function is also used
3837  * for restoring the GPU after a GPU reset.
3838  *
3839  * Returns 0 on success, negative error code on failure.
3840  */
3841 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3842 {
3843 	int i, r;
3844 
3845 	for (i = 0; i < adev->num_ip_blocks; i++) {
3846 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3847 			continue;
3848 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3849 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3850 			if (r)
3851 				return r;
3852 		}
3853 	}
3854 
3855 	return 0;
3856 }
3857 
3858 /**
3859  * amdgpu_device_ip_resume - run resume for hardware IPs
3860  *
3861  * @adev: amdgpu_device pointer
3862  *
3863  * Main resume function for hardware IPs.  The hardware IPs
3864  * are split into two resume functions because they are
3865  * also used in recovering from a GPU reset and some additional
3866  * steps need to be take between them.  In this case (S3/S4) they are
3867  * run sequentially.
3868  * Returns 0 on success, negative error code on failure.
3869  */
3870 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3871 {
3872 	int r;
3873 
3874 	r = amdgpu_device_ip_resume_phase1(adev);
3875 	if (r)
3876 		return r;
3877 
3878 	r = amdgpu_device_fw_loading(adev);
3879 	if (r)
3880 		return r;
3881 
3882 	r = amdgpu_device_ip_resume_phase2(adev);
3883 
3884 	if (adev->mman.buffer_funcs_ring->sched.ready)
3885 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3886 
3887 	if (r)
3888 		return r;
3889 
3890 	amdgpu_fence_driver_hw_init(adev);
3891 
3892 	r = amdgpu_device_ip_resume_phase3(adev);
3893 
3894 	return r;
3895 }
3896 
3897 /**
3898  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3899  *
3900  * @adev: amdgpu_device pointer
3901  *
3902  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3903  */
3904 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3905 {
3906 	if (amdgpu_sriov_vf(adev)) {
3907 		if (adev->is_atom_fw) {
3908 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3909 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3910 		} else {
3911 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3912 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3913 		}
3914 
3915 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3916 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3917 	}
3918 }
3919 
3920 /**
3921  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3922  *
3923  * @pdev : pci device context
3924  * @asic_type: AMD asic type
3925  *
3926  * Check if there is DC (new modesetting infrastructre) support for an asic.
3927  * returns true if DC has support, false if not.
3928  */
3929 bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
3930 				       enum amd_asic_type asic_type)
3931 {
3932 	switch (asic_type) {
3933 #ifdef CONFIG_DRM_AMDGPU_SI
3934 	case CHIP_HAINAN:
3935 #endif
3936 	case CHIP_TOPAZ:
3937 		/* chips with no display hardware */
3938 		return false;
3939 #if defined(CONFIG_DRM_AMD_DC)
3940 	case CHIP_TAHITI:
3941 	case CHIP_PITCAIRN:
3942 	case CHIP_VERDE:
3943 	case CHIP_OLAND:
3944 		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
3945 	case CHIP_KAVERI:
3946 	case CHIP_KABINI:
3947 	case CHIP_MULLINS:
3948 		/*
3949 		 * We have systems in the wild with these ASICs that require
3950 		 * TRAVIS and NUTMEG support which is not supported with DC.
3951 		 *
3952 		 * Fallback to the non-DC driver here by default so as not to
3953 		 * cause regressions.
3954 		 */
3955 		return amdgpu_dc > 0;
3956 	default:
3957 		return amdgpu_dc != 0;
3958 #else
3959 	default:
3960 		if (amdgpu_dc > 0)
3961 			dev_info_once(
3962 				&pdev->dev,
3963 				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3964 		return false;
3965 #endif
3966 	}
3967 }
3968 
3969 /**
3970  * amdgpu_device_has_dc_support - check if dc is supported
3971  *
3972  * @adev: amdgpu_device pointer
3973  *
3974  * Returns true for supported, false for not supported
3975  */
3976 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3977 {
3978 	if (adev->enable_virtual_display ||
3979 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3980 		return false;
3981 
3982 	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
3983 }
3984 
/*
 * Per-device work item that performs an ASIC reset as part of a
 * hive-wide XGMI reset.  One instance runs per device in the hive;
 * the task barrier keeps all of them in lock-step.
 */
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
	struct amdgpu_device *adev =
		container_of(__work, struct amdgpu_device, xgmi_reset_work);
	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

	/* It's a bug to not have a hive within this function */
	if (WARN_ON(!hive))
		return;

	/*
	 * Use task barrier to synchronize all xgmi reset works across the
	 * hive. task_barrier_enter and task_barrier_exit will block
	 * until all the threads running the xgmi reset works reach
	 * those points. task_barrier_full will do both blocks.
	 */
	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {

		/* all devices enter BACO together, then exit together */
		task_barrier_enter(&hive->tb);
		adev->asic_reset_res = amdgpu_device_baco_enter(adev);

		if (adev->asic_reset_res)
			goto fail;

		task_barrier_exit(&hive->tb);
		adev->asic_reset_res = amdgpu_device_baco_exit(adev);

		if (adev->asic_reset_res)
			goto fail;

		/* BACO cycle completed; stale MMHUB RAS counts can be cleared */
		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
	} else {

		task_barrier_full(&hive->tb);
		adev->asic_reset_res =  amdgpu_asic_reset(adev);
	}

fail:
	/* NOTE: also reached on success; asic_reset_res gates the warning */
	if (adev->asic_reset_res)
		dev_warn(adev->dev,
			 "ASIC reset failed with error, %d for drm dev, %s",
			 adev->asic_reset_res, adev_to_drm(adev)->unique);
	/* drop the reference taken by amdgpu_get_xgmi_hive() above */
	amdgpu_put_xgmi_hive(hive);
}
4029 
4030 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
4031 {
4032 	char *input = amdgpu_lockup_timeout;
4033 	char *timeout_setting = NULL;
4034 	int index = 0;
4035 	long timeout;
4036 	int ret = 0;
4037 
4038 	/* By default timeout for all queues is 2 sec */
4039 	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
4040 		adev->video_timeout = msecs_to_jiffies(2000);
4041 
4042 	if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
4043 		return 0;
4044 
4045 	while ((timeout_setting = strsep(&input, ",")) &&
4046 	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
4047 		ret = kstrtol(timeout_setting, 0, &timeout);
4048 		if (ret)
4049 			return ret;
4050 
4051 		if (timeout == 0) {
4052 			index++;
4053 			continue;
4054 		} else if (timeout < 0) {
4055 			timeout = MAX_SCHEDULE_TIMEOUT;
4056 			dev_warn(adev->dev, "lockup timeout disabled");
4057 			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
4058 		} else {
4059 			timeout = msecs_to_jiffies(timeout);
4060 		}
4061 
4062 		switch (index++) {
4063 		case 0:
4064 			adev->gfx_timeout = timeout;
4065 			break;
4066 		case 1:
4067 			adev->compute_timeout = timeout;
4068 			break;
4069 		case 2:
4070 			adev->sdma_timeout = timeout;
4071 			break;
4072 		case 3:
4073 			adev->video_timeout = timeout;
4074 			break;
4075 		default:
4076 			break;
4077 		}
4078 	}
4079 
4080 	/* When only one value specified apply it to all queues. */
4081 	if (index == 1)
4082 		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
4083 			adev->video_timeout = timeout;
4084 
4085 	return ret;
4086 }
4087 
4088 /**
4089  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4090  *
4091  * @adev: amdgpu_device pointer
4092  *
4093  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
4094  */
4095 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4096 {
4097 	struct iommu_domain *domain;
4098 
4099 	domain = iommu_get_domain_for_dev(adev->dev);
4100 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
4101 		adev->ram_is_direct_mapped = true;
4102 }
4103 
#if defined(CONFIG_HSA_AMD_P2P)
/**
 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true when the device sits behind a translating (DMA or DMA-FQ)
 * IOMMU domain, i.e. bus addresses are remapped.
 */
static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(adev->dev);

	if (!domain)
		return false;

	return domain->type == IOMMU_DOMAIN_DMA ||
	       domain->type == IOMMU_DOMAIN_DMA_FQ;
}
#endif
4124 
4125 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
4126 {
4127 	if (amdgpu_mcbp == 1)
4128 		adev->gfx.mcbp = true;
4129 	else if (amdgpu_mcbp == 0)
4130 		adev->gfx.mcbp = false;
4131 
4132 	if (amdgpu_sriov_vf(adev))
4133 		adev->gfx.mcbp = true;
4134 
4135 	if (adev->gfx.mcbp)
4136 		dev_info(adev->dev, "MCBP is enabled\n");
4137 }
4138 
4139 static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
4140 {
4141 	int r;
4142 
4143 	r = amdgpu_atombios_sysfs_init(adev);
4144 	if (r)
4145 		drm_err(&adev->ddev,
4146 			"registering atombios sysfs failed (%d).\n", r);
4147 
4148 	r = amdgpu_pm_sysfs_init(adev);
4149 	if (r)
4150 		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
4151 
4152 	r = amdgpu_ucode_sysfs_init(adev);
4153 	if (r) {
4154 		adev->ucode_sysfs_en = false;
4155 		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
4156 	} else
4157 		adev->ucode_sysfs_en = true;
4158 
4159 	r = amdgpu_device_attr_sysfs_init(adev);
4160 	if (r)
4161 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4162 
4163 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4164 	if (r)
4165 		dev_err(adev->dev,
4166 			"Could not create amdgpu board attributes\n");
4167 
4168 	amdgpu_fru_sysfs_init(adev);
4169 	amdgpu_reg_state_sysfs_init(adev);
4170 	amdgpu_xcp_sysfs_init(adev);
4171 
4172 	return r;
4173 }
4174 
4175 static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
4176 {
4177 	if (adev->pm.sysfs_initialized)
4178 		amdgpu_pm_sysfs_fini(adev);
4179 	if (adev->ucode_sysfs_en)
4180 		amdgpu_ucode_sysfs_fini(adev);
4181 	amdgpu_device_attr_sysfs_fini(adev);
4182 	amdgpu_fru_sysfs_fini(adev);
4183 
4184 	amdgpu_reg_state_sysfs_fini(adev);
4185 	amdgpu_xcp_sysfs_fini(adev);
4186 }
4187 
4188 /**
4189  * amdgpu_device_init - initialize the driver
4190  *
4191  * @adev: amdgpu_device pointer
4192  * @flags: driver flags
4193  *
4194  * Initializes the driver info and hw (all asics).
4195  * Returns 0 for success or an error on failure.
4196  * Called at driver startup.
4197  */
4198 int amdgpu_device_init(struct amdgpu_device *adev,
4199 		       uint32_t flags)
4200 {
4201 	struct pci_dev *pdev = adev->pdev;
4202 	int r, i;
4203 	bool px = false;
4204 	u32 max_MBps;
4205 	int tmp;
4206 
4207 	adev->shutdown = false;
4208 	adev->flags = flags;
4209 
4210 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
4211 		adev->asic_type = amdgpu_force_asic_type;
4212 	else
4213 		adev->asic_type = flags & AMD_ASIC_MASK;
4214 
4215 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4216 	if (amdgpu_emu_mode == 1)
4217 		adev->usec_timeout *= 10;
4218 	adev->gmc.gart_size = 512 * 1024 * 1024;
4219 	adev->accel_working = false;
4220 	adev->num_rings = 0;
4221 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4222 	adev->mman.buffer_funcs = NULL;
4223 	adev->mman.buffer_funcs_ring = NULL;
4224 	adev->vm_manager.vm_pte_funcs = NULL;
4225 	adev->vm_manager.vm_pte_num_scheds = 0;
4226 	adev->gmc.gmc_funcs = NULL;
4227 	adev->harvest_ip_mask = 0x0;
4228 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4229 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4230 
4231 	adev->smc_rreg = &amdgpu_invalid_rreg;
4232 	adev->smc_wreg = &amdgpu_invalid_wreg;
4233 	adev->pcie_rreg = &amdgpu_invalid_rreg;
4234 	adev->pcie_wreg = &amdgpu_invalid_wreg;
4235 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4236 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4237 	adev->pciep_rreg = &amdgpu_invalid_rreg;
4238 	adev->pciep_wreg = &amdgpu_invalid_wreg;
4239 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4240 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4241 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4242 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4243 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4244 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4245 	adev->didt_rreg = &amdgpu_invalid_rreg;
4246 	adev->didt_wreg = &amdgpu_invalid_wreg;
4247 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4248 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4249 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4250 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4251 
4252 	dev_info(
4253 		adev->dev,
4254 		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4255 		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4256 		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4257 
4258 	/* mutex initialization are all done here so we
4259 	 * can recall function without having locking issues
4260 	 */
4261 	mutex_init(&adev->firmware.mutex);
4262 	mutex_init(&adev->pm.mutex);
4263 	mutex_init(&adev->gfx.gpu_clock_mutex);
4264 	mutex_init(&adev->srbm_mutex);
4265 	mutex_init(&adev->gfx.pipe_reserve_mutex);
4266 	mutex_init(&adev->gfx.gfx_off_mutex);
4267 	mutex_init(&adev->gfx.partition_mutex);
4268 	mutex_init(&adev->grbm_idx_mutex);
4269 	mutex_init(&adev->mn_lock);
4270 	mutex_init(&adev->virt.vf_errors.lock);
4271 	hash_init(adev->mn_hash);
4272 	mutex_init(&adev->psp.mutex);
4273 	mutex_init(&adev->notifier_lock);
4274 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
4275 	mutex_init(&adev->benchmark_mutex);
4276 	mutex_init(&adev->gfx.reset_sem_mutex);
4277 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
4278 	mutex_init(&adev->enforce_isolation_mutex);
4279 	for (i = 0; i < MAX_XCP; ++i) {
4280 		adev->isolation[i].spearhead = dma_fence_get_stub();
4281 		amdgpu_sync_create(&adev->isolation[i].active);
4282 		amdgpu_sync_create(&adev->isolation[i].prev);
4283 	}
4284 	mutex_init(&adev->gfx.userq_sch_mutex);
4285 	mutex_init(&adev->gfx.workload_profile_mutex);
4286 	mutex_init(&adev->vcn.workload_profile_mutex);
4287 
4288 	amdgpu_device_init_apu_flags(adev);
4289 
4290 	r = amdgpu_device_check_arguments(adev);
4291 	if (r)
4292 		return r;
4293 
4294 	spin_lock_init(&adev->mmio_idx_lock);
4295 	spin_lock_init(&adev->smc_idx_lock);
4296 	spin_lock_init(&adev->pcie_idx_lock);
4297 	spin_lock_init(&adev->uvd_ctx_idx_lock);
4298 	spin_lock_init(&adev->didt_idx_lock);
4299 	spin_lock_init(&adev->gc_cac_idx_lock);
4300 	spin_lock_init(&adev->se_cac_idx_lock);
4301 	spin_lock_init(&adev->audio_endpt_idx_lock);
4302 	spin_lock_init(&adev->mm_stats.lock);
4303 	spin_lock_init(&adev->virt.rlcg_reg_lock);
4304 	spin_lock_init(&adev->wb.lock);
4305 
4306 	xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
4307 
4308 	INIT_LIST_HEAD(&adev->reset_list);
4309 
4310 	INIT_LIST_HEAD(&adev->ras_list);
4311 
4312 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4313 
4314 	xa_init(&adev->userq_doorbell_xa);
4315 
4316 	INIT_DELAYED_WORK(&adev->delayed_init_work,
4317 			  amdgpu_device_delayed_init_work_handler);
4318 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4319 			  amdgpu_device_delay_enable_gfx_off);
4320 	/*
4321 	 * Initialize the enforce_isolation work structures for each XCP
4322 	 * partition.  This work handler is responsible for enforcing shader
4323 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
4324 	 * each GFX and compute ring.  If there are any fences, it schedules
4325 	 * the `enforce_isolation_work` to be run after a delay.  If there are
4326 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4327 	 * runqueue.
4328 	 */
4329 	for (i = 0; i < MAX_XCP; i++) {
4330 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4331 				  amdgpu_gfx_enforce_isolation_handler);
4332 		adev->gfx.enforce_isolation[i].adev = adev;
4333 		adev->gfx.enforce_isolation[i].xcp_id = i;
4334 	}
4335 
4336 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4337 	INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
4338 
4339 	adev->gfx.gfx_off_req_count = 1;
4340 	adev->gfx.gfx_off_residency = 0;
4341 	adev->gfx.gfx_off_entrycount = 0;
4342 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4343 
4344 	atomic_set(&adev->throttling_logging_enabled, 1);
4345 	/*
4346 	 * If throttling continues, logging will be performed every minute
4347 	 * to avoid log flooding. "-1" is subtracted since the thermal
4348 	 * throttling interrupt comes every second. Thus, the total logging
4349 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
4350 	 * for throttling interrupt) = 60 seconds.
4351 	 */
4352 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4353 
4354 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4355 
4356 	/* Registers mapping */
4357 	/* TODO: block userspace mapping of io register */
4358 	if (adev->asic_type >= CHIP_BONAIRE) {
4359 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4360 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4361 	} else {
4362 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4363 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4364 	}
4365 
4366 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4367 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4368 
4369 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4370 	if (!adev->rmmio)
4371 		return -ENOMEM;
4372 
4373 	dev_info(adev->dev, "register mmio base: 0x%08X\n",
4374 		 (uint32_t)adev->rmmio_base);
4375 	dev_info(adev->dev, "register mmio size: %u\n",
4376 		 (unsigned int)adev->rmmio_size);
4377 
4378 	/*
4379 	 * Reset domain needs to be present early, before XGMI hive discovered
4380 	 * (if any) and initialized to use reset sem and in_gpu reset flag
4381 	 * early on during init and before calling to RREG32.
4382 	 */
4383 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4384 	if (!adev->reset_domain)
4385 		return -ENOMEM;
4386 
4387 	/* detect hw virtualization here */
4388 	amdgpu_virt_init(adev);
4389 
4390 	amdgpu_device_get_pcie_info(adev);
4391 
4392 	r = amdgpu_device_get_job_timeout_settings(adev);
4393 	if (r) {
4394 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4395 		return r;
4396 	}
4397 
4398 	amdgpu_device_set_mcbp(adev);
4399 
4400 	/*
4401 	 * By default, use default mode where all blocks are expected to be
4402 	 * initialized. At present a 'swinit' of blocks is required to be
4403 	 * completed before the need for a different level is detected.
4404 	 */
4405 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
4406 	/* early init functions */
4407 	r = amdgpu_device_ip_early_init(adev);
4408 	if (r)
4409 		return r;
4410 
4411 	/*
4412 	 * No need to remove conflicting FBs for non-display class devices.
4413 	 * This prevents the sysfb from being freed accidently.
4414 	 */
4415 	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
4416 	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
4417 		/* Get rid of things like offb */
4418 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
4419 		if (r)
4420 			return r;
4421 	}
4422 
4423 	/* Enable TMZ based on IP_VERSION */
4424 	amdgpu_gmc_tmz_set(adev);
4425 
4426 	if (amdgpu_sriov_vf(adev) &&
4427 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4428 		/* VF MMIO access (except mailbox range) from CPU
4429 		 * will be blocked during sriov runtime
4430 		 */
4431 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4432 
4433 	amdgpu_gmc_noretry_set(adev);
4434 	/* Need to get xgmi info early to decide the reset behavior*/
4435 	if (adev->gmc.xgmi.supported) {
4436 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4437 		if (r)
4438 			return r;
4439 	}
4440 
4441 	/* enable PCIE atomic ops */
4442 	if (amdgpu_sriov_vf(adev)) {
4443 		if (adev->virt.fw_reserve.p_pf2vf)
4444 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4445 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4446 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4447 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
4448 	 * internal path natively support atomics, set have_atomics_support to true.
4449 	 */
4450 	} else if ((adev->flags & AMD_IS_APU) &&
4451 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4452 		    IP_VERSION(9, 0, 0))) {
4453 		adev->have_atomics_support = true;
4454 	} else {
4455 		adev->have_atomics_support =
4456 			!pci_enable_atomic_ops_to_root(adev->pdev,
4457 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4458 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4459 	}
4460 
4461 	if (!adev->have_atomics_support)
4462 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4463 
4464 	/* doorbell bar mapping and doorbell index init*/
4465 	amdgpu_doorbell_init(adev);
4466 
4467 	if (amdgpu_emu_mode == 1) {
4468 		/* post the asic on emulation mode */
4469 		emu_soc_asic_init(adev);
4470 		goto fence_driver_init;
4471 	}
4472 
4473 	amdgpu_reset_init(adev);
4474 
4475 	/* detect if we are with an SRIOV vbios */
4476 	if (adev->bios)
4477 		amdgpu_device_detect_sriov_bios(adev);
4478 
4479 	/* check if we need to reset the asic
4480 	 *  E.g., driver was not cleanly unloaded previously, etc.
4481 	 */
4482 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4483 		if (adev->gmc.xgmi.num_physical_nodes) {
4484 			dev_info(adev->dev, "Pending hive reset.\n");
4485 			amdgpu_set_init_level(adev,
4486 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
4487 		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4488 				   !amdgpu_device_has_display_hardware(adev)) {
4489 					r = psp_gpu_reset(adev);
4490 		} else {
4491 				tmp = amdgpu_reset_method;
4492 				/* It should do a default reset when loading or reloading the driver,
4493 				 * regardless of the module parameter reset_method.
4494 				 */
4495 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4496 				r = amdgpu_asic_reset(adev);
4497 				amdgpu_reset_method = tmp;
4498 		}
4499 
4500 		if (r) {
4501 		  dev_err(adev->dev, "asic reset on init failed\n");
4502 		  goto failed;
4503 		}
4504 	}
4505 
4506 	/* Post card if necessary */
4507 	if (amdgpu_device_need_post(adev)) {
4508 		if (!adev->bios) {
4509 			dev_err(adev->dev, "no vBIOS found\n");
4510 			r = -EINVAL;
4511 			goto failed;
4512 		}
4513 		dev_info(adev->dev, "GPU posting now...\n");
4514 		r = amdgpu_device_asic_init(adev);
4515 		if (r) {
4516 			dev_err(adev->dev, "gpu post error!\n");
4517 			goto failed;
4518 		}
4519 	}
4520 
4521 	if (adev->bios) {
4522 		if (adev->is_atom_fw) {
4523 			/* Initialize clocks */
4524 			r = amdgpu_atomfirmware_get_clock_info(adev);
4525 			if (r) {
4526 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4527 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4528 				goto failed;
4529 			}
4530 		} else {
4531 			/* Initialize clocks */
4532 			r = amdgpu_atombios_get_clock_info(adev);
4533 			if (r) {
4534 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4535 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4536 				goto failed;
4537 			}
4538 			/* init i2c buses */
4539 			amdgpu_i2c_init(adev);
4540 		}
4541 	}
4542 
4543 fence_driver_init:
4544 	/* Fence driver */
4545 	r = amdgpu_fence_driver_sw_init(adev);
4546 	if (r) {
4547 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4548 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4549 		goto failed;
4550 	}
4551 
4552 	/* init the mode config */
4553 	drm_mode_config_init(adev_to_drm(adev));
4554 
4555 	r = amdgpu_device_ip_init(adev);
4556 	if (r) {
4557 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4558 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4559 		goto release_ras_con;
4560 	}
4561 
4562 	amdgpu_fence_driver_hw_init(adev);
4563 
4564 	dev_info(adev->dev,
4565 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4566 			adev->gfx.config.max_shader_engines,
4567 			adev->gfx.config.max_sh_per_se,
4568 			adev->gfx.config.max_cu_per_sh,
4569 			adev->gfx.cu_info.number);
4570 
4571 	adev->accel_working = true;
4572 
4573 	amdgpu_vm_check_compute_bug(adev);
4574 
4575 	/* Initialize the buffer migration limit. */
4576 	if (amdgpu_moverate >= 0)
4577 		max_MBps = amdgpu_moverate;
4578 	else
4579 		max_MBps = 8; /* Allow 8 MB/s. */
4580 	/* Get a log2 for easy divisions. */
4581 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4582 
4583 	/*
4584 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4585 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4586 	 * gpu instance is counted less.
4587 	 */
4588 	amdgpu_register_gpu_instance(adev);
4589 
4590 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4591 	 * explicit gating rather than handling it automatically.
4592 	 */
4593 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4594 		r = amdgpu_device_ip_late_init(adev);
4595 		if (r) {
4596 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4597 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4598 			goto release_ras_con;
4599 		}
4600 		/* must succeed. */
4601 		amdgpu_ras_resume(adev);
4602 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4603 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4604 	}
4605 
4606 	if (amdgpu_sriov_vf(adev)) {
4607 		amdgpu_virt_release_full_gpu(adev, true);
4608 		flush_delayed_work(&adev->delayed_init_work);
4609 	}
4610 
4611 	/* Don't init kfd if whole hive need to be reset during init */
4612 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4613 		kgd2kfd_init_zone_device(adev);
4614 		kfd_update_svm_support_properties(adev);
4615 	}
4616 
4617 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4618 		amdgpu_xgmi_reset_on_init(adev);
4619 
4620 	/*
4621 	 * Place those sysfs registering after `late_init`. As some of those
4622 	 * operations performed in `late_init` might affect the sysfs
4623 	 * interfaces creating.
4624 	 */
4625 	r = amdgpu_device_sys_interface_init(adev);
4626 
4627 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4628 		r = amdgpu_pmu_init(adev);
4629 	if (r)
4630 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4631 
4632 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4633 	if (amdgpu_device_cache_pci_state(adev->pdev))
4634 		pci_restore_state(pdev);
4635 
4636 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4637 	/* this will fail for cards that aren't VGA class devices, just
4638 	 * ignore it
4639 	 */
4640 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4641 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4642 
4643 	px = amdgpu_device_supports_px(adev);
4644 
4645 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4646 				apple_gmux_detect(NULL, NULL)))
4647 		vga_switcheroo_register_client(adev->pdev,
4648 					       &amdgpu_switcheroo_ops, px);
4649 
4650 	if (px)
4651 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4652 
4653 	amdgpu_device_check_iommu_direct_map(adev);
4654 
4655 	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4656 	r = register_pm_notifier(&adev->pm_nb);
4657 	if (r)
4658 		goto failed;
4659 
4660 	return 0;
4661 
4662 release_ras_con:
4663 	if (amdgpu_sriov_vf(adev))
4664 		amdgpu_virt_release_full_gpu(adev, true);
4665 
4666 	/* failed in exclusive mode due to timeout */
4667 	if (amdgpu_sriov_vf(adev) &&
4668 		!amdgpu_sriov_runtime(adev) &&
4669 		amdgpu_virt_mmio_blocked(adev) &&
4670 		!amdgpu_virt_wait_reset(adev)) {
4671 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4672 		/* Don't send request since VF is inactive. */
4673 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4674 		adev->virt.ops = NULL;
4675 		r = -EAGAIN;
4676 	}
4677 	amdgpu_release_ras_context(adev);
4678 
4679 failed:
4680 	amdgpu_vf_error_trans_all(adev);
4681 
4682 	return r;
4683 }
4684 
/*
 * Tear down every CPU-visible mapping of the device's BARs.  Called when
 * the device has been unplugged, so nothing may touch MMIO afterwards.
 */
static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
{

	/* Clear all CPU mappings pointing to this device */
	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);

	/* Unmap all mapped bars - Doorbell, registers and VRAM */
	amdgpu_doorbell_fini(adev);

	iounmap(adev->rmmio);
	adev->rmmio = NULL;
	if (adev->mman.aper_base_kaddr)
		iounmap(adev->mman.aper_base_kaddr);
	adev->mman.aper_base_kaddr = NULL;

	/* Memory manager related */
	/* Only discrete VRAM has MTRR/WC attributes to release; skip for
	 * CPU-connected XGMI and APU carve-out configurations.
	 */
	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
		arch_phys_wc_del(adev->gmc.vram_mtrr);
		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
	}
}
4706 
/**
 * amdgpu_device_fini_hw - tear down the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the driver info (all asics).
 * Called at driver shutdown.
 *
 * Hardware-facing half of teardown; the software half is
 * amdgpu_device_fini_sw().  The sequence below is order-sensitive.
 */
void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
	dev_info(adev->dev, "amdgpu: finishing device.\n");
	flush_delayed_work(&adev->delayed_init_work);

	if (adev->mman.initialized)
		drain_workqueue(adev->mman.bdev.wq);
	adev->shutdown = true;

	unregister_pm_notifier(&adev->pm_nb);

	/* make sure IB test finished before entering exclusive mode
	 * to avoid preemption on IB test
	 */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_request_full_gpu(adev, false);
		amdgpu_virt_fini_data_exchange(adev);
	}

	/* disable all interrupts */
	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized) {
		/* shut down displays via the path matching the KMS flavor */
		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
			drm_helper_force_disable_all(adev_to_drm(adev));
		else
			drm_atomic_helper_shutdown(adev_to_drm(adev));
	}
	amdgpu_fence_driver_hw_fini(adev);

	amdgpu_device_sys_interface_fini(adev);

	/* disable ras feature must before hw fini */
	amdgpu_ras_pre_fini(adev);

	amdgpu_ttm_set_buffer_funcs_status(adev, false);

	amdgpu_device_ip_fini_early(adev);

	amdgpu_irq_fini_hw(adev);

	if (adev->mman.initialized)
		ttm_device_clear_dma_mappings(&adev->mman.bdev);

	amdgpu_gart_dummy_page_fini(adev);

	/* MMIO can only be unmapped once userspace mappings are gone, i.e.
	 * after the device has been unplugged from DRM's point of view.
	 */
	if (drm_dev_is_unplugged(adev_to_drm(adev)))
		amdgpu_device_unmap_mmio(adev);

}
4764 
/*
 * Software half of device teardown: release IP blocks, fences, per-XCP
 * isolation state, bios data and the remaining kernel objects.  Must run
 * after amdgpu_device_fini_hw().
 */
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
	int i, idx;
	bool px;

	amdgpu_device_ip_fini(adev);
	amdgpu_fence_driver_sw_fini(adev);
	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
	adev->accel_working = false;
	/* drop the stub fence installed by amdgpu_device_init() */
	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
	for (i = 0; i < MAX_XCP; ++i) {
		dma_fence_put(adev->isolation[i].spearhead);
		amdgpu_sync_free(&adev->isolation[i].active);
		amdgpu_sync_free(&adev->isolation[i].prev);
	}

	amdgpu_reset_fini(adev);

	/* free i2c buses */
	amdgpu_i2c_fini(adev);

	if (adev->bios) {
		/* emulation mode never ran atombios init, so skip its fini */
		if (amdgpu_emu_mode != 1)
			amdgpu_atombios_fini(adev);
		amdgpu_bios_release(adev);
	}

	kfree(adev->fru_info);
	adev->fru_info = NULL;

	kfree(adev->xcp_mgr);
	adev->xcp_mgr = NULL;

	px = amdgpu_device_supports_px(adev);

	/* mirror the registration conditions used in amdgpu_device_init() */
	if (px || (!dev_is_removable(&adev->pdev->dev) &&
				apple_gmux_detect(NULL, NULL)))
		vga_switcheroo_unregister_client(adev->pdev);

	if (px)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);

	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
		vga_client_unregister(adev->pdev);

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

		/* not unplugged: MMIO is still mapped, unmap it now */
		iounmap(adev->rmmio);
		adev->rmmio = NULL;
		drm_dev_exit(idx);
	}

	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		amdgpu_pmu_fini(adev);
	if (adev->discovery.bin)
		amdgpu_discovery_fini(adev);

	amdgpu_reset_put_reset_domain(adev->reset_domain);
	adev->reset_domain = NULL;

	kfree(adev->pci_state);
	kfree(adev->pcie_reset_ctx.swds_pcistate);
	kfree(adev->pcie_reset_ctx.swus_pcistate);
}
4829 
4830 /**
4831  * amdgpu_device_evict_resources - evict device resources
4832  * @adev: amdgpu device object
4833  *
4834  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4835  * of the vram memory type. Mainly used for evicting device resources
4836  * at suspend time.
4837  *
4838  */
4839 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4840 {
4841 	int ret;
4842 
4843 	/* No need to evict vram on APUs unless going to S4 */
4844 	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
4845 		return 0;
4846 
4847 	/* No need to evict when going to S5 through S4 callbacks */
4848 	if (system_state == SYSTEM_POWER_OFF)
4849 		return 0;
4850 
4851 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4852 	if (ret) {
4853 		dev_warn(adev->dev, "evicting device resources failed\n");
4854 		return ret;
4855 	}
4856 
4857 	if (adev->in_s4) {
4858 		ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
4859 		if (ret)
4860 			dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
4861 	}
4862 	return ret;
4863 }
4864 
4865 /*
4866  * Suspend & resume.
4867  */
4868 /**
4869  * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
4870  * @nb: notifier block
4871  * @mode: suspend mode
4872  * @data: data
4873  *
4874  * This function is called when the system is about to suspend or hibernate.
4875  * It is used to set the appropriate flags so that eviction can be optimized
4876  * in the pm prepare callback.
4877  */
4878 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
4879 				     void *data)
4880 {
4881 	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
4882 
4883 	switch (mode) {
4884 	case PM_HIBERNATION_PREPARE:
4885 		adev->in_s4 = true;
4886 		break;
4887 	case PM_POST_HIBERNATION:
4888 		adev->in_s4 = false;
4889 		break;
4890 	}
4891 
4892 	return NOTIFY_DONE;
4893 }
4894 
4895 /**
4896  * amdgpu_device_prepare - prepare for device suspend
4897  *
4898  * @dev: drm dev pointer
4899  *
4900  * Prepare to put the hw in the suspend state (all asics).
4901  * Returns 0 for success or an error on failure.
4902  * Called at driver suspend.
4903  */
4904 int amdgpu_device_prepare(struct drm_device *dev)
4905 {
4906 	struct amdgpu_device *adev = drm_to_adev(dev);
4907 	int i, r;
4908 
4909 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4910 		return 0;
4911 
4912 	/* Evict the majority of BOs before starting suspend sequence */
4913 	r = amdgpu_device_evict_resources(adev);
4914 	if (r)
4915 		return r;
4916 
4917 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4918 
4919 	for (i = 0; i < adev->num_ip_blocks; i++) {
4920 		if (!adev->ip_blocks[i].status.valid)
4921 			continue;
4922 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4923 			continue;
4924 		r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4925 		if (r)
4926 			return r;
4927 	}
4928 
4929 	return 0;
4930 }
4931 
4932 /**
4933  * amdgpu_device_complete - complete power state transition
4934  *
4935  * @dev: drm dev pointer
4936  *
4937  * Undo the changes from amdgpu_device_prepare. This will be
4938  * called on all resume transitions, including those that failed.
4939  */
4940 void amdgpu_device_complete(struct drm_device *dev)
4941 {
4942 	struct amdgpu_device *adev = drm_to_adev(dev);
4943 	int i;
4944 
4945 	for (i = 0; i < adev->num_ip_blocks; i++) {
4946 		if (!adev->ip_blocks[i].status.valid)
4947 			continue;
4948 		if (!adev->ip_blocks[i].version->funcs->complete)
4949 			continue;
4950 		adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
4951 	}
4952 }
4953 
4954 /**
4955  * amdgpu_device_suspend - initiate device suspend
4956  *
4957  * @dev: drm dev pointer
4958  * @notify_clients: notify in-kernel DRM clients
4959  *
4960  * Puts the hw in the suspend state (all asics).
4961  * Returns 0 for success or an error on failure.
4962  * Called at driver suspend.
4963  */
4964 int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
4965 {
4966 	struct amdgpu_device *adev = drm_to_adev(dev);
4967 	int r, rec;
4968 
4969 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4970 		return 0;
4971 
4972 	adev->in_suspend = true;
4973 
4974 	if (amdgpu_sriov_vf(adev)) {
4975 		if (!adev->in_runpm)
4976 			amdgpu_amdkfd_suspend_process(adev);
4977 		amdgpu_virt_fini_data_exchange(adev);
4978 		r = amdgpu_virt_request_full_gpu(adev, false);
4979 		if (r)
4980 			return r;
4981 	}
4982 
4983 	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
4984 	if (r)
4985 		goto unwind_sriov;
4986 
4987 	if (notify_clients)
4988 		drm_client_dev_suspend(adev_to_drm(adev));
4989 
4990 	cancel_delayed_work_sync(&adev->delayed_init_work);
4991 
4992 	amdgpu_ras_suspend(adev);
4993 
4994 	r = amdgpu_device_ip_suspend_phase1(adev);
4995 	if (r)
4996 		goto unwind_smartshift;
4997 
4998 	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4999 	r = amdgpu_userq_suspend(adev);
5000 	if (r)
5001 		goto unwind_ip_phase1;
5002 
5003 	r = amdgpu_device_evict_resources(adev);
5004 	if (r)
5005 		goto unwind_userq;
5006 
5007 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
5008 
5009 	amdgpu_fence_driver_hw_fini(adev);
5010 
5011 	r = amdgpu_device_ip_suspend_phase2(adev);
5012 	if (r)
5013 		goto unwind_evict;
5014 
5015 	if (amdgpu_sriov_vf(adev))
5016 		amdgpu_virt_release_full_gpu(adev, false);
5017 
5018 	return 0;
5019 
5020 unwind_evict:
5021 	if (adev->mman.buffer_funcs_ring->sched.ready)
5022 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
5023 	amdgpu_fence_driver_hw_init(adev);
5024 
5025 unwind_userq:
5026 	rec = amdgpu_userq_resume(adev);
5027 	if (rec) {
5028 		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
5029 		return r;
5030 	}
5031 	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5032 	if (rec) {
5033 		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
5034 		return r;
5035 	}
5036 
5037 unwind_ip_phase1:
5038 	/* suspend phase 1 = resume phase 3 */
5039 	rec = amdgpu_device_ip_resume_phase3(adev);
5040 	if (rec) {
5041 		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
5042 		return r;
5043 	}
5044 
5045 unwind_smartshift:
5046 	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
5047 	if (rec) {
5048 		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
5049 		return r;
5050 	}
5051 
5052 	if (notify_clients)
5053 		drm_client_dev_resume(adev_to_drm(adev));
5054 
5055 	amdgpu_ras_resume(adev);
5056 
5057 unwind_sriov:
5058 	if (amdgpu_sriov_vf(adev)) {
5059 		rec = amdgpu_virt_request_full_gpu(adev, true);
5060 		if (rec) {
5061 			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
5062 			return r;
5063 		}
5064 	}
5065 
5066 	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
5067 
5068 	return r;
5069 }
5070 
/*
 * amdgpu_virt_resume - refresh VF state that can change across a VM migration
 * @adev: amdgpu_device pointer
 *
 * Re-reads the XGMI node info and recomputes the VRAM base offset, since the
 * physical node this VF maps to may differ after the VM resumes on another
 * host.  Returns 0 on success or a negative error code.
 */
static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
{
	int r;
	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;

	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
	 * may not work. The access could be blocked by nBIF protection as VF isn't in
	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
	 * so that QEMU reprograms MSIX table.
	 */
	amdgpu_restore_msix(adev);

	r = adev->gfxhub.funcs->get_xgmi_info(adev);
	if (r)
		return r;

	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);

	/* VRAM base offset depends on which physical node this VF now is */
	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
	adev->vm_manager.vram_base_offset +=
		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;

	return 0;
}
5096 
5097 /**
5098  * amdgpu_device_resume - initiate device resume
5099  *
5100  * @dev: drm dev pointer
5101  * @notify_clients: notify in-kernel DRM clients
5102  *
5103  * Bring the hw back to operating state (all asics).
5104  * Returns 0 for success or an error on failure.
5105  * Called at driver resume.
5106  */
5107 int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
5108 {
5109 	struct amdgpu_device *adev = drm_to_adev(dev);
5110 	int r = 0;
5111 
5112 	if (amdgpu_sriov_vf(adev)) {
5113 		r = amdgpu_virt_request_full_gpu(adev, true);
5114 		if (r)
5115 			return r;
5116 	}
5117 
5118 	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
5119 		r = amdgpu_virt_resume(adev);
5120 		if (r)
5121 			goto exit;
5122 	}
5123 
5124 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5125 		return 0;
5126 
5127 	if (adev->in_s0ix)
5128 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
5129 
5130 	/* post card */
5131 	if (amdgpu_device_need_post(adev)) {
5132 		r = amdgpu_device_asic_init(adev);
5133 		if (r)
5134 			dev_err(adev->dev, "amdgpu asic init failed\n");
5135 	}
5136 
5137 	r = amdgpu_device_ip_resume(adev);
5138 
5139 	if (r) {
5140 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
5141 		goto exit;
5142 	}
5143 
5144 	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5145 	if (r)
5146 		goto exit;
5147 
5148 	r = amdgpu_userq_resume(adev);
5149 	if (r)
5150 		goto exit;
5151 
5152 	r = amdgpu_device_ip_late_init(adev);
5153 	if (r)
5154 		goto exit;
5155 
5156 	queue_delayed_work(system_wq, &adev->delayed_init_work,
5157 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
5158 exit:
5159 	if (amdgpu_sriov_vf(adev)) {
5160 		amdgpu_virt_init_data_exchange(adev);
5161 		amdgpu_virt_release_full_gpu(adev, true);
5162 
5163 		if (!r && !adev->in_runpm)
5164 			r = amdgpu_amdkfd_resume_process(adev);
5165 	}
5166 
5167 	if (r)
5168 		return r;
5169 
5170 	/* Make sure IB tests flushed */
5171 	flush_delayed_work(&adev->delayed_init_work);
5172 
5173 	if (notify_clients)
5174 		drm_client_dev_resume(adev_to_drm(adev));
5175 
5176 	amdgpu_ras_resume(adev);
5177 
5178 	if (adev->mode_info.num_crtc) {
5179 		/*
5180 		 * Most of the connector probing functions try to acquire runtime pm
5181 		 * refs to ensure that the GPU is powered on when connector polling is
5182 		 * performed. Since we're calling this from a runtime PM callback,
5183 		 * trying to acquire rpm refs will cause us to deadlock.
5184 		 *
5185 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
5186 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
5187 		 */
5188 #ifdef CONFIG_PM
5189 		dev->dev->power.disable_depth++;
5190 #endif
5191 		if (!adev->dc_enabled)
5192 			drm_helper_hpd_irq_event(dev);
5193 		else
5194 			drm_kms_helper_hotplug_event(dev);
5195 #ifdef CONFIG_PM
5196 		dev->dev->power.disable_depth--;
5197 #endif
5198 	}
5199 
5200 	amdgpu_vram_mgr_clear_reset_blocks(adev);
5201 	adev->in_suspend = false;
5202 
5203 	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
5204 		dev_warn(adev->dev, "smart shift update failed\n");
5205 
5206 	return 0;
5207 }
5208 
5209 /**
5210  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
5211  *
5212  * @adev: amdgpu_device pointer
5213  *
5214  * The list of all the hardware IPs that make up the asic is walked and
5215  * the check_soft_reset callbacks are run.  check_soft_reset determines
5216  * if the asic is still hung or not.
5217  * Returns true if any of the IPs are still in a hung state, false if not.
5218  */
5219 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
5220 {
5221 	int i;
5222 	bool asic_hang = false;
5223 
5224 	if (amdgpu_sriov_vf(adev))
5225 		return true;
5226 
5227 	if (amdgpu_asic_need_full_reset(adev))
5228 		return true;
5229 
5230 	for (i = 0; i < adev->num_ip_blocks; i++) {
5231 		if (!adev->ip_blocks[i].status.valid)
5232 			continue;
5233 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
5234 			adev->ip_blocks[i].status.hang =
5235 				adev->ip_blocks[i].version->funcs->check_soft_reset(
5236 					&adev->ip_blocks[i]);
5237 		if (adev->ip_blocks[i].status.hang) {
5238 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
5239 			asic_hang = true;
5240 		}
5241 	}
5242 	return asic_hang;
5243 }
5244 
5245 /**
5246  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
5247  *
5248  * @adev: amdgpu_device pointer
5249  *
5250  * The list of all the hardware IPs that make up the asic is walked and the
5251  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
5252  * handles any IP specific hardware or software state changes that are
5253  * necessary for a soft reset to succeed.
5254  * Returns 0 on success, negative error code on failure.
5255  */
5256 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
5257 {
5258 	int i, r = 0;
5259 
5260 	for (i = 0; i < adev->num_ip_blocks; i++) {
5261 		if (!adev->ip_blocks[i].status.valid)
5262 			continue;
5263 		if (adev->ip_blocks[i].status.hang &&
5264 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
5265 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
5266 			if (r)
5267 				return r;
5268 		}
5269 	}
5270 
5271 	return 0;
5272 }
5273 
5274 /**
5275  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
5276  *
5277  * @adev: amdgpu_device pointer
5278  *
5279  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
5280  * reset is necessary to recover.
5281  * Returns true if a full asic reset is required, false if not.
5282  */
5283 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
5284 {
5285 	int i;
5286 
5287 	if (amdgpu_asic_need_full_reset(adev))
5288 		return true;
5289 
5290 	for (i = 0; i < adev->num_ip_blocks; i++) {
5291 		if (!adev->ip_blocks[i].status.valid)
5292 			continue;
5293 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
5294 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
5295 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
5296 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
5297 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
5298 			if (adev->ip_blocks[i].status.hang) {
5299 				dev_info(adev->dev, "Some block need full reset!\n");
5300 				return true;
5301 			}
5302 		}
5303 	}
5304 	return false;
5305 }
5306 
5307 /**
5308  * amdgpu_device_ip_soft_reset - do a soft reset
5309  *
5310  * @adev: amdgpu_device pointer
5311  *
5312  * The list of all the hardware IPs that make up the asic is walked and the
5313  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
5314  * IP specific hardware or software state changes that are necessary to soft
5315  * reset the IP.
5316  * Returns 0 on success, negative error code on failure.
5317  */
5318 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
5319 {
5320 	int i, r = 0;
5321 
5322 	for (i = 0; i < adev->num_ip_blocks; i++) {
5323 		if (!adev->ip_blocks[i].status.valid)
5324 			continue;
5325 		if (adev->ip_blocks[i].status.hang &&
5326 		    adev->ip_blocks[i].version->funcs->soft_reset) {
5327 			r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
5328 			if (r)
5329 				return r;
5330 		}
5331 	}
5332 
5333 	return 0;
5334 }
5335 
5336 /**
5337  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
5338  *
5339  * @adev: amdgpu_device pointer
5340  *
5341  * The list of all the hardware IPs that make up the asic is walked and the
5342  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
5343  * handles any IP specific hardware or software state changes that are
5344  * necessary after the IP has been soft reset.
5345  * Returns 0 on success, negative error code on failure.
5346  */
5347 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5348 {
5349 	int i, r = 0;
5350 
5351 	for (i = 0; i < adev->num_ip_blocks; i++) {
5352 		if (!adev->ip_blocks[i].status.valid)
5353 			continue;
5354 		if (adev->ip_blocks[i].status.hang &&
5355 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
5356 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
5357 		if (r)
5358 			return r;
5359 	}
5360 
5361 	return 0;
5362 }
5363 
5364 /**
5365  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5366  *
5367  * @adev: amdgpu_device pointer
5368  * @reset_context: amdgpu reset context pointer
5369  *
5370  * do VF FLR and reinitialize Asic
5371  * return 0 means succeeded otherwise failed
5372  */
5373 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5374 				     struct amdgpu_reset_context *reset_context)
5375 {
5376 	int r;
5377 	struct amdgpu_hive_info *hive = NULL;
5378 
5379 	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5380 		if (!amdgpu_ras_get_fed_status(adev))
5381 			amdgpu_virt_ready_to_reset(adev);
5382 		amdgpu_virt_wait_reset(adev);
5383 		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5384 		r = amdgpu_virt_request_full_gpu(adev, true);
5385 	} else {
5386 		r = amdgpu_virt_reset_gpu(adev);
5387 	}
5388 	if (r)
5389 		return r;
5390 
5391 	amdgpu_ras_clear_err_state(adev);
5392 	amdgpu_irq_gpu_reset_resume_helper(adev);
5393 
5394 	/* some sw clean up VF needs to do before recover */
5395 	amdgpu_virt_post_reset(adev);
5396 
5397 	/* Resume IP prior to SMC */
5398 	r = amdgpu_device_ip_reinit_early_sriov(adev);
5399 	if (r)
5400 		return r;
5401 
5402 	amdgpu_virt_init_data_exchange(adev);
5403 
5404 	r = amdgpu_device_fw_loading(adev);
5405 	if (r)
5406 		return r;
5407 
5408 	/* now we are okay to resume SMC/CP/SDMA */
5409 	r = amdgpu_device_ip_reinit_late_sriov(adev);
5410 	if (r)
5411 		return r;
5412 
5413 	hive = amdgpu_get_xgmi_hive(adev);
5414 	/* Update PSP FW topology after reset */
5415 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5416 		r = amdgpu_xgmi_update_topology(hive, adev);
5417 	if (hive)
5418 		amdgpu_put_xgmi_hive(hive);
5419 	if (r)
5420 		return r;
5421 
5422 	r = amdgpu_ib_ring_tests(adev);
5423 	if (r)
5424 		return r;
5425 
5426 	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
5427 		amdgpu_inc_vram_lost(adev);
5428 
5429 	/* need to be called during full access so we can't do it later like
5430 	 * bare-metal does.
5431 	 */
5432 	amdgpu_amdkfd_post_reset(adev);
5433 	amdgpu_virt_release_full_gpu(adev, true);
5434 
5435 	/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5436 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5437 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5438 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
5439 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
5440 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5441 		amdgpu_ras_resume(adev);
5442 
5443 	amdgpu_virt_ras_telemetry_post_reset(adev);
5444 
5445 	return 0;
5446 }
5447 
5448 /**
5449  * amdgpu_device_has_job_running - check if there is any unfinished job
5450  *
5451  * @adev: amdgpu_device pointer
5452  *
5453  * check if there is any job running on the device when guest driver receives
5454  * FLR notification from host driver. If there are still jobs running, then
5455  * the guest driver will not respond the FLR reset. Instead, let the job hit
5456  * the timeout and guest driver then issue the reset request.
5457  */
5458 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5459 {
5460 	int i;
5461 
5462 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5463 		struct amdgpu_ring *ring = adev->rings[i];
5464 
5465 		if (!amdgpu_ring_sched_ready(ring))
5466 			continue;
5467 
5468 		if (amdgpu_fence_count_emitted(ring))
5469 			return true;
5470 	}
5471 	return false;
5472 }
5473 
5474 /**
5475  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5476  *
5477  * @adev: amdgpu_device pointer
5478  *
5479  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5480  * a hung GPU.
5481  */
5482 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5483 {
5484 
5485 	if (amdgpu_gpu_recovery == 0)
5486 		goto disabled;
5487 
5488 	/* Skip soft reset check in fatal error mode */
5489 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5490 		return true;
5491 
5492 	if (amdgpu_sriov_vf(adev))
5493 		return true;
5494 
5495 	if (amdgpu_gpu_recovery == -1) {
5496 		switch (adev->asic_type) {
5497 #ifdef CONFIG_DRM_AMDGPU_SI
5498 		case CHIP_VERDE:
5499 		case CHIP_TAHITI:
5500 		case CHIP_PITCAIRN:
5501 		case CHIP_OLAND:
5502 		case CHIP_HAINAN:
5503 #endif
5504 #ifdef CONFIG_DRM_AMDGPU_CIK
5505 		case CHIP_KAVERI:
5506 		case CHIP_KABINI:
5507 		case CHIP_MULLINS:
5508 #endif
5509 		case CHIP_CARRIZO:
5510 		case CHIP_STONEY:
5511 		case CHIP_CYAN_SKILLFISH:
5512 			goto disabled;
5513 		default:
5514 			break;
5515 		}
5516 	}
5517 
5518 	return true;
5519 
5520 disabled:
5521 		dev_info(adev->dev, "GPU recovery disabled.\n");
5522 		return false;
5523 }
5524 
/**
 * amdgpu_device_mode1_reset - perform a mode1 (whole ASIC) reset
 * @adev: amdgpu_device pointer
 *
 * Saves the PCI config space and disables bus mastering, triggers the reset
 * through SMU when supported (PSP otherwise), then restores config space,
 * waits for the PSP bootloader and polls until the ASIC responds again.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
{
	u32 i;
	int ret = 0;

	if (adev->bios)
		amdgpu_atombios_scratch_regs_engine_hung(adev, true);

	dev_info(adev->dev, "GPU mode1 reset\n");

	/* Cache the state before bus master disable. The saved config space
	 * values are used in other cases like restore after mode-2 reset.
	 */
	amdgpu_device_cache_pci_state(adev->pdev);

	/* disable BM */
	pci_clear_master(adev->pdev);

	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
		dev_info(adev->dev, "GPU smu mode1 reset\n");
		ret = amdgpu_dpm_mode1_reset(adev);
	} else {
		dev_info(adev->dev, "GPU psp mode1 reset\n");
		ret = psp_gpu_reset(adev);
	}

	if (ret)
		goto mode1_reset_failed;

	amdgpu_device_load_pci_state(adev->pdev);
	ret = amdgpu_psp_wait_for_bootloader(adev);
	if (ret)
		goto mode1_reset_failed;

	/* wait for asic to come out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		u32 memsize = adev->nbio.funcs->get_memsize(adev);

		/* memsize reads back all-ones while the ASIC is in reset */
		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		ret = -ETIMEDOUT;
		goto mode1_reset_failed;
	}

	if (adev->bios)
		amdgpu_atombios_scratch_regs_engine_hung(adev, false);

	return 0;

mode1_reset_failed:
	dev_err(adev->dev, "GPU mode1 reset failed\n");
	return ret;
}
5582 
5583 int amdgpu_device_link_reset(struct amdgpu_device *adev)
5584 {
5585 	int ret = 0;
5586 
5587 	dev_info(adev->dev, "GPU link reset\n");
5588 
5589 	if (!amdgpu_reset_in_dpc(adev))
5590 		ret = amdgpu_dpm_link_reset(adev);
5591 
5592 	if (ret)
5593 		goto link_reset_failed;
5594 
5595 	ret = amdgpu_psp_wait_for_bootloader(adev);
5596 	if (ret)
5597 		goto link_reset_failed;
5598 
5599 	return 0;
5600 
5601 link_reset_failed:
5602 	dev_err(adev->dev, "GPU link reset failed\n");
5603 	return ret;
5604 }
5605 
/**
 * amdgpu_device_pre_asic_reset - prepare a device before the ASIC reset
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * Force-completes outstanding hw fences, lets a chip-specific reset handler
 * prepare the hw context if one exists, and on bare metal attempts a soft
 * reset before deciding whether a full reset (IP suspend) is required.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
				 struct amdgpu_reset_context *reset_context)
{
	int i, r = 0;
	struct amdgpu_job *job = NULL;
	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
	bool need_full_reset =
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/* only the device that requested the reset carries the guilty job */
	if (reset_context->reset_req_dev == adev)
		job = reset_context->job;

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_pre_reset(adev);

	amdgpu_fence_driver_isr_toggle(adev, true);

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!amdgpu_ring_sched_ready(ring))
			continue;

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	amdgpu_fence_driver_isr_toggle(adev, false);

	if (job && job->vm)
		drm_sched_increase_karma(&job->base);

	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
	/* If reset handler not implemented, continue; otherwise return */
	if (r == -EOPNOTSUPP)
		r = 0;
	else
		return r;

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		if (!need_full_reset && amdgpu_gpu_recovery &&
		    amdgpu_device_ip_check_soft_reset(adev)) {
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			/* escalate to a full reset if the soft reset did not stick */
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
				need_full_reset = true;
			}
		}

		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
			dev_info(tmp_adev->dev, "Dumping IP State\n");
			/* Trigger ip dump before we reset the asic */
			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
					tmp_adev->ip_blocks[i].version->funcs
						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);
		if (need_full_reset)
			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
		else
			clear_bit(AMDGPU_NEED_FULL_RESET,
				  &reset_context->flags);
	}

	return r;
}
5684 
5685 int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5686 {
5687 	struct list_head *device_list_handle;
5688 	bool full_reset, vram_lost = false;
5689 	struct amdgpu_device *tmp_adev;
5690 	int r, init_level;
5691 
5692 	device_list_handle = reset_context->reset_device_list;
5693 
5694 	if (!device_list_handle)
5695 		return -EINVAL;
5696 
5697 	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5698 
5699 	/**
5700 	 * If it's reset on init, it's default init level, otherwise keep level
5701 	 * as recovery level.
5702 	 */
5703 	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5704 			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5705 	else
5706 			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5707 
5708 	r = 0;
5709 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5710 		amdgpu_set_init_level(tmp_adev, init_level);
5711 		if (full_reset) {
5712 			/* post card */
5713 			amdgpu_reset_set_dpc_status(tmp_adev, false);
5714 			amdgpu_ras_clear_err_state(tmp_adev);
5715 			r = amdgpu_device_asic_init(tmp_adev);
5716 			if (r) {
5717 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5718 			} else {
5719 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5720 
5721 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5722 				if (r)
5723 					goto out;
5724 
5725 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5726 
5727 				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5728 					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5729 
5730 				if (vram_lost) {
5731 					dev_info(
5732 						tmp_adev->dev,
5733 						"VRAM is lost due to GPU reset!\n");
5734 					amdgpu_inc_vram_lost(tmp_adev);
5735 				}
5736 
5737 				r = amdgpu_device_fw_loading(tmp_adev);
5738 				if (r)
5739 					return r;
5740 
5741 				r = amdgpu_xcp_restore_partition_mode(
5742 					tmp_adev->xcp_mgr);
5743 				if (r)
5744 					goto out;
5745 
5746 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5747 				if (r)
5748 					goto out;
5749 
5750 				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5751 					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5752 
5753 				r = amdgpu_device_ip_resume_phase3(tmp_adev);
5754 				if (r)
5755 					goto out;
5756 
5757 				if (vram_lost)
5758 					amdgpu_device_fill_reset_magic(tmp_adev);
5759 
5760 				/*
5761 				 * Add this ASIC as tracked as reset was already
5762 				 * complete successfully.
5763 				 */
5764 				amdgpu_register_gpu_instance(tmp_adev);
5765 
5766 				if (!reset_context->hive &&
5767 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5768 					amdgpu_xgmi_add_device(tmp_adev);
5769 
5770 				r = amdgpu_device_ip_late_init(tmp_adev);
5771 				if (r)
5772 					goto out;
5773 
5774 				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
5775 				if (r)
5776 					goto out;
5777 
5778 				drm_client_dev_resume(adev_to_drm(tmp_adev));
5779 
5780 				/*
5781 				 * The GPU enters bad state once faulty pages
5782 				 * by ECC has reached the threshold, and ras
5783 				 * recovery is scheduled next. So add one check
5784 				 * here to break recovery if it indeed exceeds
5785 				 * bad page threshold, and remind user to
5786 				 * retire this GPU or setting one bigger
5787 				 * bad_page_threshold value to fix this once
5788 				 * probing driver again.
5789 				 */
5790 				if (!amdgpu_ras_is_rma(tmp_adev)) {
5791 					/* must succeed. */
5792 					amdgpu_ras_resume(tmp_adev);
5793 				} else {
5794 					r = -EINVAL;
5795 					goto out;
5796 				}
5797 
5798 				/* Update PSP FW topology after reset */
5799 				if (reset_context->hive &&
5800 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5801 					r = amdgpu_xgmi_update_topology(
5802 						reset_context->hive, tmp_adev);
5803 			}
5804 		}
5805 
5806 out:
5807 		if (!r) {
5808 			/* IP init is complete now, set level as default */
5809 			amdgpu_set_init_level(tmp_adev,
5810 					      AMDGPU_INIT_LEVEL_DEFAULT);
5811 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5812 			r = amdgpu_ib_ring_tests(tmp_adev);
5813 			if (r) {
5814 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5815 				r = -EAGAIN;
5816 				goto end;
5817 			}
5818 		}
5819 
5820 		if (r)
5821 			tmp_adev->asic_reset_res = r;
5822 	}
5823 
5824 end:
5825 	return r;
5826 }
5827 
5828 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5829 			 struct amdgpu_reset_context *reset_context)
5830 {
5831 	struct amdgpu_device *tmp_adev = NULL;
5832 	bool need_full_reset, skip_hw_reset;
5833 	int r = 0;
5834 
5835 	/* Try reset handler method first */
5836 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5837 				    reset_list);
5838 
5839 	reset_context->reset_device_list = device_list_handle;
5840 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5841 	/* If reset handler not implemented, continue; otherwise return */
5842 	if (r == -EOPNOTSUPP)
5843 		r = 0;
5844 	else
5845 		return r;
5846 
5847 	/* Reset handler not implemented, use the default method */
5848 	need_full_reset =
5849 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5850 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5851 
5852 	/*
5853 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5854 	 * to allow proper links negotiation in FW (within 1 sec)
5855 	 */
5856 	if (!skip_hw_reset && need_full_reset) {
5857 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5858 			/* For XGMI run all resets in parallel to speed up the process */
5859 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5860 				if (!queue_work(system_unbound_wq,
5861 						&tmp_adev->xgmi_reset_work))
5862 					r = -EALREADY;
5863 			} else
5864 				r = amdgpu_asic_reset(tmp_adev);
5865 
5866 			if (r) {
5867 				dev_err(tmp_adev->dev,
5868 					"ASIC reset failed with error, %d for drm dev, %s",
5869 					r, adev_to_drm(tmp_adev)->unique);
5870 				goto out;
5871 			}
5872 		}
5873 
5874 		/* For XGMI wait for all resets to complete before proceed */
5875 		if (!r) {
5876 			list_for_each_entry(tmp_adev, device_list_handle,
5877 					    reset_list) {
5878 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5879 					flush_work(&tmp_adev->xgmi_reset_work);
5880 					r = tmp_adev->asic_reset_res;
5881 					if (r)
5882 						break;
5883 				}
5884 			}
5885 		}
5886 	}
5887 
5888 	if (!r && amdgpu_ras_intr_triggered()) {
5889 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5890 			amdgpu_ras_reset_error_count(tmp_adev,
5891 						     AMDGPU_RAS_BLOCK__MMHUB);
5892 		}
5893 
5894 		amdgpu_ras_intr_cleared();
5895 	}
5896 
5897 	r = amdgpu_device_reinit_after_reset(reset_context);
5898 	if (r == -EAGAIN)
5899 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5900 	else
5901 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5902 
5903 out:
5904 	return r;
5905 }
5906 
5907 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5908 {
5909 
5910 	switch (amdgpu_asic_reset_method(adev)) {
5911 	case AMD_RESET_METHOD_MODE1:
5912 	case AMD_RESET_METHOD_LINK:
5913 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5914 		break;
5915 	case AMD_RESET_METHOD_MODE2:
5916 		adev->mp1_state = PP_MP1_STATE_RESET;
5917 		break;
5918 	default:
5919 		adev->mp1_state = PP_MP1_STATE_NONE;
5920 		break;
5921 	}
5922 }
5923 
/* Counterpart of amdgpu_device_set_mp1_state(): flush pending VF errors
 * and clear mp1_state once the reset is over.
 */
static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}
5929 
5930 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5931 {
5932 	struct pci_dev *p = NULL;
5933 
5934 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5935 			adev->pdev->bus->number, 1);
5936 	if (p) {
5937 		pm_runtime_enable(&(p->dev));
5938 		pm_runtime_resume(&(p->dev));
5939 	}
5940 
5941 	pci_dev_put(p);
5942 }
5943 
/*
 * amdgpu_device_suspend_display_audio - runtime-suspend the audio function
 * @adev: amdgpu_device pointer
 *
 * Before a BACO or mode1 reset, force the display audio function (PCI
 * function 1 on the same bus/slot) into runtime suspend and disable its
 * runtime pm, since those reset methods suffer an audio issue when audio
 * is not properly suspended first.  Returns 0 on success, negative error
 * code otherwise.
 */
static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
{
	enum amd_reset_method reset_method;
	struct pci_dev *p = NULL;
	u64 expires;

	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer the audio issue without proper suspended.
	 */
	reset_method = amdgpu_asic_reset_method(adev);
	if ((reset_method != AMD_RESET_METHOD_BACO) &&
	     (reset_method != AMD_RESET_METHOD_MODE1))
		return -EINVAL;

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (!p)
		return -ENODEV;

	expires = pm_runtime_autosuspend_expiration(&(p->dev));
	if (!expires)
		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4S interval will be used. Considering 3S is
		 * the audio controller default autosuspend delay setting.
		 * 4S used here is guaranteed to cover that.
		 */
		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;

	/* keep trying until the device reaches runtime-suspended state
	 * or the deadline computed above passes
	 */
	while (!pm_runtime_status_suspended(&(p->dev))) {
		if (!pm_runtime_suspend(&(p->dev)))
			break;

		if (expires < ktime_get_mono_fast_ns()) {
			dev_warn(adev->dev, "failed to suspend display audio\n");
			pci_dev_put(p);
			/* TODO: abort the succeeding gpu reset? */
			return -ETIMEDOUT;
		}
	}

	/* keep it from being runtime-resumed while the reset is in flight */
	pm_runtime_disable(&(p->dev));

	pci_dev_put(p);
	return 0;
}
5991 
/*
 * amdgpu_device_stop_pending_resets - drop reset work queued before this reset
 * @adev: amdgpu_device pointer
 *
 * Cancels all non-scheduler reset work items (debugfs-triggered, user queue,
 * KFD, SR-IOV FLR and RAS recovery) so that requests queued before the
 * current reset completed do not trigger a redundant reset afterwards.
 */
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

#if defined(CONFIG_DEBUG_FS)
	/* reset_work exists only with debugfs; VFs don't use it */
	if (!amdgpu_sriov_vf(adev))
		cancel_work(&adev->reset_work);
#endif
	cancel_work(&adev->userq_reset_work);

	if (adev->kfd.dev)
		cancel_work(&adev->kfd.reset_work);

	if (amdgpu_sriov_vf(adev))
		cancel_work(&adev->virt.flr_work);

	if (con && adev->ras_enabled)
		cancel_work(&con->recovery_work);

}
6012 
6013 static int amdgpu_device_health_check(struct list_head *device_list_handle)
6014 {
6015 	struct amdgpu_device *tmp_adev;
6016 	int ret = 0;
6017 
6018 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6019 		ret |= amdgpu_device_bus_status_check(tmp_adev);
6020 	}
6021 
6022 	return ret;
6023 }
6024 
/*
 * amdgpu_device_recovery_prepare - build the list of devices to reset
 * @adev: amdgpu_device pointer that triggered the recovery
 * @device_list: output list, populated via each device's reset_list node
 * @hive: XGMI hive the device belongs to, or NULL
 *
 * For a bare-metal XGMI hive, every hive member is added and @adev is rotated
 * to the front so it is reset first; shutdown and DPC link-reset state from
 * @adev are propagated to each member. Otherwise only @adev is listed.
 */
static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
					  struct list_head *device_list,
					  struct amdgpu_hive_info *hive)
{
	struct amdgpu_device *tmp_adev = NULL;

	/*
	 * Build list of devices to reset.
	 * In case we are in XGMI hive mode, resort the device list
	 * to put adev in the 1st position.
	 */
	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
			list_add_tail(&tmp_adev->reset_list, device_list);
			/* propagate the trigger device's state to the whole hive */
			if (adev->shutdown)
				tmp_adev->shutdown = true;
			if (amdgpu_reset_in_dpc(adev))
				tmp_adev->pcie_reset_ctx.in_link_reset = true;
		}
		if (!list_is_first(&adev->reset_list, device_list))
			list_rotate_to_front(&adev->reset_list, device_list);
	} else {
		list_add_tail(&adev->reset_list, device_list);
	}
}
6050 
6051 static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
6052 						  struct list_head *device_list)
6053 {
6054 	struct amdgpu_device *tmp_adev = NULL;
6055 
6056 	if (list_empty(device_list))
6057 		return;
6058 	tmp_adev =
6059 		list_first_entry(device_list, struct amdgpu_device, reset_list);
6060 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
6061 }
6062 
6063 static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
6064 						  struct list_head *device_list)
6065 {
6066 	struct amdgpu_device *tmp_adev = NULL;
6067 
6068 	if (list_empty(device_list))
6069 		return;
6070 	tmp_adev =
6071 		list_first_entry(device_list, struct amdgpu_device, reset_list);
6072 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
6073 }
6074 
/*
 * amdgpu_device_halt_activities - quiesce all devices before an ASIC reset
 * @adev: amdgpu_device pointer that triggered the recovery
 * @job: the hung job, or NULL if not reset-by-job
 * @reset_context: reset context passed through to KFD pre-reset
 * @device_list: devices to quiesce (built by amdgpu_device_recovery_prepare)
 * @hive: XGMI hive, or NULL (currently unused here)
 * @need_emergency_restart: true if jobs are being stopped for a reboot
 *
 * For each device: set MP1 state, suspend display audio, block RAS error
 * queries, stop KFD, unregister the GPU instance, suspend DRM clients and
 * user queues, then stop every ring's scheduler. The ordering of these
 * steps matters; do not reorder.
 */
static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
					  struct amdgpu_job *job,
					  struct amdgpu_reset_context *reset_context,
					  struct list_head *device_list,
					  struct amdgpu_hive_info *hive,
					  bool need_emergency_restart)
{
	struct amdgpu_device *tmp_adev = NULL;
	int i;

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list, reset_list) {
		amdgpu_device_set_mp1_state(tmp_adev);

		/*
		 * Try to put the audio codec into suspend state
		 * before gpu reset started.
		 *
		 * Due to the power domain of the graphics device
		 * is shared with AZ power domain. Without this,
		 * we may change the audio hardware from behind
		 * the audio driver's back. That will trigger
		 * some audio codec errors.
		 */
		if (!amdgpu_device_suspend_display_audio(tmp_adev))
			tmp_adev->pcie_reset_ctx.audio_suspended = true;

		amdgpu_ras_set_error_query_ready(tmp_adev, false);

		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);

		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);

		/*
		 * Mark these ASICs to be reset as untracked first
		 * And add them back after reset completed
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		drm_client_dev_suspend(adev_to_drm(tmp_adev));

		/* disable ras on ALL IPs */
		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
		    amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		amdgpu_userq_pre_reset(tmp_adev);

		/* park every ring's scheduler; pass the guilty job on its ring */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!amdgpu_ring_sched_ready(ring))
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (need_emergency_restart)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
		atomic_inc(&tmp_adev->gpu_reset_counter);
	}
}
6137 
/*
 * amdgpu_device_asic_reset - run pre-reset then the actual ASIC reset
 * @adev: amdgpu_device pointer that triggered the recovery
 * @device_list: devices participating in the reset
 * @reset_context: reset context (flags may be updated here)
 *
 * Runs per-device pre-reset, then either an SR-IOV reset (with bounded
 * retries) or a bare-metal reset of the whole list, and finally drops any
 * stale reset work queued while the reset was in flight.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
			      struct list_head *device_list,
			      struct amdgpu_reset_context *reset_context)
{
	struct amdgpu_device *tmp_adev = NULL;
	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
	int r = 0;

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list, reset_list) {
		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
		/*TODO Should we stop ?*/
		if (r) {
			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, adev_to_drm(tmp_adev)->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed.*/
	/* Host driver will handle XGMI hive reset for SRIOV */
	if (amdgpu_sriov_vf(adev)) {

		/* Bail out of reset early */
		if (amdgpu_ras_is_rma(adev))
			return -ENODEV;

		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
			amdgpu_ras_set_fed(adev, true);
			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
		}

		/* host-initiated FLR may need several attempts */
		r = amdgpu_device_reset_sriov(adev, reset_context);
		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
			amdgpu_virt_release_full_gpu(adev, true);
			goto retry;
		}
		if (r)
			adev->asic_reset_res = r;
	} else {
		r = amdgpu_do_asic_reset(device_list, reset_context);
		/* -EAGAIN means the reset wants to be re-run from the top */
		if (r && r == -EAGAIN)
			goto retry;
	}

	list_for_each_entry(tmp_adev, device_list, reset_list) {
		/*
		 * Drop any pending non scheduler resets queued before reset is done.
		 * Any reset scheduled after this point would be valid. Scheduler resets
		 * were already dropped during drm_sched_stop and no new ones can come
		 * in before drm_sched_start.
		 */
		amdgpu_device_stop_pending_resets(tmp_adev);
	}

	return r;
}
6196 
/*
 * amdgpu_device_sched_resume - restart schedulers and report reset outcome
 * @device_list: devices that went through the reset
 * @reset_context: reset context (used to classify RAS-sourced failures)
 * @job_signaled: true if the guilty job signaled and HW reset was skipped
 *
 * Restarts every ready ring scheduler, forces a mode restore for legacy
 * (non-atomic) display, then logs success or failure per device and
 * propagates the first per-device asic_reset_res as the return value.
 *
 * Returns 0 if every device reset successfully, else the first error.
 */
static int amdgpu_device_sched_resume(struct list_head *device_list,
			      struct amdgpu_reset_context *reset_context,
			      bool   job_signaled)
{
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;

	/* Post ASIC reset for all devs .*/
	list_for_each_entry(tmp_adev, device_list, reset_list) {

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!amdgpu_ring_sched_ready(ring))
				continue;

			drm_sched_start(&ring->sched, 0);
		}

		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));

		if (tmp_adev->asic_reset_res) {
			/* bad news, how to tell it to userspace ?
			 * for ras error, we should report GPU bad status instead of
			 * reset failure
			 */
			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
				dev_info(
					tmp_adev->dev,
					"GPU reset(%d) failed with error %d \n",
					atomic_read(
						&tmp_adev->gpu_reset_counter),
					tmp_adev->asic_reset_res);
			amdgpu_vf_error_put(tmp_adev,
					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
					    tmp_adev->asic_reset_res);
			/* keep only the first failure; clear per-device state */
			if (!r)
				r = tmp_adev->asic_reset_res;
			tmp_adev->asic_reset_res = 0;
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
				 atomic_read(&tmp_adev->gpu_reset_counter));
			if (amdgpu_acpi_smart_shift_update(tmp_adev,
							   AMDGPU_SS_DEV_D0))
				dev_warn(tmp_adev->dev,
					 "smart shift update failed\n");
		}
	}

	return r;
}
6250 
/*
 * amdgpu_device_gpu_resume - final per-device teardown of reset state
 * @adev: amdgpu_device pointer that triggered the recovery
 * @device_list: devices that went through the reset
 * @need_emergency_restart: true if this was a jobs-stop for reboot
 *
 * Unlocks KFD, resumes display audio if it was suspended for the reset,
 * restores MP1 state and re-enables RAS error queries for every device.
 */
static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
			      struct list_head *device_list,
			      bool   need_emergency_restart)
{
	struct amdgpu_device *tmp_adev = NULL;

	list_for_each_entry(tmp_adev, device_list, reset_list) {
		/* unlock kfd: SRIOV would do it separately */
		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);

		/* kfd_post_reset will do nothing if kfd device is not initialized,
		 * need to bring up kfd here if it's not be initialized before
		 */
		/* NOTE(review): this branch uses adev rather than tmp_adev,
		 * unlike every other statement in this loop — confirm whether
		 * hive members other than adev should also get KFD re-init.
		 */
		if (!adev->kfd.init_complete)
			amdgpu_amdkfd_device_init(adev);

		if (tmp_adev->pcie_reset_ctx.audio_suspended)
			amdgpu_device_resume_display_audio(tmp_adev);

		amdgpu_device_unset_mp1_state(tmp_adev);

		amdgpu_ras_set_error_query_ready(tmp_adev, true);

	}
}
6277 
6278 
6279 /**
6280  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
6281  *
6282  * @adev: amdgpu_device pointer
6283  * @job: which job trigger hang
6284  * @reset_context: amdgpu reset context pointer
6285  *
6286  * Attempt to reset the GPU if it has hung (all asics).
6287  * Attempt to do soft-reset or full-reset and reinitialize Asic
6288  * Returns 0 for success or an error on failure.
6289  */
6290 
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	int r = 0;
	bool need_emergency_restart = false;
	/* save the pasid here as the job may be freed before the end of the reset */
	int pasid = job ? job->pasid : -EINVAL;

	/*
	 * If it reaches here because of hang/timeout and a RAS error is
	 * detected at the same time, let RAS recovery take care of it.
	 */
	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
	    !amdgpu_sriov_vf(adev) &&
	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
		dev_dbg(adev->dev,
			"Gpu recovery from source: %d yielding to RAS error recovery handling",
			reset_context->src);
		return 0;
	}

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
		amdgpu_ras_get_context(adev)->reboot) {
		dev_warn(adev->dev, "Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!. Source:  %d\n",
		 need_emergency_restart ? "jobs stop" : "reset",
		 reset_context->src);

	/* serialize against other resets on the same XGMI hive */
	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	INIT_LIST_HEAD(&device_list);

	amdgpu_device_recovery_prepare(adev, &device_list, hive);

	/* bail out early if any device's bus is already dead */
	if (!amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_health_check(&device_list);
		if (r)
			goto end_reset;
	}

	/* Cannot be called after locking reset domain */
	amdgpu_ras_pre_reset(adev, &device_list);

	/* We need to lock reset domain only once both for XGMI and single device */
	amdgpu_device_recovery_get_reset_lock(adev, &device_list);

	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
				      hive, need_emergency_restart);
	if (need_emergency_restart)
		goto skip_sched_resume;
	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
	if (r)
		goto reset_unlock;
skip_hw_reset:
	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
	if (r)
		goto reset_unlock;
skip_sched_resume:
	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
reset_unlock:
	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
	amdgpu_ras_post_reset(adev, &device_list);
end_reset:
	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	atomic_set(&adev->reset_domain->reset_res, r);

	/* on success, emit a wedge-recovery event with the task (if known) */
	if (!r) {
		struct amdgpu_task_info *ti = NULL;

		/*
		 * The job may already be freed at this point via the sched tdr workqueue so
		 * use the cached pasid.
		 */
		if (pasid >= 0)
			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);

		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
				     ti ? &ti->task : NULL);

		amdgpu_vm_put_task_info(ti);
	}

	return r;
}
6417 
6418 /**
6419  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
6420  *
6421  * @adev: amdgpu_device pointer
6422  * @speed: pointer to the speed of the link
6423  * @width: pointer to the width of the link
6424  *
6425  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6426  * first physical partner to an AMD dGPU.
6427  * This will exclude any virtual switches and links.
6428  */
6429 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
6430 					    enum pci_bus_speed *speed,
6431 					    enum pcie_link_width *width)
6432 {
6433 	struct pci_dev *parent = adev->pdev;
6434 
6435 	if (!speed || !width)
6436 		return;
6437 
6438 	*speed = PCI_SPEED_UNKNOWN;
6439 	*width = PCIE_LNK_WIDTH_UNKNOWN;
6440 
6441 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
6442 		while ((parent = pci_upstream_bridge(parent))) {
6443 			/* skip upstream/downstream switches internal to dGPU*/
6444 			if (parent->vendor == PCI_VENDOR_ID_ATI)
6445 				continue;
6446 			*speed = pcie_get_speed_cap(parent);
6447 			*width = pcie_get_width_cap(parent);
6448 			break;
6449 		}
6450 	} else {
6451 		/* use the current speeds rather than max if switching is not supported */
6452 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
6453 	}
6454 }
6455 
6456 /**
6457  * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
6458  *
6459  * @adev: amdgpu_device pointer
6460  * @speed: pointer to the speed of the link
6461  * @width: pointer to the width of the link
6462  *
6463  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6464  * AMD dGPU which may be a virtual upstream bridge.
6465  */
6466 static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
6467 					enum pci_bus_speed *speed,
6468 					enum pcie_link_width *width)
6469 {
6470 	struct pci_dev *parent = adev->pdev;
6471 
6472 	if (!speed || !width)
6473 		return;
6474 
6475 	parent = pci_upstream_bridge(parent);
6476 	if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
6477 		/* use the upstream/downstream switches internal to dGPU */
6478 		*speed = pcie_get_speed_cap(parent);
6479 		*width = pcie_get_width_cap(parent);
6480 		while ((parent = pci_upstream_bridge(parent))) {
6481 			if (parent->vendor == PCI_VENDOR_ID_ATI) {
6482 				/* use the upstream/downstream switches internal to dGPU */
6483 				*speed = pcie_get_speed_cap(parent);
6484 				*width = pcie_get_width_cap(parent);
6485 			}
6486 		}
6487 	} else {
6488 		/* use the device itself */
6489 		*speed = pcie_get_speed_cap(adev->pdev);
6490 		*width = pcie_get_width_cap(adev->pdev);
6491 	}
6492 }
6493 
6494 /**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
6496  *
6497  * @adev: amdgpu_device pointer
6498  *
6499  * Fetches and stores in the driver the PCIE capabilities (gen speed
6500  * and lanes) of the slot the device is in. Handles APUs and
6501  * virtualized environments where PCIE config space may not be available.
6502  */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	enum pci_bus_speed speed_cap, platform_speed_cap;
	enum pcie_link_width platform_link_width, link_width;

	/* module parameters override everything else */
	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	/* both masks already set (e.g. by module parameters): nothing to do */
	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
		return;

	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
					&platform_link_width);
	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);

	/* each supported speed/width implies all lower ones, hence the
	 * cumulative mask per case below
	 */
	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_32_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
			else if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (platform_speed_cap == PCIE_SPEED_32_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;

		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		/* asic caps */
		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
		} else {
			switch (link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
		/* platform caps */
		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (platform_link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}
6691 
6692 /**
6693  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6694  *
6695  * @adev: amdgpu_device pointer
6696  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6697  *
6698  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6699  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6700  * @peer_adev.
6701  */
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
				      struct amdgpu_device *peer_adev)
{
#ifdef CONFIG_HSA_AMD_P2P
	/* chipset-level P2P DMA must be possible and @adev must be a dGPU
	 * (XGMI connected-to-cpu devices are excluded)
	 */
	bool p2p_access =
		!adev->gmc.xgmi.connected_to_cpu &&
		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
	if (!p2p_access)
		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
			pci_name(peer_adev->pdev));

	/* "large BAR": all of VRAM is CPU/peer visible through the BAR */
	bool is_large_bar = adev->gmc.visible_vram_size &&
		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);

	if (!p2p_addressable) {
		/* without IOMMU remapping, the whole aperture must fall
		 * inside the peer's DMA mask (default: 32-bit if unset)
		 */
		uint64_t address_mask = peer_adev->dev->dma_mask ?
			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
		resource_size_t aper_limit =
			adev->gmc.aper_base + adev->gmc.aper_size - 1;

		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
				     aper_limit & address_mask);
	}
	/* pcie_p2p is the driver-wide module parameter gate */
	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
#else
	return false;
#endif
}
6731 
6732 int amdgpu_device_baco_enter(struct amdgpu_device *adev)
6733 {
6734 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6735 
6736 	if (!amdgpu_device_supports_baco(adev))
6737 		return -ENOTSUPP;
6738 
6739 	if (ras && adev->ras_enabled &&
6740 	    adev->nbio.funcs->enable_doorbell_interrupt)
6741 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6742 
6743 	return amdgpu_dpm_baco_enter(adev);
6744 }
6745 
6746 int amdgpu_device_baco_exit(struct amdgpu_device *adev)
6747 {
6748 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6749 	int ret = 0;
6750 
6751 	if (!amdgpu_device_supports_baco(adev))
6752 		return -ENOTSUPP;
6753 
6754 	ret = amdgpu_dpm_baco_exit(adev);
6755 	if (ret)
6756 		return ret;
6757 
6758 	if (ras && adev->ras_enabled &&
6759 	    adev->nbio.funcs->enable_doorbell_interrupt)
6760 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6761 
6762 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6763 	    adev->nbio.funcs->clear_doorbell_interrupt)
6764 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6765 
6766 	return 0;
6767 }
6768 
6769 /**
6770  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6771  * @pdev: PCI device struct
6772  * @state: PCI channel state
6773  *
6774  * Description: Called when a PCI error is detected.
6775  *
6776  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6777  */
pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	/* __free(xgmi_put_hive) drops the hive reference automatically on
	 * every return path
	 */
	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
		amdgpu_get_xgmi_hive(adev);
	struct amdgpu_reset_context reset_context;
	struct list_head device_list;

	dev_info(adev->dev, "PCI error: detected callback!!\n");

	/* remembered so later recovery callbacks know how severe this was */
	adev->pci_channel_state = state;

	switch (state) {
	case pci_channel_io_normal:
		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		/* Fatal error, prepare for slot reset */
		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
		if (hive) {
			/* Hive devices should be able to support FW based
			 * link reset on other devices, if not return.
			 */
			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
				dev_warn(adev->dev,
					 "No support for XGMI hive yet...\n");
				return PCI_ERS_RESULT_DISCONNECT;
			}
			/* Set dpc status only if device is part of hive
			 * Non-hive devices should be able to recover after
			 * link reset.
			 */
			amdgpu_reset_set_dpc_status(adev, true);

			mutex_lock(&hive->hive_lock);
		}
		memset(&reset_context, 0, sizeof(reset_context));
		INIT_LIST_HEAD(&device_list);

		/* quiesce everything now; the reset itself happens later in
		 * the slot_reset callback
		 */
		amdgpu_device_recovery_prepare(adev, &device_list, hive);
		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
					      hive, false);
		if (hive)
			mutex_unlock(&hive->hive_lock);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */
		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
		return PCI_ERS_RESULT_DISCONNECT;
	}

	return PCI_ERS_RESULT_NEED_RESET;
}
6833 
6834 /**
6835  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6836  * @pdev: pointer to PCI device
6837  */
6838 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6839 {
6840 	struct drm_device *dev = pci_get_drvdata(pdev);
6841 	struct amdgpu_device *adev = drm_to_adev(dev);
6842 
6843 	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6844 
6845 	/* TODO - dump whatever for debugging purposes */
6846 
6847 	/* This called only if amdgpu_pci_error_detected returns
6848 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6849 	 * works, no need to reset slot.
6850 	 */
6851 
6852 	return PCI_ERS_RESULT_RECOVERED;
6853 }
6854 
/**
 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the pci error recovery
 * code after the PCI slot has been reset, just before we
 * should resume normal operations.
 *
 * Waits for the device to become accessible again, restores the cached
 * PCI config space (including any SWUS/SWDS bridge above the GPU) and
 * performs an ASIC reset of the device (or of every device in the XGMI
 * hive, when the GPU is part of one).
 *
 * Return: PCI_ERS_RESULT_RECOVERED on success,
 * PCI_ERS_RESULT_DISCONNECT on failure.
 */
pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_reset_context reset_context;
	struct amdgpu_device *tmp_adev;
	struct amdgpu_hive_info *hive;
	struct list_head device_list;
	struct pci_dev *link_dev;
	int r = 0, i, timeout;
	u32 memsize;
	u16 status;

	dev_info(adev->dev, "PCI error: slot reset callback!!\n");

	memset(&reset_context, 0, sizeof(reset_context));

	/* Probe the device closest to the reset link: the cached SWUS
	 * bridge when one exists, otherwise the GPU itself.
	 */
	if (adev->pcie_reset_ctx.swus)
		link_dev = adev->pcie_reset_ctx.swus;
	else
		link_dev = adev->pdev;
	/* wait for asic to come out of reset, timeout = 10s */
	timeout = 10000;
	do {
		/* Poll the vendor ID; a failed config read leaves status at
		 * all-ones, which matches neither vendor ID below.
		 */
		usleep_range(10000, 10500);
		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
		timeout -= 10;
	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
		 (status != PCI_VENDOR_ID_AMD));

	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
		r = -ETIME;
		goto out;
	}

	/* Restore the bridge config space first, then the GPU's own */
	amdgpu_device_load_switch_state(adev);
	/* Restore PCI confspace */
	amdgpu_device_load_pci_state(pdev);

	/* confirm  ASIC came out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		memsize = amdgpu_asic_get_config_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}
	if (memsize == 0xffffffff) {
		r = -ETIME;
		goto out;
	}

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
	INIT_LIST_HEAD(&device_list);

	/* Devices sharing an XGMI hive must all be reset together */
	hive = amdgpu_get_xgmi_hive(adev);
	if (hive) {
		mutex_lock(&hive->hive_lock);
		reset_context.hive = hive;
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
			tmp_adev->pcie_reset_ctx.in_link_reset = true;
			list_add_tail(&tmp_adev->reset_list, &device_list);
		}
	} else {
		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
		list_add_tail(&adev->reset_list, &device_list);
	}

	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
out:
	if (!r) {
		/* Re-cache config space so a future reset can restore it */
		if (amdgpu_device_cache_pci_state(adev->pdev))
			pci_restore_state(adev->pdev);
		dev_info(adev->dev, "PCIe error recovery succeeded\n");
	} else {
		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
		if (hive) {
			list_for_each_entry(tmp_adev, &device_list, reset_list)
				amdgpu_device_unset_mp1_state(tmp_adev);
		}
		/* Recovery is aborted; drop the reset locks here instead of
		 * in amdgpu_pci_resume() (compare the success path, which
		 * releases them in the resume callback).
		 */
		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
	}

	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
6956 
/**
 * amdgpu_pci_resume() - resume normal ops after PCI reset
 * @pdev: pointer to PCI device
 *
 * Called when the error recovery driver tells us that its
 * OK to resume normal operation.
 *
 * Resumes the schedulers and completes GPU recovery for every device
 * involved in the link reset, then releases the reset locks taken in
 * the error_detected callback.
 */
void amdgpu_pci_resume(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct list_head device_list;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;

	dev_info(adev->dev, "PCI error: resume callback!!\n");

	/* Only continue execution for the case of pci_channel_io_frozen */
	if (adev->pci_channel_state != pci_channel_io_frozen)
		return;

	INIT_LIST_HEAD(&device_list);

	/* Rebuild the device list used during halt/reset and clear the
	 * in_link_reset flag set by the slot reset callback.
	 */
	hive = amdgpu_get_xgmi_hive(adev);
	if (hive) {
		mutex_lock(&hive->hive_lock);
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
			tmp_adev->pcie_reset_ctx.in_link_reset = false;
			list_add_tail(&tmp_adev->reset_list, &device_list);
		}
	} else
		list_add_tail(&adev->reset_list, &device_list);

	amdgpu_device_sched_resume(&device_list, NULL, NULL);
	amdgpu_device_gpu_resume(adev, &device_list, false);
	amdgpu_device_recovery_put_reset_lock(adev, &device_list);

	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}
}
6999 
/* Cache the config space of the PCIe switch (SWUS/SWDS pair) above the GPU,
 * if one exists, so amdgpu_device_load_switch_state() can restore it after a
 * link reset. No-op unless the parent is an ATI downstream port sitting
 * below an ATI/AMD upstream port.
 */
static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
{
	struct pci_dev *swds, *swus;
	int r;

	/* Parent must be an ATI downstream switch port */
	swds = pci_upstream_bridge(adev->pdev);
	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
		return;
	/* Grandparent must be the matching ATI/AMD upstream port */
	swus = pci_upstream_bridge(swds);
	if (!swus ||
	    (swus->vendor != PCI_VENDOR_ID_ATI &&
	     swus->vendor != PCI_VENDOR_ID_AMD) ||
	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
		return;

	/* If already saved, return */
	if (adev->pcie_reset_ctx.swus) /* set only after both saves succeed */
		return;
	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
	r = pci_save_state(swds);
	if (r)
		return;
	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);

	/* NOTE(review): if this second save fails after swds_pcistate was
	 * stored, a later call re-stores swds_pcistate without freeing the
	 * previous copy — looks like a possible leak on repeated partial
	 * failures; verify.
	 */
	r = pci_save_state(swus);
	if (r)
		return;
	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);

	adev->pcie_reset_ctx.swus = swus;
}
7032 
7033 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
7034 {
7035 	struct pci_dev *pdev;
7036 	int r;
7037 
7038 	if (!adev->pcie_reset_ctx.swds_pcistate ||
7039 	    !adev->pcie_reset_ctx.swus_pcistate)
7040 		return;
7041 
7042 	pdev = adev->pcie_reset_ctx.swus;
7043 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
7044 	if (!r) {
7045 		pci_restore_state(pdev);
7046 	} else {
7047 		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
7048 		return;
7049 	}
7050 
7051 	pdev = pci_upstream_bridge(adev->pdev);
7052 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
7053 	if (!r)
7054 		pci_restore_state(pdev);
7055 	else
7056 		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
7057 }
7058 
7059 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
7060 {
7061 	struct drm_device *dev = pci_get_drvdata(pdev);
7062 	struct amdgpu_device *adev = drm_to_adev(dev);
7063 	int r;
7064 
7065 	if (amdgpu_sriov_vf(adev))
7066 		return false;
7067 
7068 	r = pci_save_state(pdev);
7069 	if (!r) {
7070 		kfree(adev->pci_state);
7071 
7072 		adev->pci_state = pci_store_saved_state(pdev);
7073 
7074 		if (!adev->pci_state) {
7075 			dev_err(adev->dev, "Failed to store PCI saved state");
7076 			return false;
7077 		}
7078 	} else {
7079 		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
7080 		return false;
7081 	}
7082 
7083 	amdgpu_device_cache_switch_state(adev);
7084 
7085 	return true;
7086 }
7087 
7088 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
7089 {
7090 	struct drm_device *dev = pci_get_drvdata(pdev);
7091 	struct amdgpu_device *adev = drm_to_adev(dev);
7092 	int r;
7093 
7094 	if (!adev->pci_state)
7095 		return false;
7096 
7097 	r = pci_load_saved_state(pdev, adev->pci_state);
7098 
7099 	if (!r) {
7100 		pci_restore_state(pdev);
7101 	} else {
7102 		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
7103 		return false;
7104 	}
7105 
7106 	return true;
7107 }
7108 
7109 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
7110 		struct amdgpu_ring *ring)
7111 {
7112 #ifdef CONFIG_X86_64
7113 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
7114 		return;
7115 #endif
7116 	if (adev->gmc.xgmi.connected_to_cpu)
7117 		return;
7118 
7119 	if (ring && ring->funcs->emit_hdp_flush) {
7120 		amdgpu_ring_emit_hdp_flush(ring);
7121 		return;
7122 	}
7123 
7124 	if (!ring && amdgpu_sriov_runtime(adev)) {
7125 		if (!amdgpu_kiq_hdp_flush(adev))
7126 			return;
7127 	}
7128 
7129 	amdgpu_hdp_flush(adev, ring);
7130 }
7131 
7132 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
7133 		struct amdgpu_ring *ring)
7134 {
7135 #ifdef CONFIG_X86_64
7136 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
7137 		return;
7138 #endif
7139 	if (adev->gmc.xgmi.connected_to_cpu)
7140 		return;
7141 
7142 	amdgpu_hdp_invalidate(adev, ring);
7143 }
7144 
/* Returns non-zero while a GPU reset is in progress on @adev's reset domain */
int amdgpu_in_reset(struct amdgpu_device *adev)
{
	return atomic_read(&adev->reset_domain->in_gpu_reset);
}
7149 
/**
 * amdgpu_device_halt() - bring hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Bring hardware to some kind of halt state so that no one can touch it
 * any more. It will help to maintain error context when error occurred.
 * Compare to a simple hang, the system will keep stable at least for SSH
 * access. Then it should be trivial to inspect the hardware state and
 * see what's going on. Implemented as following:
 *
 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
 *    clears all CPU mappings to device, disallows remappings through page faults
 * 2. amdgpu_irq_disable_all() disables all interrupts
 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 * 4. set adev->no_hw_access to avoid potential crashes after step 5
 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
 *    flush any in flight DMA operations
 */
void amdgpu_device_halt(struct amdgpu_device *adev)
{
	struct pci_dev *pdev = adev->pdev;
	struct drm_device *ddev = adev_to_drm(adev);

	amdgpu_xcp_dev_unplug(adev);
	drm_dev_unplug(ddev);

	amdgpu_irq_disable_all(adev);

	amdgpu_fence_driver_hw_fini(adev);

	/* Block further HW register access before tearing down MMIO */
	adev->no_hw_access = true;

	amdgpu_device_unmap_mmio(adev);

	pci_disable_device(pdev);
	pci_wait_for_pending_transaction(pdev);
}
7189 
/**
 * amdgpu_device_pcie_port_rreg - read a PCIE port register
 * @adev: amdgpu_device pointer
 * @reg: register dword index (multiplied by 4 into a byte offset)
 *
 * Reads a PCIE port register through the NBIO index/data register pair.
 * The pair is shared device-global state, hence the pcie_idx_lock.
 *
 * Returns the register value.
 */
u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
				u32 reg)
{
	unsigned long flags, address, data;
	u32 r;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);	/* read back so the index write lands before the data read */
	r = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
	return r;
}
7206 
/**
 * amdgpu_device_pcie_port_wreg - write a PCIE port register
 * @adev: amdgpu_device pointer
 * @reg: register dword index (multiplied by 4 into a byte offset)
 * @v: value to write
 *
 * Writes a PCIE port register through the NBIO index/data register pair
 * while holding pcie_idx_lock (the pair is shared device-global state).
 */
void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
				u32 reg, u32 v)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);	/* read back so the index write lands first */
	WREG32(data, v);
	(void)RREG32(data);	/* read back to push out the data write */
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
7222 
/**
 * amdgpu_device_get_gang - return a reference to the current gang
 * @adev: amdgpu_device pointer
 *
 * Returns: A new reference to the current gang leader.
 */
struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
{
	struct dma_fence *fence;

	/* gang_submit is RCU protected; dma_fence_get_rcu_safe() retries
	 * until it holds a reference on a fence that is still current.
	 */
	rcu_read_lock();
	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
	rcu_read_unlock();
	return fence;
}
7238 
/**
 * amdgpu_device_switch_gang - switch to a new gang
 * @adev: amdgpu_device pointer
 * @gang: the gang to switch to
 *
 * Try to switch to a new gang.
 * Returns: NULL if we switched to the new gang or a reference to the current
 * gang leader.
 */
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
					    struct dma_fence *gang)
{
	struct dma_fence *old = NULL;

	/* Reference which will live in adev->gang_submit on success */
	dma_fence_get(gang);
	do {
		/* Drop the reference from the previous loop iteration */
		dma_fence_put(old);
		old = amdgpu_device_get_gang(adev);
		if (old == gang)
			break;

		/* The current gang leader must have signaled before another
		 * gang may take over; otherwise hand it back to the caller
		 * to wait on.
		 */
		if (!dma_fence_is_signaled(old)) {
			dma_fence_put(gang);
			return old;
		}

	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
			 old, gang) != old);

	/*
	 * Drop it once for the exchanged reference in adev and once for the
	 * thread local reference acquired in amdgpu_device_get_gang().
	 */
	dma_fence_put(old);
	dma_fence_put(old);
	return NULL;
}
7276 
/**
 * amdgpu_device_enforce_isolation - enforce HW isolation
 * @adev: the amdgpu device pointer
 * @ring: the HW ring the job is supposed to run on
 * @job: the job which is about to be pushed to the HW ring
 *
 * Makes sure that only one client at a time can use the GFX block.
 * Returns: The dependency to wait on before the job can be pushed to the HW.
 * The function is called multiple times until NULL is returned.
 */
struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
						  struct amdgpu_ring *ring,
						  struct amdgpu_job *job)
{
	/* Isolation state is tracked per XCP partition */
	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
	struct drm_sched_fence *f = job->base.s_fence;
	struct dma_fence *dep;
	void *owner;
	int r;

	/*
	 * For now enforce isolation only for the GFX block since we only need
	 * the cleaner shader on those rings.
	 */
	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return NULL;

	/*
	 * All submissions where enforce isolation is false are handled as if
	 * they come from a single client. Use ~0l as the owner to distinct it
	 * from kernel submissions where the owner is NULL.
	 */
	owner = job->enforce_isolation ? f->owner : (void *)~0l;

	mutex_lock(&adev->enforce_isolation_mutex);

	/*
	 * The "spearhead" submission is the first one which changes the
	 * ownership to its client. We always need to wait for it to be
	 * pushed to the HW before proceeding with anything.
	 */
	if (&f->scheduled != isolation->spearhead &&
	    !dma_fence_is_signaled(isolation->spearhead)) {
		dep = isolation->spearhead;
		goto out_grab_ref;
	}

	if (isolation->owner != owner) {

		/*
		 * Wait for any gang to be assembled before switching to a
		 * different owner or otherwise we could deadlock the
		 * submissions.
		 */
		if (!job->gang_submit) {
			dep = amdgpu_device_get_gang(adev);
			if (!dma_fence_is_signaled(dep))
				goto out_return_dep;
			dma_fence_put(dep);
		}

		/* This job becomes the new spearhead for the new owner */
		dma_fence_put(isolation->spearhead);
		isolation->spearhead = dma_fence_get(&f->scheduled);
		amdgpu_sync_move(&isolation->active, &isolation->prev);
		trace_amdgpu_isolation(isolation->owner, owner);
		isolation->owner = owner;
	}

	/*
	 * Specifying the ring here helps to pipeline submissions even when
	 * isolation is enabled. If that is not desired for testing NULL can be
	 * used instead of the ring to enforce a CPU round trip while switching
	 * between clients.
	 */
	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
	if (r)
		dev_warn(adev->dev, "OOM tracking isolation\n");

out_grab_ref:
	/* Borrowed fence pointer: take a reference before returning it */
	dma_fence_get(dep);
out_return_dep:
	mutex_unlock(&adev->enforce_isolation_mutex);
	return dep;
}
7363 
/* Returns true when the ASIC has usable (non-harvested) display hardware.
 * Pre-discovery chips are classified by an explicit asic_type list; newer
 * chips are classified through IP discovery.
 */
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		/* chips with display hardware */
		return true;
	default:
		/* IP discovery */
		/* No DCE IP discovered, or the display block was harvested */
		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
			return false;
		return true;
	}
}
7404 
7405 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
7406 		uint32_t inst, uint32_t reg_addr, char reg_name[],
7407 		uint32_t expected_value, uint32_t mask)
7408 {
7409 	uint32_t ret = 0;
7410 	uint32_t old_ = 0;
7411 	uint32_t tmp_ = RREG32(reg_addr);
7412 	uint32_t loop = adev->usec_timeout;
7413 
7414 	while ((tmp_ & (mask)) != (expected_value)) {
7415 		if (old_ != tmp_) {
7416 			loop = adev->usec_timeout;
7417 			old_ = tmp_;
7418 		} else
7419 			udelay(1);
7420 		tmp_ = RREG32(reg_addr);
7421 		loop--;
7422 		if (!loop) {
7423 			dev_warn(
7424 				adev->dev,
7425 				"Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
7426 				inst, reg_name, (uint32_t)expected_value,
7427 				(uint32_t)(tmp_ & (mask)));
7428 			ret = -ETIMEDOUT;
7429 			break;
7430 		}
7431 	}
7432 	return ret;
7433 }
7434 
7435 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
7436 {
7437 	ssize_t size = 0;
7438 
7439 	if (!ring || !ring->adev)
7440 		return size;
7441 
7442 	if (amdgpu_device_should_recover_gpu(ring->adev))
7443 		size |= AMDGPU_RESET_TYPE_FULL;
7444 
7445 	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
7446 	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
7447 		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
7448 
7449 	return size;
7450 }
7451 
7452 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
7453 {
7454 	ssize_t size = 0;
7455 
7456 	if (supported_reset == 0) {
7457 		size += sysfs_emit_at(buf, size, "unsupported");
7458 		size += sysfs_emit_at(buf, size, "\n");
7459 		return size;
7460 
7461 	}
7462 
7463 	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
7464 		size += sysfs_emit_at(buf, size, "soft ");
7465 
7466 	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
7467 		size += sysfs_emit_at(buf, size, "queue ");
7468 
7469 	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
7470 		size += sysfs_emit_at(buf, size, "pipe ");
7471 
7472 	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
7473 		size += sysfs_emit_at(buf, size, "full ");
7474 
7475 	size += sysfs_emit_at(buf, size, "\n");
7476 	return size;
7477 }
7478 
7479 void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
7480 			   enum amdgpu_uid_type type, uint8_t inst,
7481 			   uint64_t uid)
7482 {
7483 	if (!uid_info)
7484 		return;
7485 
7486 	if (type >= AMDGPU_UID_TYPE_MAX) {
7487 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
7488 			     type);
7489 		return;
7490 	}
7491 
7492 	if (inst >= AMDGPU_UID_INST_MAX) {
7493 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
7494 			     inst);
7495 		return;
7496 	}
7497 
7498 	if (uid_info->uid[type][inst] != 0) {
7499 		dev_warn_once(
7500 			uid_info->adev->dev,
7501 			"Overwriting existing UID %llu for type %d instance %d\n",
7502 			uid_info->uid[type][inst], type, inst);
7503 	}
7504 
7505 	uid_info->uid[type][inst] = uid;
7506 }
7507 
7508 u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
7509 			  enum amdgpu_uid_type type, uint8_t inst)
7510 {
7511 	if (!uid_info)
7512 		return 0;
7513 
7514 	if (type >= AMDGPU_UID_TYPE_MAX) {
7515 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
7516 			     type);
7517 		return 0;
7518 	}
7519 
7520 	if (inst >= AMDGPU_UID_INST_MAX) {
7521 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
7522 			     inst);
7523 		return 0;
7524 	}
7525 
7526 	return uid_info->uid[type][inst];
7527 }
7528