xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision eb51c5a103f63b6e9c3f9cebf7b1e2e1056d3119)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/aperture.h>
30 #include <linux/power_supply.h>
31 #include <linux/kthread.h>
32 #include <linux/module.h>
33 #include <linux/console.h>
34 #include <linux/slab.h>
35 #include <linux/iommu.h>
36 #include <linux/pci.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39 #include <linux/nospec.h>
40 
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_client_event.h>
43 #include <drm/drm_crtc_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/device.h>
47 #include <linux/vgaarb.h>
48 #include <linux/vga_switcheroo.h>
49 #include <linux/efi.h>
50 #include "amdgpu.h"
51 #include "amdgpu_trace.h"
52 #include "amdgpu_i2c.h"
53 #include "atom.h"
54 #include "amdgpu_atombios.h"
55 #include "amdgpu_atomfirmware.h"
56 #include "amd_pcie.h"
57 #ifdef CONFIG_DRM_AMDGPU_SI
58 #include "si.h"
59 #endif
60 #ifdef CONFIG_DRM_AMDGPU_CIK
61 #include "cik.h"
62 #endif
63 #include "vi.h"
64 #include "soc15.h"
65 #include "nv.h"
66 #include "bif/bif_4_1_d.h"
67 #include <linux/firmware.h>
68 #include "amdgpu_vf_error.h"
69 
70 #include "amdgpu_amdkfd.h"
71 #include "amdgpu_pm.h"
72 
73 #include "amdgpu_xgmi.h"
74 #include "amdgpu_ras.h"
75 #include "amdgpu_ras_mgr.h"
76 #include "amdgpu_pmu.h"
77 #include "amdgpu_fru_eeprom.h"
78 #include "amdgpu_reset.h"
79 #include "amdgpu_virt.h"
80 #include "amdgpu_dev_coredump.h"
81 
82 #include <linux/suspend.h>
83 #include <drm/task_barrier.h>
84 #include <linux/pm_runtime.h>
85 
86 #include <drm/drm_drv.h>
87 
88 #if IS_ENABLED(CONFIG_X86)
89 #include <asm/intel-family.h>
90 #include <asm/cpu_device_id.h>
91 #endif
92 
93 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
97 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
98 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
99 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
100 MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
101 
102 #define AMDGPU_RESUME_MS		2000
103 #define AMDGPU_MAX_RETRY_LIMIT		2
104 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
105 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
106 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
107 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
108 
109 #define AMDGPU_VBIOS_SKIP (1U << 0)
110 #define AMDGPU_VBIOS_OPTIONAL (1U << 1)
111 
112 static const struct drm_driver amdgpu_kms_driver;
113 
114 const char *amdgpu_asic_name[] = {
115 	"TAHITI",
116 	"PITCAIRN",
117 	"VERDE",
118 	"OLAND",
119 	"HAINAN",
120 	"BONAIRE",
121 	"KAVERI",
122 	"KABINI",
123 	"HAWAII",
124 	"MULLINS",
125 	"TOPAZ",
126 	"TONGA",
127 	"FIJI",
128 	"CARRIZO",
129 	"STONEY",
130 	"POLARIS10",
131 	"POLARIS11",
132 	"POLARIS12",
133 	"VEGAM",
134 	"VEGA10",
135 	"VEGA12",
136 	"VEGA20",
137 	"RAVEN",
138 	"ARCTURUS",
139 	"RENOIR",
140 	"ALDEBARAN",
141 	"NAVI10",
142 	"CYAN_SKILLFISH",
143 	"NAVI14",
144 	"NAVI12",
145 	"SIENNA_CICHLID",
146 	"NAVY_FLOUNDER",
147 	"VANGOGH",
148 	"DIMGREY_CAVEFISH",
149 	"BEIGE_GOBY",
150 	"YELLOW_CARP",
151 	"IP DISCOVERY",
152 	"LAST",
153 };
154 
155 #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
156 /*
157  * Default init level where all blocks are expected to be initialized. This is
158  * the level of initialization expected by default and also after a full reset
159  * of the device.
160  */
161 struct amdgpu_init_level amdgpu_init_default = {
162 	.level = AMDGPU_INIT_LEVEL_DEFAULT,
163 	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
164 };
165 
166 struct amdgpu_init_level amdgpu_init_recovery = {
167 	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
168 	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
169 };
170 
171 /*
172  * Minimal blocks needed to be initialized before an XGMI hive can be reset. This
173  * is used for cases like reset on initialization where the entire hive needs to
174  * be reset before first use.
175  */
176 struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
177 	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
178 	.hwini_ip_block_mask =
179 		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
180 		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
181 		BIT(AMD_IP_BLOCK_TYPE_PSP)
182 };
183 
184 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
185 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
186 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
187 
188 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
189 
190 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
191 					     enum amd_ip_block_type block)
192 {
193 	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
194 }
195 
196 void amdgpu_set_init_level(struct amdgpu_device *adev,
197 			   enum amdgpu_init_lvl_id lvl)
198 {
199 	switch (lvl) {
200 	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
201 		adev->init_lvl = &amdgpu_init_minimal_xgmi;
202 		break;
203 	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
204 		adev->init_lvl = &amdgpu_init_recovery;
205 		break;
206 	case AMDGPU_INIT_LEVEL_DEFAULT:
207 		fallthrough;
208 	default:
209 		adev->init_lvl = &amdgpu_init_default;
210 		break;
211 	}
212 }
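
/*
 * Usage sketch (illustrative, not a fixed call sequence): reset paths
 * lower the init level before reinitializing IP blocks and restore the
 * default level afterwards:
 *
 *	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
 *	... reinitialize the required IP blocks ...
 *	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
 */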
213 
214 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
215 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
216 				     void *data);
217 
218 /**
219  * DOC: pcie_replay_count
220  *
221  * The amdgpu driver provides a sysfs API for reporting the total number
222  * of PCIe replays (NAKs).
223  * The file pcie_replay_count is used for this and returns the total
224  * number of replays as a sum of the NAKs generated and NAKs received.
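 *
 * Example read (the card index is illustrative)::
 *
 *     $ cat /sys/class/drm/card0/device/pcie_replay_count
 *     0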
225  */
226 
227 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
228 		struct device_attribute *attr, char *buf)
229 {
230 	struct drm_device *ddev = dev_get_drvdata(dev);
231 	struct amdgpu_device *adev = drm_to_adev(ddev);
232 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
233 
234 	return sysfs_emit(buf, "%llu\n", cnt);
235 }
236 
237 static DEVICE_ATTR(pcie_replay_count, 0444,
238 		amdgpu_device_get_pcie_replay_count, NULL);
239 
240 static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
241 {
242 	int ret = 0;
243 
244 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
245 		ret = sysfs_create_file(&adev->dev->kobj,
246 					&dev_attr_pcie_replay_count.attr);
247 
248 	return ret;
249 }
250 
251 static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
252 {
253 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
254 		sysfs_remove_file(&adev->dev->kobj,
255 				  &dev_attr_pcie_replay_count.attr);
256 }
257 
258 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
259 					  const struct bin_attribute *attr, char *buf,
260 					  loff_t ppos, size_t count)
261 {
262 	struct device *dev = kobj_to_dev(kobj);
263 	struct drm_device *ddev = dev_get_drvdata(dev);
264 	struct amdgpu_device *adev = drm_to_adev(ddev);
265 	ssize_t bytes_read;
266 
267 	switch (ppos) {
268 	case AMDGPU_SYS_REG_STATE_XGMI:
269 		bytes_read = amdgpu_asic_get_reg_state(
270 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
271 		break;
272 	case AMDGPU_SYS_REG_STATE_WAFL:
273 		bytes_read = amdgpu_asic_get_reg_state(
274 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
275 		break;
276 	case AMDGPU_SYS_REG_STATE_PCIE:
277 		bytes_read = amdgpu_asic_get_reg_state(
278 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
279 		break;
280 	case AMDGPU_SYS_REG_STATE_USR:
281 		bytes_read = amdgpu_asic_get_reg_state(
282 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
283 		break;
284 	case AMDGPU_SYS_REG_STATE_USR_1:
285 		bytes_read = amdgpu_asic_get_reg_state(
286 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
287 		break;
288 	default:
289 		return -EINVAL;
290 	}
291 
292 	return bytes_read;
293 }
294 
295 static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
296 		      AMDGPU_SYS_REG_STATE_END);
297 
298 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
299 {
300 	int ret;
301 
302 	if (!amdgpu_asic_get_reg_state_supported(adev))
303 		return 0;
304 
305 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
306 
307 	return ret;
308 }
309 
310 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
311 {
312 	if (!amdgpu_asic_get_reg_state_supported(adev))
313 		return;
314 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
315 }
316 
317 /**
318  * DOC: board_info
319  *
320  * The amdgpu driver provides a sysfs API for giving board related information.
321  * It provides the form factor information in the format
322  *
323  *   type : form factor
324  *
325  * Possible form factor values
326  *
327  * - "cem"		- PCIE CEM card
328  * - "oam"		- Open Compute Accelerator Module
329  * - "unknown"	- Not known
330  *
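 * Example output (the value shown is illustrative)::
 *
 *     $ cat board_info
 *     type : oam
 *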
331  */
332 
333 static ssize_t amdgpu_device_get_board_info(struct device *dev,
334 					    struct device_attribute *attr,
335 					    char *buf)
336 {
337 	struct drm_device *ddev = dev_get_drvdata(dev);
338 	struct amdgpu_device *adev = drm_to_adev(ddev);
339 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
340 	const char *pkg;
341 
342 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
343 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
344 
345 	switch (pkg_type) {
346 	case AMDGPU_PKG_TYPE_CEM:
347 		pkg = "cem";
348 		break;
349 	case AMDGPU_PKG_TYPE_OAM:
350 		pkg = "oam";
351 		break;
352 	default:
353 		pkg = "unknown";
354 		break;
355 	}
356 
357 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
358 }
359 
360 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
361 
362 static struct attribute *amdgpu_board_attrs[] = {
363 	&dev_attr_board_info.attr,
364 	NULL,
365 };
366 
367 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
368 					     struct attribute *attr, int n)
369 {
370 	struct device *dev = kobj_to_dev(kobj);
371 	struct drm_device *ddev = dev_get_drvdata(dev);
372 	struct amdgpu_device *adev = drm_to_adev(ddev);
373 
374 	if (adev->flags & AMD_IS_APU)
375 		return 0;
376 
377 	return attr->mode;
378 }
379 
380 static const struct attribute_group amdgpu_board_attrs_group = {
381 	.attrs = amdgpu_board_attrs,
382 	.is_visible = amdgpu_board_attrs_is_visible
383 };
384 
385 /**
386  * DOC: uma/carveout_options
387  *
388  * This is a read-only file that lists all available UMA allocation
389  * options and their corresponding indices. Example output::
390  *
391  *     $ cat uma/carveout_options
392  *     0: Minimum (512 MB)
393  *     1:  (1 GB)
394  *     2:  (2 GB)
395  *     3:  (4 GB)
396  *     4:  (6 GB)
397  *     5:  (8 GB)
398  *     6:  (12 GB)
399  *     7: Medium (16 GB)
400  *     8:  (24 GB)
401  *     9: High (32 GB)
402  */
403 static ssize_t carveout_options_show(struct device *dev,
404 				     struct device_attribute *attr,
405 				     char *buf)
406 {
407 	struct drm_device *ddev = dev_get_drvdata(dev);
408 	struct amdgpu_device *adev = drm_to_adev(ddev);
409 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
410 	uint32_t memory_carved;
411 	ssize_t size = 0;
412 
413 	if (!uma_info || !uma_info->num_entries)
414 		return -ENODEV;
415 
416 	for (int i = 0; i < uma_info->num_entries; i++) {
417 		memory_carved = uma_info->entries[i].memory_carved_mb;
418 		if (memory_carved >= SZ_1G/SZ_1M) {
419 			size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n",
420 					      i,
421 					      uma_info->entries[i].name,
422 					      memory_carved >> 10);
423 		} else {
424 			size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n",
425 					      i,
426 					      uma_info->entries[i].name,
427 					      memory_carved);
428 		}
429 	}
430 
431 	return size;
432 }
433 static DEVICE_ATTR_RO(carveout_options);
434 
435 /**
436  * DOC: uma/carveout
437  *
438  * This file is both readable and writable. When read, it shows the
439  * index of the current setting. Writing a valid index to this file
440  * allows users to change the UMA carveout size to the selected option
441  * on the next boot.
442  *
443  * The available options and their corresponding indices can be read
444  * from the uma/carveout_options file.
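 *
 * Example (the chosen index is illustrative)::
 *
 *     $ cat uma/carveout
 *     0
 *     $ echo 7 > uma/carveout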
445  */
446 static ssize_t carveout_show(struct device *dev,
447 			     struct device_attribute *attr,
448 			     char *buf)
449 {
450 	struct drm_device *ddev = dev_get_drvdata(dev);
451 	struct amdgpu_device *adev = drm_to_adev(ddev);
452 
453 	return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index);
454 }
455 
456 static ssize_t carveout_store(struct device *dev,
457 			      struct device_attribute *attr,
458 			      const char *buf, size_t count)
459 {
460 	struct drm_device *ddev = dev_get_drvdata(dev);
461 	struct amdgpu_device *adev = drm_to_adev(ddev);
462 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
463 	struct amdgpu_uma_carveout_option *opt;
464 	unsigned long val;
465 	uint8_t flags;
466 	int r;
467 
468 	r = kstrtoul(buf, 10, &val);
469 	if (r)
470 		return r;
471 
472 	if (val >= uma_info->num_entries)
473 		return -EINVAL;
474 
475 	val = array_index_nospec(val, uma_info->num_entries);
476 	opt = &uma_info->entries[val];
477 
478 	if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) &&
479 	    !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) {
480 		drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val);
481 		return -EINVAL;
482 	}
483 
484 	flags = opt->flags;
485 	flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1);
486 
487 	guard(mutex)(&uma_info->update_lock);
488 
489 	r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags);
490 	if (r)
491 		return r;
492 
493 	uma_info->uma_option_index = val;
494 
495 	return count;
496 }
497 static DEVICE_ATTR_RW(carveout);
498 
499 static struct attribute *amdgpu_uma_attrs[] = {
500 	&dev_attr_carveout.attr,
501 	&dev_attr_carveout_options.attr,
502 	NULL
503 };
504 
505 const struct attribute_group amdgpu_uma_attr_group = {
506 	.name = "uma",
507 	.attrs = amdgpu_uma_attrs
508 };
509 
510 static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev)
511 {
512 	int rc;
513 
514 	if (!(adev->flags & AMD_IS_APU))
515 		return;
516 
517 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
518 		return;
519 
520 	rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info);
521 	if (rc) {
522 		drm_dbg(adev_to_drm(adev),
523 			"Failed to parse UMA carveout info from VBIOS: %d\n", rc);
524 		goto out_info;
525 	}
526 
527 	mutex_init(&adev->uma_info.update_lock);
528 
529 	rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group);
530 	if (rc) {
531 		drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc);
532 		goto out_attr;
533 	}
534 
535 	return;
536 
537 out_attr:
538 	mutex_destroy(&adev->uma_info.update_lock);
539 out_info:
540 	return;
541 }
542 
543 static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev)
544 {
545 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
546 
547 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
548 		return;
549 
550 	mutex_destroy(&uma_info->update_lock);
551 	uma_info->num_entries = 0;
552 }
553 
554 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
555 
556 /**
557  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
558  *
559  * @adev: amdgpu device pointer
560  *
561  * Returns true if the device is a dGPU with ATPX power control,
562  * otherwise returns false.
563  */
564 bool amdgpu_device_supports_px(struct amdgpu_device *adev)
565 {
566 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
567 		return true;
568 	return false;
569 }
570 
571 /**
572  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
573  *
574  * @adev: amdgpu device pointer
575  *
576  * Returns true if the device is a dGPU with ACPI power control,
577  * otherwise returns false.
578  */
579 bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
580 {
581 	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
582 		return false;
583 
584 	if (adev->has_pr3 ||
585 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
586 		return true;
587 	return false;
588 }
589 
590 /**
591  * amdgpu_device_supports_baco - Does the device support BACO
592  *
593  * @adev: amdgpu device pointer
594  *
595  * Return:
596  * 1 if the device supports BACO;
597  * 3 if the device supports MACO (only works if BACO is supported);
598  * otherwise returns 0.
599  */
600 int amdgpu_device_supports_baco(struct amdgpu_device *adev)
601 {
602 	return amdgpu_asic_supports_baco(adev);
603 }
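
/*
 * Interpretation sketch: the return value is a bitmask, so callers test
 * MACO_SUPPORT and BACO_SUPPORT individually, as
 * amdgpu_device_detect_runtime_pm_mode() does below:
 *
 *	int caps = amdgpu_device_supports_baco(adev);
 *
 *	if (caps & MACO_SUPPORT)
 *		... BAMACO is possible (implies BACO support) ...
 *	else if (caps & BACO_SUPPORT)
 *		... plain BACO only ...
 */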
604 
605 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
606 {
607 	int bamaco_support;
608 
609 	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
610 	bamaco_support = amdgpu_device_supports_baco(adev);
611 
612 	switch (amdgpu_runtime_pm) {
613 	case 2:
614 		if (bamaco_support & MACO_SUPPORT) {
615 			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
616 			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
617 		} else if (bamaco_support == BACO_SUPPORT) {
618 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
619 			dev_info(adev->dev, "Requested mode BAMACO not available, falling back to BACO\n");
620 		}
621 		break;
622 	case 1:
623 		if (bamaco_support & BACO_SUPPORT) {
624 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
625 			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
626 		}
627 		break;
628 	case -1:
629 	case -2:
630 		if (amdgpu_device_supports_px(adev)) {
631 			/* enable PX as runtime mode */
632 			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
633 			dev_info(adev->dev, "Using ATPX for runtime pm\n");
634 		} else if (amdgpu_device_supports_boco(adev)) {
635 			/* enable boco as runtime mode */
636 			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
637 			dev_info(adev->dev, "Using BOCO for runtime pm\n");
638 		} else {
639 			if (!bamaco_support)
640 				goto no_runtime_pm;
641 
642 			switch (adev->asic_type) {
643 			case CHIP_VEGA20:
644 			case CHIP_ARCTURUS:
645 				/* BACO is not supported on vega20 and arcturus */
646 				break;
647 			case CHIP_VEGA10:
648 				/* enable BACO as runpm mode if noretry=0 */
649 				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
650 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
651 				break;
652 			default:
653 				/* enable BACO as runpm mode on CI+ */
654 				if (!amdgpu_passthrough(adev))
655 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
656 				break;
657 			}
658 
659 			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
660 				if (bamaco_support & MACO_SUPPORT) {
661 					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
662 					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
663 				} else {
664 					dev_info(adev->dev, "Using BACO for runtime pm\n");
665 				}
666 			}
667 		}
668 		break;
669 	case 0:
670 		dev_info(adev->dev, "runtime pm is manually disabled\n");
671 		break;
672 	default:
673 		break;
674 	}
675 
676 no_runtime_pm:
677 	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
678 		dev_info(adev->dev, "Runtime PM not available\n");
679 }
680 /**
681  * amdgpu_device_supports_smart_shift - Is the device a dGPU with
682  * Smart Shift support
683  *
684  * @adev: amdgpu device pointer
685  *
686  * Returns true if the device is a dGPU with Smart Shift support,
687  * otherwise returns false.
688  */
689 bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
690 {
691 	return (amdgpu_device_supports_boco(adev) &&
692 		amdgpu_acpi_is_power_shift_control_supported());
693 }
694 
695 /*
696  * VRAM access helper functions
697  */
698 
699 /**
700  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
701  *
702  * @adev: amdgpu_device pointer
703  * @pos: offset of the buffer in vram
704  * @buf: virtual address of the buffer in system memory
705  * @size: read/write size; the buffer at @buf must be at least @size bytes
706  * @write: true - write to vram, otherwise - read from vram
707  */
708 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
709 			     void *buf, size_t size, bool write)
710 {
711 	unsigned long flags;
712 	uint32_t hi = ~0, tmp = 0;
713 	uint32_t *data = buf;
714 	uint64_t last;
715 	int idx;
716 
717 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
718 		return;
719 
720 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
721 
722 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
723 	for (last = pos + size; pos < last; pos += 4) {
724 		tmp = pos >> 31;
725 
726 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
727 		if (tmp != hi) {
728 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
729 			hi = tmp;
730 		}
731 		if (write)
732 			WREG32_NO_KIQ(mmMM_DATA, *data++);
733 		else
734 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
735 	}
736 
737 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
738 	drm_dev_exit(idx);
739 }
740 
741 /**
742  * amdgpu_device_aper_access - access vram by vram aperture
743  *
744  * @adev: amdgpu_device pointer
745  * @pos: offset of the buffer in vram
746  * @buf: virtual address of the buffer in system memory
747  * @size: read/write size; the buffer at @buf must be at least @size bytes
748  * @write: true - write to vram, otherwise - read from vram
749  *
750  * Returns the number of bytes transferred.
751  */
752 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
753 				 void *buf, size_t size, bool write)
754 {
755 #ifdef CONFIG_64BIT
756 	void __iomem *addr;
757 	size_t count = 0;
758 	uint64_t last;
759 
760 	if (!adev->mman.aper_base_kaddr)
761 		return 0;
762 
763 	last = min(pos + size, adev->gmc.visible_vram_size);
764 	if (last > pos) {
765 		addr = adev->mman.aper_base_kaddr + pos;
766 		count = last - pos;
767 
768 		if (write) {
769 			memcpy_toio(addr, buf, count);
770 			/* Make sure HDP write cache flush happens without any reordering
771 			 * after the system memory contents are sent over PCIe to the device
772 			 */
773 			mb();
774 			amdgpu_device_flush_hdp(adev, NULL);
775 		} else {
776 			amdgpu_device_invalidate_hdp(adev, NULL);
777 			/* Make sure HDP read cache is invalidated before issuing a read
778 			 * to the PCIe device
779 			 */
780 			mb();
781 			memcpy_fromio(buf, addr, count);
782 		}
783 
784 	}
785 
786 	return count;
787 #else
788 	return 0;
789 #endif
790 }
791 
792 /**
793  * amdgpu_device_vram_access - read/write a buffer in vram
794  *
795  * @adev: amdgpu_device pointer
796  * @pos: offset of the buffer in vram
797  * @buf: virtual address of the buffer in system memory
798  * @size: read/write size; the buffer at @buf must be at least @size bytes
799  * @write: true - write to vram, otherwise - read from vram
800  */
801 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
802 			       void *buf, size_t size, bool write)
803 {
804 	size_t count;
805 
806 	/* try using the VRAM aperture to access VRAM first */
807 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
808 	size -= count;
809 	if (size) {
810 		/* use MM_INDEX/MM_DATA to access the rest of VRAM */
811 		pos += count;
812 		buf += count;
813 		amdgpu_device_mm_access(adev, pos, buf, size, write);
814 	}
815 }
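
/*
 * Usage sketch (illustrative): read one dword at VRAM offset 0 into a local
 * buffer; @pos and @size must be dword-aligned for the MM_INDEX fallback
 * path.
 *
 *	u32 val;
 *
 *	amdgpu_device_vram_access(adev, 0, &val, sizeof(val), false);
 */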
816 
817 /*
818  * register access helper functions.
819  */
820 
821 /* Check if hw access should be skipped because of hotplug or device error */
822 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
823 {
824 	if (adev->no_hw_access)
825 		return true;
826 
827 #ifdef CONFIG_LOCKDEP
828 	/*
829 	 * This is a bit complicated to understand, so worth a comment. What we assert
830 	 * here is that the GPU reset is not running on another thread in parallel.
831 	 *
832 	 * For this we trylock the read side of the reset semaphore, if that succeeds
833 	 * we know that the reset is not running in parallel.
834 	 *
835 	 * If the trylock fails we assert that we are either already holding the read
836 	 * side of the lock or are the reset thread itself and hold the write side of
837 	 * the lock.
838 	 */
839 	if (in_task()) {
840 		if (down_read_trylock(&adev->reset_domain->sem))
841 			up_read(&adev->reset_domain->sem);
842 		else
843 			lockdep_assert_held(&adev->reset_domain->sem);
844 	}
845 #endif
846 	return false;
847 }
848 
849 /**
850  * amdgpu_device_get_rev_id - query device rev_id
851  *
852  * @adev: amdgpu_device pointer
853  *
854  * Return device rev_id
855  */
856 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
857 {
858 	return adev->nbio.funcs->get_rev_id(adev);
859 }
860 
861 static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
862 {
863 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
864 		return AMDGPU_VBIOS_SKIP;
865 
866 	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
867 		return AMDGPU_VBIOS_OPTIONAL;
868 
869 	return 0;
870 }
871 
872 /**
873  * amdgpu_device_asic_init - Wrapper for atom asic_init
874  *
875  * @adev: amdgpu_device pointer
876  *
877  * Does any asic specific work and then calls atom asic init.
878  */
879 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
880 {
881 	uint32_t flags;
882 	bool optional;
883 	int ret;
884 
885 	amdgpu_asic_pre_asic_init(adev);
886 	flags = amdgpu_device_get_vbios_flags(adev);
887 	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
888 
889 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
890 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
891 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
892 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
893 		amdgpu_psp_wait_for_bootloader(adev);
894 		if (optional && !adev->bios)
895 			return 0;
896 
897 		ret = amdgpu_atomfirmware_asic_init(adev, true);
898 		return ret;
899 	} else {
900 		if (optional && !adev->bios)
901 			return 0;
902 
903 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
904 	}
905 
906 	return 0;
907 }
908 
909 /**
910  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
911  *
912  * @adev: amdgpu_device pointer
913  *
914  * Allocates a scratch page of VRAM for use by various things in the
915  * driver.
916  */
917 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
918 {
919 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
920 				       AMDGPU_GEM_DOMAIN_VRAM |
921 				       AMDGPU_GEM_DOMAIN_GTT,
922 				       &adev->mem_scratch.robj,
923 				       &adev->mem_scratch.gpu_addr,
924 				       (void **)&adev->mem_scratch.ptr);
925 }
926 
927 /**
928  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
929  *
930  * @adev: amdgpu_device pointer
931  *
932  * Frees the VRAM scratch page.
933  */
934 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
935 {
936 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
937 }
938 
939 /**
940  * amdgpu_device_program_register_sequence - program an array of registers.
941  *
942  * @adev: amdgpu_device pointer
943  * @registers: pointer to the register array
944  * @array_size: size of the register array
945  *
946  * Programs an array of registers with AND/OR masks.
947  * This is a helper for setting golden registers.
948  */
949 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
950 					     const u32 *registers,
951 					     const u32 array_size)
952 {
953 	u32 tmp, reg, and_mask, or_mask;
954 	int i;
955 
956 	if (array_size % 3)
957 		return;
958 
959 	for (i = 0; i < array_size; i += 3) {
960 		reg = registers[i + 0];
961 		and_mask = registers[i + 1];
962 		or_mask = registers[i + 2];
963 
964 		if (and_mask == 0xffffffff) {
965 			tmp = or_mask;
966 		} else {
967 			tmp = RREG32(reg);
968 			tmp &= ~and_mask;
969 			if (adev->family >= AMDGPU_FAMILY_AI)
970 				tmp |= (or_mask & and_mask);
971 			else
972 				tmp |= or_mask;
973 		}
974 		WREG32(reg, tmp);
975 	}
976 }
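
/*
 * Usage sketch (illustrative register and mask values, not real golden
 * settings): the array is consumed as {register, and_mask, or_mask} triples.
 *
 *	static const u32 example_golden_regs[] = {
 *		mmMM_INDEX, 0x0000ffff, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *						ARRAY_SIZE(example_golden_regs));
 */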
977 
978 /**
979  * amdgpu_device_pci_config_reset - reset the GPU
980  *
981  * @adev: amdgpu_device pointer
982  *
983  * Resets the GPU using the pci config reset sequence.
984  * Only applicable to asics prior to vega10.
985  */
986 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
987 {
988 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
989 }
990 
991 /**
992  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
993  *
994  * @adev: amdgpu_device pointer
995  *
996  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
997  */
998 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
999 {
1000 	return pci_reset_function(adev->pdev);
1001 }
1002 
1003 /*
1004  * amdgpu_device_wb_*()
1005  * Writeback is the method by which the GPU updates special pages in memory
1006  * with the status of certain GPU events (fences, ring pointers, etc.).
1007  */
1008 
1009 /**
1010  * amdgpu_device_wb_fini - Disable Writeback and free memory
1011  *
1012  * @adev: amdgpu_device pointer
1013  *
1014  * Disables Writeback and frees the Writeback memory (all asics).
1015  * Used at driver shutdown.
1016  */
1017 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1018 {
1019 	if (adev->wb.wb_obj) {
1020 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1021 				      &adev->wb.gpu_addr,
1022 				      (void **)&adev->wb.wb);
1023 		adev->wb.wb_obj = NULL;
1024 	}
1025 }
1026 
1027 /**
1028  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1029  *
1030  * @adev: amdgpu_device pointer
1031  *
1032  * Initializes writeback and allocates writeback memory (all asics).
1033  * Used at driver startup.
1034  * Returns 0 on success or a negative error code on failure.
1035  */
1036 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1037 {
1038 	int r;
1039 
1040 	if (adev->wb.wb_obj == NULL) {
1041 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256-bit slots */
1042 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1043 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1044 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1045 					    (void **)&adev->wb.wb);
1046 		if (r) {
1047 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1048 			return r;
1049 		}
1050 
1051 		adev->wb.num_wb = AMDGPU_MAX_WB;
1052 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1053 
1054 		/* clear wb memory */
1055 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 /**
1062  * amdgpu_device_wb_get - Allocate a wb entry
1063  *
1064  * @adev: amdgpu_device pointer
1065  * @wb: wb index
1066  *
1067  * Allocate a wb slot for use by the driver (all asics).
1068  * Returns 0 on success or -EINVAL on failure.
1069  */
1070 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1071 {
1072 	unsigned long flags, offset;
1073 
1074 	spin_lock_irqsave(&adev->wb.lock, flags);
1075 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1076 	if (offset < adev->wb.num_wb) {
1077 		__set_bit(offset, adev->wb.used);
1078 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1079 		*wb = offset << 3; /* convert to dw offset */
1080 		return 0;
1081 	} else {
1082 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1083 		return -EINVAL;
1084 	}
1085 }
1086 
1087 /**
1088  * amdgpu_device_wb_free - Free a wb entry
1089  *
1090  * @adev: amdgpu_device pointer
1091  * @wb: wb index
1092  *
1093  * Free a wb slot allocated for use by the driver (all asics)
1094  */
1095 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1096 {
1097 	unsigned long flags;
1098 
1099 	wb >>= 3;
1100 	spin_lock_irqsave(&adev->wb.lock, flags);
1101 	if (wb < adev->wb.num_wb)
1102 		__clear_bit(wb, adev->wb.used);
1103 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1104 }
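
/*
 * Usage sketch (illustrative): a caller allocates a writeback slot, derives
 * the CPU and GPU addresses from the returned dword offset, and frees the
 * slot when done:
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		u32 *cpu_addr = &adev->wb.wb[wb];
 *		u64 gpu_addr = adev->wb.gpu_addr + wb * 4;
 *
 *		... let the GPU write status via gpu_addr, poll *cpu_addr ...
 *
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */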
1105 
1106 /**
1107  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1108  *
1109  * @adev: amdgpu_device pointer
1110  *
1111  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1112  * to fail, but if any of the BARs is not accessible after the resize we abort
1113  * driver loading by returning -ENODEV.
1114  */
1115 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1116 {
1117 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1118 	struct pci_bus *root;
1119 	struct resource *res;
1120 	int max_size, r;
1121 	unsigned int i;
1122 	u16 cmd;
1123 
1124 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1125 		return 0;
1126 
1127 	/* Bypass for VF */
1128 	if (amdgpu_sriov_vf(adev))
1129 		return 0;
1130 
1131 	if (!amdgpu_rebar)
1132 		return 0;
1133 
1134 	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1135 	if ((amdgpu_runtime_pm != 0) &&
1136 	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1137 	    adev->pdev->device == 0x731f &&
1138 	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1139 		return 0;
1140 
1141 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1142 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1143 		dev_warn(
1144 			adev->dev,
1145 			"System can't access extended configuration space, please check!!\n");
1146 
1147 	/* skip if the bios has already enabled large BAR */
1148 	if (adev->gmc.real_vram_size &&
1149 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1150 		return 0;
1151 
1152 	/* Check if the root BUS has 64bit memory resources */
1153 	root = adev->pdev->bus;
1154 	while (root->parent)
1155 		root = root->parent;
1156 
1157 	pci_bus_for_each_resource(root, res, i) {
1158 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1159 		    res->start > 0x100000000ull)
1160 			break;
1161 	}
1162 
1163 	/* Trying to resize is pointless without a root hub window above 4GB */
1164 	if (!res)
1165 		return 0;
1166 
1167 	/* Limit the BAR size to what is available */
1168 	max_size = pci_rebar_get_max_size(adev->pdev, 0);
1169 	if (max_size < 0)
1170 		return 0;
1171 	rbar_size = min(max_size, rbar_size);
1172 
1173 	/* Disable memory decoding while we change the BAR addresses and size */
1174 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1175 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1176 			      cmd & ~PCI_COMMAND_MEMORY);
1177 
1178 	/* Tear down doorbell as resizing will release BARs */
1179 	amdgpu_doorbell_fini(adev);
1180 
1181 	r = pci_resize_resource(adev->pdev, 0, rbar_size,
1182 				(adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
1183 								  : 1 << 2);
1184 	if (r == -ENOSPC)
1185 		dev_info(adev->dev,
1186 			 "Not enough PCI address space for a large BAR.");
1187 	else if (r && r != -ENOTSUPP)
1188 		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1189 
1190 	/* When the doorbell or fb BAR isn't available we have no chance of
1191 	 * using the device.
1192 	 */
1193 	r = amdgpu_doorbell_init(adev);
1194 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1195 		return -ENODEV;
1196 
1197 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1198 
1199 	return 0;
1200 }
1201 
1202 /*
1203  * GPU helpers function.
1204  */
1205 /**
1206  * amdgpu_device_need_post - check if the hw need post or not
1207  *
1208  * @adev: amdgpu_device pointer
1209  *
1210  * Check if the asic has been initialized (all asics) at driver startup,
1211  * or if post is needed because a hw reset was performed.
1212  * Returns true if post is needed, false if not.
1213  */
1214 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1215 {
1216 	uint32_t reg, flags;
1217 
1218 	if (amdgpu_sriov_vf(adev))
1219 		return false;
1220 
1221 	flags = amdgpu_device_get_vbios_flags(adev);
1222 	if (flags & AMDGPU_VBIOS_SKIP)
1223 		return false;
1224 	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1225 		return false;
1226 
1227 	if (amdgpu_passthrough(adev)) {
1228 		/* For FIJI: in the whole-GPU pass-through virtualization case, after a
1229 		 * VM reboot some old SMC firmware still needs the driver to do a vPost,
1230 		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
1231 		 * this flaw, so we force vPost for SMC versions below 22.15.
1232 		 */
1233 		if (adev->asic_type == CHIP_FIJI) {
1234 			int err;
1235 			uint32_t fw_ver;
1236 
1237 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1238 			/* force vPost if error occurred */
1239 			if (err)
1240 				return true;
1241 
1242 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1243 			release_firmware(adev->pm.fw);
1244 			if (fw_ver < 0x00160e00)
1245 				return true;
1246 		}
1247 	}
1248 
1249 	/* Don't post if we need to reset whole hive on init */
1250 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1251 		return false;
1252 
1253 	if (adev->has_hw_reset) {
1254 		adev->has_hw_reset = false;
1255 		return true;
1256 	}
1257 
1258 	/* bios scratch used on CIK+ */
1259 	if (adev->asic_type >= CHIP_BONAIRE)
1260 		return amdgpu_atombios_scratch_need_asic_init(adev);
1261 
1262 	/* check MEM_SIZE for older asics */
1263 	reg = amdgpu_asic_get_config_memsize(adev);
1264 
1265 	if ((reg != 0) && (reg != 0xffffffff))
1266 		return false;
1267 
1268 	return true;
1269 }
1270 
1271 /*
1272  * Check whether seamless boot is supported.
1273  *
1274  * So far we only support seamless boot on DCE 3.0 or later.
1275  * If users report that it works on older ASICs as well, we may
1276  * loosen this.
1277  */
1278 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1279 {
1280 	switch (amdgpu_seamless) {
1281 	case -1:
1282 		break;
1283 	case 1:
1284 		return true;
1285 	case 0:
1286 		return false;
1287 	default:
1288 		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1289 			amdgpu_seamless);
1290 		return false;
1291 	}
1292 
1293 	if (!(adev->flags & AMD_IS_APU))
1294 		return false;
1295 
1296 	if (adev->mman.keep_stolen_vga_memory)
1297 		return false;
1298 
1299 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1300 }
1301 
1302 /*
1303  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1304  * don't support dynamic speed switching. Until we have confirmation from Intel
1305  * that a specific host supports it, it's safer to keep it disabled for all.
1306  *
1307  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1308  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1309  */
1310 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1311 {
1312 #if IS_ENABLED(CONFIG_X86)
1313 	struct cpuinfo_x86 *c = &cpu_data(0);
1314 
1315 	/* eGPUs change speeds based on USB4 fabric conditions */
1316 	if (dev_is_removable(adev->dev))
1317 		return true;
1318 
1319 	if (c->x86_vendor == X86_VENDOR_INTEL)
1320 		return false;
1321 #endif
1322 	return true;
1323 }
1324 
1325 static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1326 {
1327 	/* Enabling ASPM causes random hangs on Tahiti and Oland on Zen4.
1328 	 * It's unclear if this is a platform-specific or GPU-specific issue.
1329 	 * Disable ASPM on SI for the time being.
1330 	 */
1331 	if (adev->family == AMDGPU_FAMILY_SI)
1332 		return true;
1333 
1334 #if IS_ENABLED(CONFIG_X86)
1335 	struct cpuinfo_x86 *c = &cpu_data(0);
1336 
1337 	if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
1338 		  amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
1339 		return false;
1340 
1341 	if (c->x86 == 6 &&
1342 		adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
1343 		switch (c->x86_model) {
1344 		case VFM_MODEL(INTEL_ALDERLAKE):
1345 		case VFM_MODEL(INTEL_ALDERLAKE_L):
1346 		case VFM_MODEL(INTEL_RAPTORLAKE):
1347 		case VFM_MODEL(INTEL_RAPTORLAKE_P):
1348 		case VFM_MODEL(INTEL_RAPTORLAKE_S):
1349 			return true;
1350 		default:
1351 			return false;
1352 		}
1353 	} else {
1354 		return false;
1355 	}
1356 #else
1357 	return false;
1358 #endif
1359 }
1360 
1361 /**
1362  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1363  *
1364  * @adev: amdgpu_device pointer
1365  *
1366  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1367  * be set for this device.
1368  *
1369  * Returns true if it should be used or false if not.
1370  */
1371 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1372 {
1373 	switch (amdgpu_aspm) {
1374 	case -1:
1375 		break;
1376 	case 0:
1377 		return false;
1378 	case 1:
1379 		return true;
1380 	default:
1381 		return false;
1382 	}
1383 	if (adev->flags & AMD_IS_APU)
1384 		return false;
1385 	if (amdgpu_device_aspm_support_quirk(adev))
1386 		return false;
1387 	return pcie_aspm_enabled(adev->pdev);
1388 }
1389 
1390 /* if we get transitioned to only one device, take VGA back */
1391 /**
1392  * amdgpu_device_vga_set_decode - enable/disable vga decode
1393  *
1394  * @pdev: PCI device pointer
1395  * @state: enable/disable vga decode
1396  *
1397  * Enable/disable vga decode (all asics).
1398  * Returns VGA resource flags.
1399  */
1400 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1401 		bool state)
1402 {
1403 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1404 
1405 	amdgpu_asic_set_vga_state(adev, state);
1406 	if (state)
1407 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1408 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1409 	else
1410 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1411 }
1412 
1413 /**
1414  * amdgpu_device_check_block_size - validate the vm block size
1415  *
1416  * @adev: amdgpu_device pointer
1417  *
1418  * Validates the vm block size specified via module parameter.
1419  * The vm block size defines the number of bits in the page table versus the
1420  * page directory; a page is 4KB, so we have a 12-bit offset, a minimum of
1421  * 9 bits in the page table, and the remaining bits in the page directory.
1422  */
1423 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1424 {
1425 	/* defines number of bits in page table versus page directory,
1426 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1427 	 * page table and the remaining bits are in the page directory
1428 	 */
1429 	if (amdgpu_vm_block_size == -1)
1430 		return;
1431 
1432 	if (amdgpu_vm_block_size < 9) {
1433 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1434 			 amdgpu_vm_block_size);
1435 		amdgpu_vm_block_size = -1;
1436 	}
1437 }
1438 
1439 /**
1440  * amdgpu_device_check_vm_size - validate the vm size
1441  *
1442  * @adev: amdgpu_device pointer
1443  *
1444  * Validates the vm size in GB specified via module parameter.
1445  * The VM size is the size of the GPU virtual memory space in GB.
1446  */
1447 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1448 {
1449 	/* no need to check the default value */
1450 	if (amdgpu_vm_size == -1)
1451 		return;
1452 
1453 	if (amdgpu_vm_size < 1) {
1454 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1455 			 amdgpu_vm_size);
1456 		amdgpu_vm_size = -1;
1457 	}
1458 }
1459 
1460 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1461 {
1462 	struct sysinfo si;
1463 	bool is_os_64 = (sizeof(void *) == 8);
1464 	uint64_t total_memory;
1465 	uint64_t dram_size_seven_GB = 0x1B8000000;
1466 	uint64_t dram_size_three_GB = 0xB8000000;
1467 
1468 	if (amdgpu_smu_memory_pool_size == 0)
1469 		return;
1470 
1471 	if (!is_os_64) {
1472 		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
1473 		goto def_value;
1474 	}
1475 	si_meminfo(&si);
1476 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1477 
1478 	if ((amdgpu_smu_memory_pool_size == 1) ||
1479 		(amdgpu_smu_memory_pool_size == 2)) {
1480 		if (total_memory < dram_size_three_GB)
1481 			goto def_value1;
1482 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1483 		(amdgpu_smu_memory_pool_size == 8)) {
1484 		if (total_memory < dram_size_seven_GB)
1485 			goto def_value1;
1486 	} else {
1487 		dev_warn(adev->dev, "Smu memory pool size not supported\n");
1488 		goto def_value;
1489 	}
1490 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1491 
1492 	return;
1493 
1494 def_value1:
1495 	dev_warn(adev->dev, "Not enough system memory\n");
1496 def_value:
1497 	adev->pm.smu_prv_buffer_size = 0;
1498 }
1499 
1500 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1501 {
1502 	if (!(adev->flags & AMD_IS_APU) ||
1503 	    adev->asic_type < CHIP_RAVEN)
1504 		return 0;
1505 
1506 	switch (adev->asic_type) {
1507 	case CHIP_RAVEN:
1508 		if (adev->pdev->device == 0x15dd)
1509 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1510 		if (adev->pdev->device == 0x15d8)
1511 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1512 		break;
1513 	case CHIP_RENOIR:
1514 		if ((adev->pdev->device == 0x1636) ||
1515 		    (adev->pdev->device == 0x164c))
1516 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1517 		else
1518 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1519 		break;
1520 	case CHIP_VANGOGH:
1521 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1522 		break;
1523 	case CHIP_YELLOW_CARP:
1524 		break;
1525 	case CHIP_CYAN_SKILLFISH:
1526 		if ((adev->pdev->device == 0x13FE) ||
1527 		    (adev->pdev->device == 0x143F))
1528 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1529 		break;
1530 	default:
1531 		break;
1532 	}
1533 
1534 	return 0;
1535 }
1536 
1537 /**
1538  * amdgpu_device_check_arguments - validate module params
1539  *
1540  * @adev: amdgpu_device pointer
1541  *
1542  * Validates certain module parameters and updates
1543  * the associated values used by the driver (all asics).
1544  */
1545 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1546 {
1547 	int i;
1548 
1549 	if (amdgpu_sched_jobs < 4) {
1550 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1551 			 amdgpu_sched_jobs);
1552 		amdgpu_sched_jobs = 4;
1553 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1554 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1555 			 amdgpu_sched_jobs);
1556 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1557 	}
1558 
1559 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1560 		/* gart size must be greater than or equal to 32M */
1561 		dev_warn(adev->dev, "gart size (%d) too small\n",
1562 			 amdgpu_gart_size);
1563 		amdgpu_gart_size = -1;
1564 	}
1565 
1566 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1567 		/* gtt size must be greater than or equal to 32M */
1568 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1569 				 amdgpu_gtt_size);
1570 		amdgpu_gtt_size = -1;
1571 	}
1572 
1573 	/* valid range is between 4 and 9 inclusive */
1574 	if (amdgpu_vm_fragment_size != -1 &&
1575 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1576 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1577 		amdgpu_vm_fragment_size = -1;
1578 	}
1579 
1580 	if (amdgpu_sched_hw_submission < 2) {
1581 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1582 			 amdgpu_sched_hw_submission);
1583 		amdgpu_sched_hw_submission = 2;
1584 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1585 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1586 			 amdgpu_sched_hw_submission);
1587 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1588 	}
1589 
1590 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1591 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1592 		amdgpu_reset_method = -1;
1593 	}
1594 
1595 	amdgpu_device_check_smu_prv_buffer_size(adev);
1596 
1597 	amdgpu_device_check_vm_size(adev);
1598 
1599 	amdgpu_device_check_block_size(adev);
1600 
1601 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1602 
1603 	for (i = 0; i < MAX_XCP; i++) {
1604 		switch (amdgpu_enforce_isolation) {
1605 		case -1:
1606 		case 0:
1607 		default:
1608 			/* disable */
1609 			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
1610 			break;
1611 		case 1:
1612 			/* enable */
1613 			adev->enforce_isolation[i] =
1614 				AMDGPU_ENFORCE_ISOLATION_ENABLE;
1615 			break;
1616 		case 2:
1617 			/* enable legacy mode */
1618 			adev->enforce_isolation[i] =
1619 				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
1620 			break;
1621 		case 3:
1622 			/* enable only process isolation without submitting cleaner shader */
1623 			adev->enforce_isolation[i] =
1624 				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
1625 			break;
1626 		}
1627 	}
1628 
1629 	return 0;
1630 }
1631 
1632 /**
1633  * amdgpu_switcheroo_set_state - set switcheroo state
1634  *
1635  * @pdev: pci dev pointer
1636  * @state: vga_switcheroo state
1637  *
1638  * Callback for the switcheroo driver.  Suspends the asic before it is
1639  * powered down or resumes it after it is powered up using ACPI methods.
1640  */
1641 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1642 					enum vga_switcheroo_state state)
1643 {
1644 	struct drm_device *dev = pci_get_drvdata(pdev);
1645 	int r;
1646 
1647 	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
1648 	    state == VGA_SWITCHEROO_OFF)
1649 		return;
1650 
1651 	if (state == VGA_SWITCHEROO_ON) {
1652 		pr_info("switched on\n");
1653 		/* don't suspend or resume card normally */
1654 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1655 
1656 		pci_set_power_state(pdev, PCI_D0);
1657 		amdgpu_device_load_pci_state(pdev);
1658 		r = pci_enable_device(pdev);
1659 		if (r)
1660 			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
1661 				 r);
1662 		amdgpu_device_resume(dev, true);
1663 
1664 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1665 	} else {
1666 		dev_info(&pdev->dev, "switched off\n");
1667 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1668 		amdgpu_device_prepare(dev);
1669 		amdgpu_device_suspend(dev, true);
1670 		amdgpu_device_cache_pci_state(pdev);
1671 		/* Shut down the device */
1672 		pci_disable_device(pdev);
1673 		pci_set_power_state(pdev, PCI_D3cold);
1674 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1675 	}
1676 }
1677 
1678 /**
1679  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1680  *
1681  * @pdev: pci dev pointer
1682  *
1683  * Callback for the switcheroo driver.  Checks if the switcheroo
1684  * state can be changed.
1685  * Returns true if the state can be changed, false if not.
1686  */
1687 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1688 {
1689 	struct drm_device *dev = pci_get_drvdata(pdev);
1690 
1691 	/*
1692 	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1693 	 * locking inversion with the driver load path. And the access here is
1694 	 * completely racy anyway. So don't bother with locking for now.
1695 	 */
1696 	return atomic_read(&dev->open_count) == 0;
1697 }
1698 
1699 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1700 	.set_gpu_state = amdgpu_switcheroo_set_state,
1701 	.reprobe = NULL,
1702 	.can_switch = amdgpu_switcheroo_can_switch,
1703 };
1704 
1705 /**
1706  * amdgpu_device_enable_virtual_display - enable virtual display feature
1707  *
1708  * @adev: amdgpu_device pointer
1709  *
1710  * Enables the virtual display feature if the user has enabled it via
1711  * the module parameter virtual_display.  This feature provides a virtual
1712  * display hardware on headless boards or in virtualized environments.
1713  * This function parses and validates the configuration string specified by
1714  * the user and configures the virtual display configuration (number of
1715  * virtual connectors, crtcs, etc.) specified.
1716  */
1717 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1718 {
1719 	adev->enable_virtual_display = false;
1720 
1721 	if (amdgpu_virtual_display) {
1722 		const char *pci_address_name = pci_name(adev->pdev);
1723 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1724 
1725 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1726 		pciaddstr_tmp = pciaddstr;
1727 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1728 			pciaddname = strsep(&pciaddname_tmp, ",");
1729 			if (!strcmp("all", pciaddname)
1730 			    || !strcmp(pci_address_name, pciaddname)) {
1731 				long num_crtc;
1732 				int res = -1;
1733 
1734 				adev->enable_virtual_display = true;
1735 
1736 				if (pciaddname_tmp)
1737 					res = kstrtol(pciaddname_tmp, 10,
1738 						      &num_crtc);
1739 
1740 				if (!res) {
1741 					if (num_crtc < 1)
1742 						num_crtc = 1;
1743 					if (num_crtc > 6)
1744 						num_crtc = 6;
1745 					adev->mode_info.num_crtc = num_crtc;
1746 				} else {
1747 					adev->mode_info.num_crtc = 1;
1748 				}
1749 				break;
1750 			}
1751 		}
1752 
1753 		dev_info(
1754 			adev->dev,
1755 			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1756 			amdgpu_virtual_display, pci_address_name,
1757 			adev->enable_virtual_display, adev->mode_info.num_crtc);
1758 
1759 		kfree(pciaddstr);
1760 	}
1761 }
1762 
1763 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1764 {
1765 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1766 		adev->mode_info.num_crtc = 1;
1767 		adev->enable_virtual_display = true;
1768 		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
1769 			 adev->enable_virtual_display,
1770 			 adev->mode_info.num_crtc);
1771 	}
1772 }
1773 
1774 /**
1775  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1776  *
1777  * @adev: amdgpu_device pointer
1778  *
1779  * Parses the asic configuration parameters specified in the gpu info
1780  * firmware and makes them available to the driver for use in configuring
1781  * the asic.
1782  * Returns 0 on success, -EINVAL on failure.
1783  */
1784 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1785 {
1786 	const char *chip_name;
1787 	int err;
1788 	const struct gpu_info_firmware_header_v1_0 *hdr;
1789 
1790 	adev->firmware.gpu_info_fw = NULL;
1791 
1792 	switch (adev->asic_type) {
1793 	default:
1794 		return 0;
1795 	case CHIP_VEGA10:
1796 		chip_name = "vega10";
1797 		break;
1798 	case CHIP_VEGA12:
1799 		chip_name = "vega12";
1800 		break;
1801 	case CHIP_RAVEN:
1802 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1803 			chip_name = "raven2";
1804 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1805 			chip_name = "picasso";
1806 		else
1807 			chip_name = "raven";
1808 		break;
1809 	case CHIP_ARCTURUS:
1810 		chip_name = "arcturus";
1811 		break;
1812 	case CHIP_NAVI12:
1813 		if (adev->discovery.bin)
1814 			return 0;
1815 		chip_name = "navi12";
1816 		break;
1817 	case CHIP_CYAN_SKILLFISH:
1818 		if (adev->discovery.bin)
1819 			return 0;
1820 		chip_name = "cyan_skillfish";
1821 		break;
1822 	}
1823 
1824 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
1825 				   AMDGPU_UCODE_OPTIONAL,
1826 				   "amdgpu/%s_gpu_info.bin", chip_name);
1827 	if (err) {
1828 		dev_err(adev->dev,
1829 			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
1830 			chip_name);
1831 		goto out;
1832 	}
1833 
1834 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1835 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1836 
1837 	switch (hdr->version_major) {
1838 	case 1:
1839 	{
1840 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1841 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1842 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1843 
1844 		/*
1845 		 * Should be dropped when DAL no longer needs it.
1846 		 */
1847 		if (adev->asic_type == CHIP_NAVI12)
1848 			goto parse_soc_bounding_box;
1849 
1850 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1851 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1852 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1853 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1854 		adev->gfx.config.max_texture_channel_caches =
1855 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1856 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1857 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1858 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1859 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1860 		adev->gfx.config.double_offchip_lds_buf =
1861 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1862 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1863 		adev->gfx.cu_info.max_waves_per_simd =
1864 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1865 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1866 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1867 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1868 		if (hdr->version_minor >= 1) {
1869 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1870 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1871 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1872 			adev->gfx.config.num_sc_per_sh =
1873 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1874 			adev->gfx.config.num_packer_per_sc =
1875 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1876 		}
1877 
1878 parse_soc_bounding_box:
1879 		/*
1880 		 * soc bounding box info is not integrated into the discovery table,
1881 		 * so it always has to be parsed from the gpu info firmware when needed.
1882 		 */
1883 		if (hdr->version_minor == 2) {
1884 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1885 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1886 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1887 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1888 		}
1889 		break;
1890 	}
1891 	default:
1892 		dev_err(adev->dev,
1893 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1894 		err = -EINVAL;
1895 		goto out;
1896 	}
1897 out:
1898 	return err;
1899 }
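
/*
 * Layout of the gpu_info binary as consumed above (a sketch based on
 * the v1 structs referenced in this function):
 *
 *   offset 0:
 *       struct gpu_info_firmware_header_v1_0   (version, offsets)
 *   offset hdr->header.ucode_array_offset_bytes:
 *       struct gpu_info_firmware_v1_x payload  (gc_num_se, gc_num_cu_per_sh, ...)
 *
 * v1_1 extends the payload with num_sc_per_sh/num_packer_per_sc and
 * v1_2 adds the soc bounding box; all payload fields are little-endian,
 * hence the le32_to_cpu() conversions.
 */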
1900 
1901 static void amdgpu_uid_init(struct amdgpu_device *adev)
1902 {
1903 	/* Initialize the UID for the device */
1904 	adev->uid_info = kzalloc_obj(struct amdgpu_uid);
1905 	if (!adev->uid_info) {
1906 		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
1907 		return;
1908 	}
1909 	adev->uid_info->adev = adev;
1910 }
1911 
1912 static void amdgpu_uid_fini(struct amdgpu_device *adev)
1913 {
1914 	/* Free the UID memory */
1915 	kfree(adev->uid_info);
1916 	adev->uid_info = NULL;
1917 }
1918 
1919 /**
1920  * amdgpu_device_ip_early_init - run early init for hardware IPs
1921  *
1922  * @adev: amdgpu_device pointer
1923  *
1924  * Early initialization pass for hardware IPs.  The hardware IPs that make
1925  * up each asic are discovered and each IP's early_init callback is run.  This
1926  * is the first stage in initializing the asic.
1927  * Returns 0 on success, negative error code on failure.
1928  */
1929 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1930 {
1931 	struct amdgpu_ip_block *ip_block;
1932 	struct pci_dev *parent;
1933 	bool total, skip_bios;
1934 	uint32_t bios_flags;
1935 	int i, r;
1936 
1937 	amdgpu_device_enable_virtual_display(adev);
1938 
1939 	if (amdgpu_sriov_vf(adev)) {
1940 		r = amdgpu_virt_request_full_gpu(adev, true);
1941 		if (r)
1942 			return r;
1943 
1944 		r = amdgpu_virt_init_critical_region(adev);
1945 		if (r)
1946 			return r;
1947 	}
1948 
1949 	switch (adev->asic_type) {
1950 #ifdef CONFIG_DRM_AMDGPU_SI
1951 	case CHIP_VERDE:
1952 	case CHIP_TAHITI:
1953 	case CHIP_PITCAIRN:
1954 	case CHIP_OLAND:
1955 	case CHIP_HAINAN:
1956 		adev->family = AMDGPU_FAMILY_SI;
1957 		r = si_set_ip_blocks(adev);
1958 		if (r)
1959 			return r;
1960 		break;
1961 #endif
1962 #ifdef CONFIG_DRM_AMDGPU_CIK
1963 	case CHIP_BONAIRE:
1964 	case CHIP_HAWAII:
1965 	case CHIP_KAVERI:
1966 	case CHIP_KABINI:
1967 	case CHIP_MULLINS:
1968 		if (adev->flags & AMD_IS_APU)
1969 			adev->family = AMDGPU_FAMILY_KV;
1970 		else
1971 			adev->family = AMDGPU_FAMILY_CI;
1972 
1973 		r = cik_set_ip_blocks(adev);
1974 		if (r)
1975 			return r;
1976 		break;
1977 #endif
1978 	case CHIP_TOPAZ:
1979 	case CHIP_TONGA:
1980 	case CHIP_FIJI:
1981 	case CHIP_POLARIS10:
1982 	case CHIP_POLARIS11:
1983 	case CHIP_POLARIS12:
1984 	case CHIP_VEGAM:
1985 	case CHIP_CARRIZO:
1986 	case CHIP_STONEY:
1987 		if (adev->flags & AMD_IS_APU)
1988 			adev->family = AMDGPU_FAMILY_CZ;
1989 		else
1990 			adev->family = AMDGPU_FAMILY_VI;
1991 
1992 		r = vi_set_ip_blocks(adev);
1993 		if (r)
1994 			return r;
1995 		break;
1996 	default:
1997 		r = amdgpu_discovery_set_ip_blocks(adev);
1998 		if (r)
1999 			return r;
2000 		break;
2001 	}
2002 
2003 	/* Check for IP version 9.4.3 with A0 hardware */
2004 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2005 	    !amdgpu_device_get_rev_id(adev)) {
2006 		dev_err(adev->dev, "Unsupported A0 hardware\n");
2007 		return -ENODEV;	/* unsupported device, report "no such device" */
2008 	}
2009 
2010 	if (amdgpu_has_atpx() &&
2011 	    (amdgpu_is_atpx_hybrid() ||
2012 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2013 	    ((adev->flags & AMD_IS_APU) == 0) &&
2014 	    !dev_is_removable(&adev->pdev->dev))
2015 		adev->flags |= AMD_IS_PX;
2016 
2017 	if (!(adev->flags & AMD_IS_APU)) {
2018 		parent = pcie_find_root_port(adev->pdev);
2019 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2020 	}
2021 
2022 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2023 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2024 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2025 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2026 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2027 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2028 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2029 
2030 	adev->virt.is_xgmi_node_migrate_enabled = false;
2031 	if (amdgpu_sriov_vf(adev)) {
2032 		adev->virt.is_xgmi_node_migrate_enabled =
2033 			amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2034 	}
2035 
2036 	total = true;
2037 	for (i = 0; i < adev->num_ip_blocks; i++) {
2038 		ip_block = &adev->ip_blocks[i];
2039 
2040 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2041 			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2042 				 adev->ip_blocks[i].version->funcs->name);
2043 			adev->ip_blocks[i].status.valid = false;
2044 		} else if (ip_block->version->funcs->early_init) {
2045 			r = ip_block->version->funcs->early_init(ip_block);
2046 			if (r == -ENOENT) {
2047 				adev->ip_blocks[i].status.valid = false;
2048 			} else if (r) {
2049 				dev_err(adev->dev,
2050 					"early_init of IP block <%s> failed %d\n",
2051 					adev->ip_blocks[i].version->funcs->name,
2052 					r);
2053 				total = false;
2054 			} else {
2055 				adev->ip_blocks[i].status.valid = true;
2056 			}
2057 		} else {
2058 			adev->ip_blocks[i].status.valid = true;
2059 		}
2060 		/* get the vbios after the asic_funcs are set up */
2061 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2062 			r = amdgpu_device_parse_gpu_info_fw(adev);
2063 			if (r)
2064 				return r;
2065 
2066 			bios_flags = amdgpu_device_get_vbios_flags(adev);
2067 			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2068 			/* Read BIOS */
2069 			if (!skip_bios) {
2070 				bool optional =
2071 					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2072 				if (!amdgpu_get_bios(adev) && !optional)
2073 					return -EINVAL;
2074 
2075 				if (optional && !adev->bios)
2076 					dev_info(
2077 						adev->dev,
2078 						"VBIOS image optional, proceeding without VBIOS image\n");
2079 
2080 				if (adev->bios) {
2081 					r = amdgpu_atombios_init(adev);
2082 					if (r) {
2083 						dev_err(adev->dev,
2084 							"amdgpu_atombios_init failed\n");
2085 						amdgpu_vf_error_put(
2086 							adev,
2087 							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2088 							0, 0);
2089 						return r;
2090 					}
2091 				}
2092 			}
2093 
2094 			/* get pf2vf msg info at its earliest time */
2095 			if (amdgpu_sriov_vf(adev))
2096 				amdgpu_virt_init_data_exchange(adev);
2097 
2098 		}
2099 	}
2100 	if (!total)
2101 		return -ENODEV;
2102 
2103 	if (adev->gmc.xgmi.supported)
2104 		amdgpu_xgmi_early_init(adev);
2105 
2106 	if (amdgpu_is_multi_aid(adev))
2107 		amdgpu_uid_init(adev);
2108 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2109 	if (ip_block && ip_block->status.valid)
2110 		amdgpu_amdkfd_device_probe(adev);
2111 
2112 	adev->cg_flags &= amdgpu_cg_mask;
2113 	adev->pg_flags &= amdgpu_pg_mask;
2114 
2115 	return 0;
2116 }
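
/*
 * Example of the ip_block_mask handling above (hypothetical value):
 * with amdgpu_ip_block_mask=0xfffffffd bit 1 is clear, so the IP block
 * at index 1 is marked invalid and every later init/suspend/fini walk
 * that tests status.valid skips it.
 */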
2117 
2118 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2119 {
2120 	int i, r;
2121 
2122 	for (i = 0; i < adev->num_ip_blocks; i++) {
2123 		if (!adev->ip_blocks[i].status.sw)
2124 			continue;
2125 		if (adev->ip_blocks[i].status.hw)
2126 			continue;
2127 		if (!amdgpu_ip_member_of_hwini(
2128 			    adev, adev->ip_blocks[i].version->type))
2129 			continue;
2130 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2131 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2132 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2133 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2134 			if (r) {
2135 				dev_err(adev->dev,
2136 					"hw_init of IP block <%s> failed %d\n",
2137 					adev->ip_blocks[i].version->funcs->name,
2138 					r);
2139 				return r;
2140 			}
2141 			adev->ip_blocks[i].status.hw = true;
2142 		}
2143 	}
2144 
2145 	return 0;
2146 }
2147 
2148 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2149 {
2150 	int i, r;
2151 
2152 	for (i = 0; i < adev->num_ip_blocks; i++) {
2153 		if (!adev->ip_blocks[i].status.sw)
2154 			continue;
2155 		if (adev->ip_blocks[i].status.hw)
2156 			continue;
2157 		if (!amdgpu_ip_member_of_hwini(
2158 			    adev, adev->ip_blocks[i].version->type))
2159 			continue;
2160 		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2161 		if (r) {
2162 			dev_err(adev->dev,
2163 				"hw_init of IP block <%s> failed %d\n",
2164 				adev->ip_blocks[i].version->funcs->name, r);
2165 			return r;
2166 		}
2167 		adev->ip_blocks[i].status.hw = true;
2168 	}
2169 
2170 	return 0;
2171 }
2172 
2173 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2174 {
2175 	int r = 0;
2176 	int i;
2177 	uint32_t smu_version;
2178 
2179 	if (adev->asic_type >= CHIP_VEGA10) {
2180 		for (i = 0; i < adev->num_ip_blocks; i++) {
2181 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2182 				continue;
2183 
2184 			if (!amdgpu_ip_member_of_hwini(adev,
2185 						       AMD_IP_BLOCK_TYPE_PSP))
2186 				break;
2187 
2188 			if (!adev->ip_blocks[i].status.sw)
2189 				continue;
2190 
2191 			/* no need to do the fw loading again if already done */
2192 			if (adev->ip_blocks[i].status.hw)
2193 				break;
2194 
2195 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2196 				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2197 				if (r)
2198 					return r;
2199 			} else {
2200 				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2201 				if (r) {
2202 					dev_err(adev->dev,
2203 						"hw_init of IP block <%s> failed %d\n",
2204 						adev->ip_blocks[i]
2205 							.version->funcs->name,
2206 						r);
2207 					return r;
2208 				}
2209 				adev->ip_blocks[i].status.hw = true;
2210 			}
2211 			break;
2212 		}
2213 	}
2214 
2215 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2216 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2217 
2218 	return r;
2219 }
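
/*
 * A simplified sketch of how the three hw-init steps above are ordered
 * by amdgpu_device_ip_init():
 *
 *   amdgpu_device_ip_hw_init_phase1(adev);  // COMMON, IH (and PSP on VFs)
 *   amdgpu_device_fw_loading(adev);         // PSP/SMU microcode
 *   amdgpu_device_ip_hw_init_phase2(adev);  // all remaining blocks
 *
 * Phase 2 has to run after firmware loading because most engines need
 * their microcode in place before hw_init can bring them up.
 */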
2220 
2221 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2222 {
2223 	struct drm_sched_init_args args = {
2224 		.ops = &amdgpu_sched_ops,
2225 		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
2226 		.timeout_wq = adev->reset_domain->wq,
2227 		.dev = adev->dev,
2228 	};
2229 	long timeout;
2230 	int r, i;
2231 
2232 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2233 		struct amdgpu_ring *ring = adev->rings[i];
2234 
2235 		/* No need to set up the GPU scheduler for rings that don't need it */
2236 		if (!ring || ring->no_scheduler)
2237 			continue;
2238 
2239 		switch (ring->funcs->type) {
2240 		case AMDGPU_RING_TYPE_GFX:
2241 			timeout = adev->gfx_timeout;
2242 			break;
2243 		case AMDGPU_RING_TYPE_COMPUTE:
2244 			timeout = adev->compute_timeout;
2245 			break;
2246 		case AMDGPU_RING_TYPE_SDMA:
2247 			timeout = adev->sdma_timeout;
2248 			break;
2249 		default:
2250 			timeout = adev->video_timeout;
2251 			break;
2252 		}
2253 
2254 		args.timeout = timeout;
2255 		args.credit_limit = ring->num_hw_submission;
2256 		args.score = ring->sched_score;
2257 		args.name = ring->name;
2258 
2259 		r = drm_sched_init(&ring->sched, &args);
2260 		if (r) {
2261 			dev_err(adev->dev,
2262 				"Failed to create scheduler on ring %s.\n",
2263 				ring->name);
2264 			return r;
2265 		}
2266 		r = amdgpu_uvd_entity_init(adev, ring);
2267 		if (r) {
2268 			dev_err(adev->dev,
2269 				"Failed to create UVD scheduling entity on ring %s.\n",
2270 				ring->name);
2271 			return r;
2272 		}
2273 		r = amdgpu_vce_entity_init(adev, ring);
2274 		if (r) {
2275 			dev_err(adev->dev,
2276 				"Failed to create VCE scheduling entity on ring %s.\n",
2277 				ring->name);
2278 			return r;
2279 		}
2280 	}
2281 
2282 	if (adev->xcp_mgr)
2283 		amdgpu_xcp_update_partition_sched_list(adev);
2284 
2285 	return 0;
2286 }
2287 
2288 
2289 /**
2290  * amdgpu_device_ip_init - run init for hardware IPs
2291  *
2292  * @adev: amdgpu_device pointer
2293  *
2294  * Main initialization pass for hardware IPs.  The list of all the hardware
2295  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2296  * are run.  sw_init initializes the software state associated with each IP
2297  * and hw_init initializes the hardware associated with each IP.
2298  * Returns 0 on success, negative error code on failure.
2299  */
2300 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2301 {
2302 	bool init_badpage;
2303 	int i, r;
2304 
2305 	r = amdgpu_ras_init(adev);
2306 	if (r)
2307 		return r;
2308 
2309 	for (i = 0; i < adev->num_ip_blocks; i++) {
2310 		if (!adev->ip_blocks[i].status.valid)
2311 			continue;
2312 		if (adev->ip_blocks[i].version->funcs->sw_init) {
2313 			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2314 			if (r) {
2315 				dev_err(adev->dev,
2316 					"sw_init of IP block <%s> failed %d\n",
2317 					adev->ip_blocks[i].version->funcs->name,
2318 					r);
2319 				goto init_failed;
2320 			}
2321 		}
2322 		adev->ip_blocks[i].status.sw = true;
2323 
2324 		if (!amdgpu_ip_member_of_hwini(
2325 			    adev, adev->ip_blocks[i].version->type))
2326 			continue;
2327 
2328 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2329 			/* need to do common hw init early so everything is set up for gmc */
2330 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2331 			if (r) {
2332 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2333 					r);
2334 				goto init_failed;
2335 			}
2336 			adev->ip_blocks[i].status.hw = true;
2337 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2338 			/* need to do gmc hw init early so we can allocate gpu mem */
2339 			/* Try to reserve bad pages early */
2340 			if (amdgpu_sriov_vf(adev))
2341 				amdgpu_virt_exchange_data(adev);
2342 
2343 			r = amdgpu_device_mem_scratch_init(adev);
2344 			if (r) {
2345 				dev_err(adev->dev,
2346 					"amdgpu_mem_scratch_init failed %d\n",
2347 					r);
2348 				goto init_failed;
2349 			}
2350 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2351 			if (r) {
2352 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2353 					r);
2354 				goto init_failed;
2355 			}
2356 			r = amdgpu_device_wb_init(adev);
2357 			if (r) {
2358 				dev_err(adev->dev,
2359 					"amdgpu_device_wb_init failed %d\n", r);
2360 				goto init_failed;
2361 			}
2362 			adev->ip_blocks[i].status.hw = true;
2363 
2364 			/* right after GMC hw init, we create CSA */
2365 			if (adev->gfx.mcbp) {
2366 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2367 							       AMDGPU_GEM_DOMAIN_VRAM |
2368 							       AMDGPU_GEM_DOMAIN_GTT,
2369 							       AMDGPU_CSA_SIZE);
2370 				if (r) {
2371 					dev_err(adev->dev,
2372 						"allocate CSA failed %d\n", r);
2373 					goto init_failed;
2374 				}
2375 			}
2376 
2377 			r = amdgpu_seq64_init(adev);
2378 			if (r) {
2379 				dev_err(adev->dev, "allocate seq64 failed %d\n",
2380 					r);
2381 				goto init_failed;
2382 			}
2383 		}
2384 	}
2385 
2386 	if (amdgpu_sriov_vf(adev))
2387 		amdgpu_virt_init_data_exchange(adev);
2388 
2389 	r = amdgpu_ib_pool_init(adev);
2390 	if (r) {
2391 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2392 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2393 		goto init_failed;
2394 	}
2395 
2396 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2397 	if (r)
2398 		goto init_failed;
2399 
2400 	r = amdgpu_device_ip_hw_init_phase1(adev);
2401 	if (r)
2402 		goto init_failed;
2403 
2404 	r = amdgpu_device_fw_loading(adev);
2405 	if (r)
2406 		goto init_failed;
2407 
2408 	r = amdgpu_device_ip_hw_init_phase2(adev);
2409 	if (r)
2410 		goto init_failed;
2411 
2412 	/*
2413 	 * Retired pages will be loaded from eeprom and reserved here. This
2414 	 * must be called after amdgpu_device_ip_hw_init_phase2 since for
2415 	 * some ASICs the RAS EEPROM code relies on the SMU being fully
2416 	 * functional for I2C communication, which is only true at this point.
2417 	 *
2418 	 * amdgpu_ras_recovery_init may fail, but the caller only cares about
2419 	 * failures caused by a bad gpu situation and stops the amdgpu init
2420 	 * process accordingly. For other failure cases, it still releases
2421 	 * all the resources and prints an error message, rather than
2422 	 * returning a negative value to the upper level.
2423 	 *
2424 	 * Note: theoretically, this should be called before all vram
2425 	 * allocations to protect retired pages from being abused.
2426 	 */
2427 	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2428 	r = amdgpu_ras_recovery_init(adev, init_badpage);
2429 	if (r)
2430 		goto init_failed;
2431 
2432 	/*
2433 	 * In case of XGMI, grab an extra reference on the reset domain for this device
2434 	 */
2435 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2436 		if (amdgpu_xgmi_add_device(adev) == 0) {
2437 			if (!amdgpu_sriov_vf(adev)) {
2438 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2439 
2440 				if (WARN_ON(!hive)) {
2441 					r = -ENOENT;
2442 					goto init_failed;
2443 				}
2444 
2445 				if (!hive->reset_domain ||
2446 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2447 					r = -ENOENT;
2448 					amdgpu_put_xgmi_hive(hive);
2449 					goto init_failed;
2450 				}
2451 
2452 				/* Drop the early temporary reset domain we created for device */
2453 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2454 				adev->reset_domain = hive->reset_domain;
2455 				amdgpu_put_xgmi_hive(hive);
2456 			}
2457 		}
2458 	}
2459 
2460 	r = amdgpu_device_init_schedulers(adev);
2461 	if (r)
2462 		goto init_failed;
2463 
2464 	amdgpu_ttm_set_buffer_funcs_status(adev, true);
2465 
2466 	/* Don't init kfd if whole hive need to be reset during init */
2467 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
2468 		amdgpu_amdkfd_device_init(adev);
2469 	}
2470 
2471 	amdgpu_fru_get_product_info(adev);
2472 
2473 	r = amdgpu_cper_init(adev);
2474 
2475 init_failed:
2476 
2477 	return r;
2478 }
2479 
2480 /**
2481  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2482  *
2483  * @adev: amdgpu_device pointer
2484  *
2485  * Snapshots the reset magic value at the gart pointer in VRAM.  The driver
2486  * calls this before a GPU reset.  If the value at the gart pointer is retained
2487  * after the reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2488  */
2489 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2490 {
2491 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2492 }
2493 
2494 /**
2495  * amdgpu_device_check_vram_lost - check if vram is valid
2496  *
2497  * @adev: amdgpu_device pointer
2498  *
2499  * Checks the reset magic value written to the gart pointer in VRAM.
2500  * The driver calls this after a GPU reset to see if the contents of
2501  * VRAM were lost or not.
2502  * Returns true if vram is lost, false if not.
2503  */
2504 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2505 {
2506 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2507 			AMDGPU_RESET_MAGIC_NUM))
2508 		return true;
2509 
2510 	if (!amdgpu_in_reset(adev))
2511 		return false;
2512 
2513 	/*
2514 	 * For all ASICs with baco/mode1 reset, the VRAM is
2515 	 * always assumed to be lost.
2516 	 */
2517 	switch (amdgpu_asic_reset_method(adev)) {
2518 	case AMD_RESET_METHOD_LEGACY:
2519 	case AMD_RESET_METHOD_LINK:
2520 	case AMD_RESET_METHOD_BACO:
2521 	case AMD_RESET_METHOD_MODE1:
2522 		return true;
2523 	default:
2524 		return false;
2525 	}
2526 }
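
/*
 * Fill/check pairing (a sketch): amdgpu_device_fill_reset_magic()
 * snapshots the first AMDGPU_RESET_MAGIC_NUM bytes at the gart pointer
 * before a reset, and amdgpu_device_check_vram_lost() memcmp()s the
 * same bytes afterwards; a mismatch (or a legacy/link/baco/mode1 reset
 * method) means VRAM contents must be treated as lost.
 */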
2527 
2528 /**
2529  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2530  *
2531  * @adev: amdgpu_device pointer
2532  * @state: clockgating state (gate or ungate)
2533  *
2534  * The list of all the hardware IPs that make up the asic is walked and the
2535  * set_clockgating_state callbacks are run.
2536  * On late init, this pass enables clockgating for the hardware IPs;
2537  * on fini or suspend, it disables clockgating again.
2538  * Returns 0 on success, negative error code on failure.
2539  */
2540 
2541 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2542 			       enum amd_clockgating_state state)
2543 {
2544 	int i, j, r;
2545 
2546 	if (amdgpu_emu_mode == 1)
2547 		return 0;
2548 
2549 	for (j = 0; j < adev->num_ip_blocks; j++) {
2550 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2551 		if (!adev->ip_blocks[i].status.late_initialized)
2552 			continue;
2553 		if (!adev->ip_blocks[i].version)
2554 			continue;
2555 		/* skip CG for GFX, SDMA on S0ix */
2556 		if (adev->in_s0ix &&
2557 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2558 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2559 			continue;
2560 		/* skip CG for VCE/UVD, it's handled specially */
2561 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2562 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2563 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2564 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2565 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2566 			/* enable clockgating to save power */
2567 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
2568 										     state);
2569 			if (r) {
2570 				dev_err(adev->dev,
2571 					"set_clockgating_state of IP block <%s> failed %d\n",
2572 					adev->ip_blocks[i].version->funcs->name,
2573 					r);
2574 				return r;
2575 			}
2576 		}
2577 	}
2578 
2579 	return 0;
2580 }
2581 
2582 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2583 			       enum amd_powergating_state state)
2584 {
2585 	int i, j, r;
2586 
2587 	if (amdgpu_emu_mode == 1)
2588 		return 0;
2589 
2590 	for (j = 0; j < adev->num_ip_blocks; j++) {
2591 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2592 		if (!adev->ip_blocks[i].status.late_initialized)
2593 			continue;
2594 		if (!adev->ip_blocks[i].version)
2595 			continue;
2596 		/* skip PG for GFX, SDMA on S0ix */
2597 		if (adev->in_s0ix &&
2598 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2599 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2600 			continue;
2601 		/* skip PG for VCE/UVD, it's handled specially */
2602 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2603 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2604 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2605 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2606 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2607 			/* enable powergating to save power */
2608 			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
2609 											state);
2610 			if (r) {
2611 				dev_err(adev->dev,
2612 					"set_powergating_state of IP block <%s> failed %d\n",
2613 					adev->ip_blocks[i].version->funcs->name,
2614 					r);
2615 				return r;
2616 			}
2617 		}
2618 	}
2619 	return 0;
2620 }
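
/*
 * Note on the walk order in the two helpers above: gating walks the IP
 * list front to back while ungating walks it back to front, e.g. with
 * four blocks the gate order is 0,1,2,3 and the ungate order is
 * 3,2,1,0, mirroring init order on gate and fini order on ungate.
 */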
2621 
2622 static int amdgpu_device_enable_mgpu_fan_boost(void)
2623 {
2624 	struct amdgpu_gpu_instance *gpu_ins;
2625 	struct amdgpu_device *adev;
2626 	int i, ret = 0;
2627 
2628 	mutex_lock(&mgpu_info.mutex);
2629 
2630 	/*
2631 	 * MGPU fan boost feature should be enabled
2632 	 * only when there are two or more dGPUs in
2633 	 * the system
2634 	 */
2635 	if (mgpu_info.num_dgpu < 2)
2636 		goto out;
2637 
2638 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2639 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2640 		adev = gpu_ins->adev;
2641 		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
2642 		    !gpu_ins->mgpu_fan_enabled) {
2643 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2644 			if (ret)
2645 				break;
2646 
2647 			gpu_ins->mgpu_fan_enabled = 1;
2648 		}
2649 	}
2650 
2651 out:
2652 	mutex_unlock(&mgpu_info.mutex);
2653 
2654 	return ret;
2655 }
2656 
2657 /**
2658  * amdgpu_device_ip_late_init - run late init for hardware IPs
2659  *
2660  * @adev: amdgpu_device pointer
2661  *
2662  * Late initialization pass for hardware IPs.  The list of all the hardware
2663  * IPs that make up the asic is walked and the late_init callbacks are run.
2664  * late_init covers any special initialization that an IP requires
2665  * after all of the IPs have been initialized or something that needs to happen
2666  * late in the init process.
2667  * Returns 0 on success, negative error code on failure.
2668  */
2669 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2670 {
2671 	struct amdgpu_gpu_instance *gpu_instance;
2672 	int i = 0, r;
2673 
2674 	for (i = 0; i < adev->num_ip_blocks; i++) {
2675 		if (!adev->ip_blocks[i].status.hw)
2676 			continue;
2677 		if (adev->ip_blocks[i].version->funcs->late_init) {
2678 			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
2679 			if (r) {
2680 				dev_err(adev->dev,
2681 					"late_init of IP block <%s> failed %d\n",
2682 					adev->ip_blocks[i].version->funcs->name,
2683 					r);
2684 				return r;
2685 			}
2686 		}
2687 		adev->ip_blocks[i].status.late_initialized = true;
2688 	}
2689 
2690 	r = amdgpu_ras_late_init(adev);
2691 	if (r) {
2692 		dev_err(adev->dev, "amdgpu_ras_late_init failed %d\n", r);
2693 		return r;
2694 	}
2695 
2696 	if (!amdgpu_reset_in_recovery(adev))
2697 		amdgpu_ras_set_error_query_ready(adev, true);
2698 
2699 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2700 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2701 
2702 	amdgpu_device_fill_reset_magic(adev);
2703 
2704 	r = amdgpu_device_enable_mgpu_fan_boost();
2705 	if (r)
2706 		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
2707 
2708 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2709 	if (amdgpu_passthrough(adev) &&
2710 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2711 	     adev->asic_type == CHIP_ALDEBARAN))
2712 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2713 
2714 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2715 		mutex_lock(&mgpu_info.mutex);
2716 
2717 		/*
2718 		 * Reset the device p-state to low, as it was booted high.
2719 		 *
2720 		 * This should be performed only after all devices from the
2721 		 * same hive have been initialized.
2722 		 *
2723 		 * However, the number of devices in a hive is not known in
2724 		 * advance; it is counted one by one as devices initialize.
2725 		 *
2726 		 * So, wait until all XGMI-interlinked devices are initialized.
2727 		 * This may add some delay as those devices may come from
2728 		 * different hives, but that should be OK.
2729 		 */
2730 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2731 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2732 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2733 				if (gpu_instance->adev->flags & AMD_IS_APU)
2734 					continue;
2735 
2736 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2737 						AMDGPU_XGMI_PSTATE_MIN);
2738 				if (r) {
2739 					dev_err(adev->dev,
2740 						"pstate setting failed (%d).\n",
2741 						r);
2742 					break;
2743 				}
2744 			}
2745 		}
2746 
2747 		mutex_unlock(&mgpu_info.mutex);
2748 	}
2749 
2750 	return 0;
2751 }
2752 
2753 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
2754 {
2755 	struct amdgpu_device *adev = ip_block->adev;
2756 	int r;
2757 
2758 	if (!ip_block->version->funcs->hw_fini) {
2759 		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
2760 			ip_block->version->funcs->name);
2761 	} else {
2762 		r = ip_block->version->funcs->hw_fini(ip_block);
2763 		/* XXX handle errors */
2764 		if (r) {
2765 			dev_dbg(adev->dev,
2766 				"hw_fini of IP block <%s> failed %d\n",
2767 				ip_block->version->funcs->name, r);
2768 		}
2769 	}
2770 
2771 	ip_block->status.hw = false;
2772 }
2773 
2774 /**
2775  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2776  *
2777  * @adev: amdgpu_device pointer
2778  *
2779  * For ASICs that need to disable the SMC first
2780  */
2781 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2782 {
2783 	int i;
2784 
2785 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
2786 		return;
2787 
2788 	for (i = 0; i < adev->num_ip_blocks; i++) {
2789 		if (!adev->ip_blocks[i].status.hw)
2790 			continue;
2791 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2792 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2793 			break;
2794 		}
2795 	}
2796 }
2797 
2798 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2799 {
2800 	int i, r;
2801 
2802 	for (i = 0; i < adev->num_ip_blocks; i++) {
2803 		if (!adev->ip_blocks[i].version)
2804 			continue;
2805 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2806 			continue;
2807 
2808 		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
2809 		if (r) {
2810 			dev_dbg(adev->dev,
2811 				"early_fini of IP block <%s> failed %d\n",
2812 				adev->ip_blocks[i].version->funcs->name, r);
2813 		}
2814 	}
2815 
2816 	amdgpu_amdkfd_suspend(adev, true);
2817 	amdgpu_amdkfd_teardown_processes(adev);
2818 	amdgpu_userq_suspend(adev);
2819 
2820 	/* Workaround for ASICs that need to disable the SMC first */
2821 	amdgpu_device_smu_fini_early(adev);
2822 
2823 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2824 		if (!adev->ip_blocks[i].status.hw)
2825 			continue;
2826 
2827 		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2828 	}
2829 
2830 	if (amdgpu_sriov_vf(adev)) {
2831 		if (amdgpu_virt_release_full_gpu(adev, false))
2832 			dev_err(adev->dev,
2833 				"failed to release exclusive mode on fini\n");
2834 	}
2835 
2836 	/*
2837 	 * Driver reload on the APU can fail due to firmware validation because
2838 	 * the PSP is always running, as it is shared across the whole SoC.
2839 	 * This same issue does not occur on dGPU because it has a mechanism
2840 	 * that checks whether the PSP is running. A solution for those issues
2841 	 * in the APU is to trigger a GPU reset, but this should be done during
2842 	 * the unload phase to avoid adding boot latency and screen flicker.
2843 	 */
2844 	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
2845 		r = amdgpu_asic_reset(adev);
2846 		if (r)
2847 			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
2848 	}
2849 
2850 	return 0;
2851 }
2852 
2853 /**
2854  * amdgpu_device_ip_fini - run fini for hardware IPs
2855  *
2856  * @adev: amdgpu_device pointer
2857  *
2858  * Main teardown pass for hardware IPs.  The list of all the hardware
2859  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2860  * are run.  hw_fini tears down the hardware associated with each IP
2861  * and sw_fini tears down any software state associated with each IP.
2862  * Returns 0 on success, negative error code on failure.
2863  */
2864 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2865 {
2866 	int i, r;
2867 
2868 	amdgpu_cper_fini(adev);
2869 
2870 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2871 		amdgpu_virt_release_ras_err_handler_data(adev);
2872 
2873 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2874 		amdgpu_xgmi_remove_device(adev);
2875 
2876 	amdgpu_amdkfd_device_fini_sw(adev);
2877 
2878 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2879 		if (!adev->ip_blocks[i].status.sw)
2880 			continue;
2881 
2882 		if (!adev->ip_blocks[i].version)
2883 			continue;
2884 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2885 			amdgpu_ucode_free_bo(adev);
2886 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2887 			amdgpu_device_wb_fini(adev);
2888 			amdgpu_device_mem_scratch_fini(adev);
2889 			amdgpu_ib_pool_fini(adev);
2890 			amdgpu_seq64_fini(adev);
2891 			amdgpu_doorbell_fini(adev);
2892 		}
2893 		if (adev->ip_blocks[i].version->funcs->sw_fini) {
2894 			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
2895 			/* XXX handle errors */
2896 			if (r) {
2897 				dev_dbg(adev->dev,
2898 					"sw_fini of IP block <%s> failed %d\n",
2899 					adev->ip_blocks[i].version->funcs->name,
2900 					r);
2901 			}
2902 		}
2903 		adev->ip_blocks[i].status.sw = false;
2904 		adev->ip_blocks[i].status.valid = false;
2905 	}
2906 
2907 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2908 		if (!adev->ip_blocks[i].status.late_initialized)
2909 			continue;
2910 		if (!adev->ip_blocks[i].version)
2911 			continue;
2912 		if (adev->ip_blocks[i].version->funcs->late_fini)
2913 			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
2914 		adev->ip_blocks[i].status.late_initialized = false;
2915 	}
2916 
2917 	amdgpu_ras_fini(adev);
2918 	amdgpu_uid_fini(adev);
2919 
2920 	return 0;
2921 }
2922 
2923 /**
2924  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2925  *
2926  * @work: work_struct.
2927  */
2928 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2929 {
2930 	struct amdgpu_device *adev =
2931 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2932 	int r;
2933 
2934 	r = amdgpu_ib_ring_tests(adev);
2935 	if (r)
2936 		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
2937 }
2938 
2939 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2940 {
2941 	struct amdgpu_device *adev =
2942 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2943 
2944 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2945 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2946 
2947 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
2948 		adev->gfx.gfx_off_state = true;
2949 }
2950 
2951 /**
2952  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2953  *
2954  * @adev: amdgpu_device pointer
2955  *
2956  * Main suspend function for hardware IPs.  The list of all the hardware
2957  * IPs that make up the asic is walked, clockgating is disabled and the
2958  * suspend callbacks are run.  suspend puts the hardware and software state
2959  * in each IP into a state suitable for suspend.
2960  * Returns 0 on success, negative error code on failure.
2961  */
2962 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2963 {
2964 	int i, r, rec;
2965 
2966 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2967 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2968 
2969 	/*
2970 	 * Per the PMFW team's suggestion, the driver needs to disable the
2971 	 * gfxoff and df cstate features for gpu reset (e.g. Mode1Reset)
2972 	 * scenarios. Add the missing df cstate disablement here.
2973 	 */
2974 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2975 		dev_warn(adev->dev, "Failed to disallow df cstate\n");
2976 
2977 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2978 		if (!adev->ip_blocks[i].status.valid)
2979 			continue;
2980 
2981 		/* displays are handled separately */
2982 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2983 			continue;
2984 
2985 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
2986 		if (r)
2987 			goto unwind;
2988 	}
2989 
2990 	return 0;
2991 unwind:
2992 	rec = amdgpu_device_ip_resume_phase3(adev);
2993 	if (rec)
2994 		dev_err(adev->dev,
2995 			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
2996 			rec);
2997 
2998 	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
2999 
3000 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3001 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3002 
3003 	return r;
3004 }
3005 
3006 /**
3007  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3008  *
3009  * @adev: amdgpu_device pointer
3010  *
3011  * Main suspend function for hardware IPs.  The list of all the hardware
3012  * IPs that make up the asic is walked, clockgating is disabled and the
3013  * suspend callbacks are run.  suspend puts the hardware and software state
3014  * in each IP into a state suitable for suspend.
3015  * Returns 0 on success, negative error code on failure.
3016  */
3017 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3018 {
3019 	int i, r, rec;
3020 
3021 	if (adev->in_s0ix)
3022 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3023 
3024 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3025 		if (!adev->ip_blocks[i].status.valid)
3026 			continue;
3027 		/* displays are handled in phase1 */
3028 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3029 			continue;
3030 		/* PSP lost connection when err_event_athub occurs */
3031 		if (amdgpu_ras_intr_triggered() &&
3032 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3033 			adev->ip_blocks[i].status.hw = false;
3034 			continue;
3035 		}
3036 
3037 		/* skip unnecessary suspend if we have not initialized them yet */
3038 		if (!amdgpu_ip_member_of_hwini(
3039 			    adev, adev->ip_blocks[i].version->type))
3040 			continue;
3041 
3042 		/* Since we skip suspend for S0i3, we need to cancel the delayed
3043 		 * idle work here as the suspend callback never gets called.
3044 		 */
3045 		if (adev->in_s0ix &&
3046 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3047 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3048 			cancel_delayed_work_sync(&adev->gfx.idle_work);
3049 		/* skip suspend of gfx/mes and psp for S0ix
3050 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3051 		 * like at runtime. PSP is also part of the always on hardware
3052 		 * so no need to suspend it.
3053 		 */
3054 		if (adev->in_s0ix &&
3055 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3056 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3057 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3058 			continue;
3059 
3060 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3061 		if (adev->in_s0ix &&
3062 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3063 		     IP_VERSION(5, 0, 0)) &&
3064 		    (adev->ip_blocks[i].version->type ==
3065 		     AMD_IP_BLOCK_TYPE_SDMA))
3066 			continue;
3067 
3068 		/* During cold boot the PSP provides the IMU and RLC FW binaries to
3069 		 * the TOS. These live in the TMR and are reused by the PSP TOS to
3070 		 * reload from this location; RLC autoload is also triggered from
3071 		 * here based on the PMFW -> PSP message during re-init. Therefore,
3072 		 * psp suspend & resume should be skipped to avoid destroying the
3073 		 * TMR and reloading FWs again on IMU-enabled APU ASICs.
3074 		 */
3075 		if (amdgpu_in_reset(adev) &&
3076 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3077 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3078 			continue;
3079 
3080 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3081 		if (r)
3082 			goto unwind;
3083 
3084 		/* handle putting the SMC in the appropriate state */
3085 		if (!amdgpu_sriov_vf(adev)) {
3086 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3087 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3088 				if (r) {
3089 					dev_err(adev->dev,
3090 						"SMC failed to set mp1 state %d, %d\n",
3091 						adev->mp1_state, r);
3092 					goto unwind;
3093 				}
3094 			}
3095 		}
3096 	}
3097 
3098 	return 0;
3099 unwind:
3100 	/* unwinding suspend phase 2 = resume phase 1 + fw loading + resume phase 2 */
3101 	rec = amdgpu_device_ip_resume_phase1(adev);
3102 	if (rec) {
3103 		dev_err(adev->dev,
3104 			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3105 			rec);
3106 		return r;
3107 	}
3108 
3109 	rec = amdgpu_device_fw_loading(adev);
3110 	if (rec) {
3111 		dev_err(adev->dev,
3112 			"amdgpu_device_fw_loading failed during unwind: %d\n",
3113 			rec);
3114 		return r;
3115 	}
3116 
3117 	rec = amdgpu_device_ip_resume_phase2(adev);
3118 	if (rec) {
3119 		dev_err(adev->dev,
3120 			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3121 			rec);
3122 		return r;
3123 	}
3124 
3125 	return r;
3126 }
3127 
3128 /**
3129  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3130  *
3131  * @adev: amdgpu_device pointer
3132  *
3133  * Main suspend function for hardware IPs.  The list of all the hardware
3134  * IPs that make up the asic is walked, clockgating is disabled and the
3135  * suspend callbacks are run.  suspend puts the hardware and software state
3136  * in each IP into a state suitable for suspend.
3137  * Returns 0 on success, negative error code on failure.
3138  */
3139 static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3140 {
3141 	int r;
3142 
3143 	if (amdgpu_sriov_vf(adev)) {
3144 		amdgpu_virt_fini_data_exchange(adev);
3145 		amdgpu_virt_request_full_gpu(adev, false);
3146 	}
3147 
3148 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3149 
3150 	r = amdgpu_device_ip_suspend_phase1(adev);
3151 	if (r)
3152 		return r;
3153 	r = amdgpu_device_ip_suspend_phase2(adev);
3154 
3155 	if (amdgpu_sriov_vf(adev))
3156 		amdgpu_virt_release_full_gpu(adev, false);
3157 
3158 	return r;
3159 }
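
/*
 * Overall suspend flow implemented above (a sketch):
 *
 *   [SR-IOV] stop pf2vf data exchange and request full GPU access
 *   disable TTM buffer funcs (no more SDMA-accelerated moves)
 *   phase 1: suspend display (DCE) blocks
 *   phase 2: suspend the remaining blocks and set the SMC mp1 state
 *   [SR-IOV] release full GPU access
 */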
3160 
3161 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3162 {
3163 	int i, r;
3164 
3165 	static enum amd_ip_block_type ip_order[] = {
3166 		AMD_IP_BLOCK_TYPE_COMMON,
3167 		AMD_IP_BLOCK_TYPE_GMC,
3168 		AMD_IP_BLOCK_TYPE_PSP,
3169 		AMD_IP_BLOCK_TYPE_IH,
3170 	};
3171 
3172 	for (i = 0; i < adev->num_ip_blocks; i++) {
3173 		int j;
3174 		struct amdgpu_ip_block *block;
3175 
3176 		block = &adev->ip_blocks[i];
3177 		block->status.hw = false;
3178 
3179 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3180 
3181 			if (block->version->type != ip_order[j] ||
3182 				!block->status.valid)
3183 				continue;
3184 
3185 			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3186 			if (r) {
3187 				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3188 					 block->version->funcs->name);
3189 				return r;
3190 			}
3191 			block->status.hw = true;
3192 		}
3193 	}
3194 
3195 	return 0;
3196 }
3197 
3198 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3199 {
3200 	struct amdgpu_ip_block *block;
3201 	int i, r = 0;
3202 
3203 	static enum amd_ip_block_type ip_order[] = {
3204 		AMD_IP_BLOCK_TYPE_SMC,
3205 		AMD_IP_BLOCK_TYPE_DCE,
3206 		AMD_IP_BLOCK_TYPE_GFX,
3207 		AMD_IP_BLOCK_TYPE_SDMA,
3208 		AMD_IP_BLOCK_TYPE_MES,
3209 		AMD_IP_BLOCK_TYPE_UVD,
3210 		AMD_IP_BLOCK_TYPE_VCE,
3211 		AMD_IP_BLOCK_TYPE_VCN,
3212 		AMD_IP_BLOCK_TYPE_JPEG
3213 	};
3214 
3215 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3216 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
3217 
3218 		if (!block)
3219 			continue;
3220 
3221 		if (block->status.valid && !block->status.hw) {
3222 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3223 				r = amdgpu_ip_block_resume(block);
3224 			} else {
3225 				r = block->version->funcs->hw_init(block);
3226 			}
3227 
3228 			if (r) {
3229 				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3230 					 block->version->funcs->name);
3231 				break;
3232 			}
3233 			block->status.hw = true;
3234 		}
3235 	}
3236 
3237 	return r;
3238 }
3239 
3240 /**
3241  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3242  *
3243  * @adev: amdgpu_device pointer
3244  *
3245  * First resume function for hardware IPs.  The list of all the hardware
3246  * IPs that make up the asic is walked and the resume callbacks are run for
3247  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3248  * after a suspend and updates the software state as necessary.  This
3249  * function is also used for restoring the GPU after a GPU reset.
3250  * Returns 0 on success, negative error code on failure.
3251  */
3252 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3253 {
3254 	int i, r;
3255 
3256 	for (i = 0; i < adev->num_ip_blocks; i++) {
3257 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3258 			continue;
3259 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3260 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3261 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3262 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3263 
3264 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3265 			if (r)
3266 				return r;
3267 		}
3268 	}
3269 
3270 	return 0;
3271 }
3272 
3273 /**
3274  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3275  *
3276  * @adev: amdgpu_device pointer
3277  *
3278  * Second resume function for hardware IPs.  The list of all the hardware
3279  * IPs that make up the asic is walked and the resume callbacks are run for
3280  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3281  * functional state after a suspend and updates the software state as
3282  * necessary.  This function is also used for restoring the GPU after a GPU
3283  * reset.
3284  * Returns 0 on success, negative error code on failure.
3285  */
3286 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3287 {
3288 	int i, r;
3289 
3290 	for (i = 0; i < adev->num_ip_blocks; i++) {
3291 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3292 			continue;
3293 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3294 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3295 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3296 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3297 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3298 			continue;
3299 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3300 		if (r)
3301 			return r;
3302 	}
3303 
3304 	return 0;
3305 }
3306 
3307 /**
3308  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3309  *
3310  * @adev: amdgpu_device pointer
3311  *
3312  * Third resume function for hardware IPs.  The list of all the hardware
3313  * IPs that make up the asic is walked and the resume callbacks are run for
3314  * all DCE.  resume puts the hardware into a functional state after a suspend
3315  * and updates the software state as necessary.  This function is also used
3316  * for restoring the GPU after a GPU reset.
3317  *
3318  * Returns 0 on success, negative error code on failure.
3319  */
3320 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3321 {
3322 	int i, r;
3323 
3324 	for (i = 0; i < adev->num_ip_blocks; i++) {
3325 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3326 			continue;
3327 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3328 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3329 			if (r)
3330 				return r;
3331 		}
3332 	}
3333 
3334 	return 0;
3335 }
3336 
3337 /**
3338  * amdgpu_device_ip_resume - run resume for hardware IPs
3339  *
3340  * @adev: amdgpu_device pointer
3341  *
3342  * Main resume function for hardware IPs.  The hardware IPs
3343  * are split into multiple resume functions because they are
3344  * also used in recovering from a GPU reset and some additional
3345  * steps need to be taken between them.  In this case (S3/S4) they are
3346  * run sequentially.
3347  * Returns 0 on success, negative error code on failure.
3348  */
3349 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3350 {
3351 	int r;
3352 
3353 	r = amdgpu_device_ip_resume_phase1(adev);
3354 	if (r)
3355 		return r;
3356 
3357 	r = amdgpu_device_fw_loading(adev);
3358 	if (r)
3359 		return r;
3360 
3361 	r = amdgpu_device_ip_resume_phase2(adev);
3362 
3363 	amdgpu_ttm_set_buffer_funcs_status(adev, true);
3364 
3365 	if (r)
3366 		return r;
3367 
3368 	amdgpu_fence_driver_hw_init(adev);
3369 
3370 	r = amdgpu_device_ip_resume_phase3(adev);
3371 
3372 	return r;
3373 }
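
/*
 * Resume flow implemented above (a sketch), roughly the reverse of
 * suspend:
 *
 *   phase 1: COMMON, GMC, IH (and PSP on SR-IOV VFs)
 *   firmware loading (PSP/SMU microcode)
 *   phase 2: all remaining non-display blocks
 *   re-enable TTM buffer funcs, start the fence driver hw
 *   phase 3: display (DCE) blocks
 */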
3374 
3375 /**
3376  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3377  *
3378  * @adev: amdgpu_device pointer
3379  *
3380  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3381  */
3382 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3383 {
3384 	if (amdgpu_sriov_vf(adev)) {
3385 		if (adev->is_atom_fw) {
3386 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3387 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3388 		} else {
3389 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3390 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3391 		}
3392 
3393 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3394 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3395 	}
3396 }
3397 
3398 /**
3399  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3400  *
3401  * @pdev : pci device context
3402  * @asic_type: AMD asic type
3403  *
3404  * Check if there is DC (the new modesetting infrastructure) support for an asic.
3405  * returns true if DC has support, false if not.
3406  */
3407 bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
3408 				       enum amd_asic_type asic_type)
3409 {
3410 	switch (asic_type) {
3411 #ifdef CONFIG_DRM_AMDGPU_SI
3412 	case CHIP_HAINAN:
3413 #endif
3414 	case CHIP_TOPAZ:
3415 		/* chips with no display hardware */
3416 		return false;
3417 #if defined(CONFIG_DRM_AMD_DC)
3418 	case CHIP_TAHITI:
3419 	case CHIP_PITCAIRN:
3420 	case CHIP_VERDE:
3421 	case CHIP_OLAND:
3422 		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
3423 	default:
3424 		return amdgpu_dc != 0;
3425 #else
3426 	default:
3427 		if (amdgpu_dc > 0)
3428 			dev_info_once(
3429 				&pdev->dev,
3430 				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3431 		return false;
3432 #endif
3433 	}
3434 }
3435 
3436 /**
3437  * amdgpu_device_has_dc_support - check if dc is supported
3438  *
3439  * @adev: amdgpu_device pointer
3440  *
3441  * Returns true for supported, false for not supported
3442  */
3443 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3444 {
3445 	if (adev->enable_virtual_display ||
3446 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3447 		return false;
3448 
3449 	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
3450 }
3451 
3452 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3453 {
3454 	struct amdgpu_device *adev =
3455 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3456 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3457 
3458 	/* It's a bug to not have a hive within this function */
3459 	if (WARN_ON(!hive))
3460 		return;
3461 
3462 	/*
3463 	 * Use task barrier to synchronize all xgmi reset works across the
3464 	 * hive. task_barrier_enter and task_barrier_exit will block
3465 	 * until all the threads running the xgmi reset works reach
3466 	 * those points. task_barrier_full will do both blocks.
3467 	 */
3468 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3469 
3470 		task_barrier_enter(&hive->tb);
3471 		adev->asic_reset_res = amdgpu_device_baco_enter(adev);
3472 
3473 		if (adev->asic_reset_res)
3474 			goto fail;
3475 
3476 		task_barrier_exit(&hive->tb);
3477 		adev->asic_reset_res = amdgpu_device_baco_exit(adev);
3478 
3479 		if (adev->asic_reset_res)
3480 			goto fail;
3481 
3482 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3483 	} else {
3484 
3485 		task_barrier_full(&hive->tb);
3486 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3487 	}
3488 
3489 fail:
3490 	if (adev->asic_reset_res)
3491 		dev_warn(adev->dev,
3492 			 "ASIC reset failed with error %d for drm dev %s\n",
3493 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3494 	amdgpu_put_xgmi_hive(hive);
3495 }
3496 
3497 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3498 {
3499 	char *input = amdgpu_lockup_timeout;
3500 	char *timeout_setting = NULL;
3501 	int index = 0;
3502 	long timeout;
3503 	int ret = 0;
3504 
3505 	/* By default timeout for all queues is 2 sec */
3506 	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3507 		adev->video_timeout = msecs_to_jiffies(2000);
3508 
3509 	if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
3510 		return 0;
3511 
3512 	while ((timeout_setting = strsep(&input, ",")) &&
3513 	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3514 		ret = kstrtol(timeout_setting, 0, &timeout);
3515 		if (ret)
3516 			return ret;
3517 
3518 		if (timeout == 0) {
3519 			index++;
3520 			continue;
3521 		} else if (timeout < 0) {
3522 			timeout = MAX_SCHEDULE_TIMEOUT;
3523 			dev_warn(adev->dev, "lockup timeout disabled\n");
3524 			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3525 		} else {
3526 			timeout = msecs_to_jiffies(timeout);
3527 		}
3528 
3529 		switch (index++) {
3530 		case 0:
3531 			adev->gfx_timeout = timeout;
3532 			break;
3533 		case 1:
3534 			adev->compute_timeout = timeout;
3535 			break;
3536 		case 2:
3537 			adev->sdma_timeout = timeout;
3538 			break;
3539 		case 3:
3540 			adev->video_timeout = timeout;
3541 			break;
3542 		default:
3543 			break;
3544 		}
3545 	}
3546 
3547 	/* When only one value is specified, apply it to all queues. */
3548 	if (index == 1)
3549 		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3550 			adev->video_timeout = timeout;
3551 
3552 	return ret;
3553 }
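
/*
 * Example of the option parsed above (hypothetical values):
 *
 *   lockup_timeout=10000,20000,15000,5000
 *
 * sets the gfx timeout to 10s, compute to 20s, sdma to 15s and video
 * to 5s.  A single value applies to all four queues, 0 keeps the 2 sec
 * default for that position, and a negative value disables the timeout
 * entirely for that queue.
 */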
3554 
3555 /**
3556  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3557  *
3558  * @adev: amdgpu_device pointer
3559  *
3560  * RAM is directly mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3561  */
3562 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3563 {
3564 	struct iommu_domain *domain;
3565 
3566 	domain = iommu_get_domain_for_dev(adev->dev);
3567 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3568 		adev->ram_is_direct_mapped = true;
3569 }
3570 
3571 #if defined(CONFIG_HSA_AMD_P2P)
3572 /**
3573  * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
3574  *
3575  * @adev: amdgpu_device pointer
3576  *
 * Returns true if DMA remapping is enabled for the device.
3578  */
3579 static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
3580 {
3581 	struct iommu_domain *domain;
3582 
3583 	domain = iommu_get_domain_for_dev(adev->dev);
3584 	if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
3585 		domain->type ==	IOMMU_DOMAIN_DMA_FQ))
3586 		return true;
3587 
3588 	return false;
3589 }
3590 #endif
3591 
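/*
 * Note on the amdgpu_mcbp handling below: amdgpu.mcbp=1 forces
 * mid-command-buffer preemption on and amdgpu.mcbp=0 forces it off; any
 * other value (presumably the auto default) leaves the per-ASIC setting
 * untouched. SR-IOV VFs always run with MCBP enabled regardless of the
 * parameter.
 */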
3592 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3593 {
3594 	if (amdgpu_mcbp == 1)
3595 		adev->gfx.mcbp = true;
3596 	else if (amdgpu_mcbp == 0)
3597 		adev->gfx.mcbp = false;
3598 
3599 	if (amdgpu_sriov_vf(adev))
3600 		adev->gfx.mcbp = true;
3601 
3602 	if (adev->gfx.mcbp)
3603 		dev_info(adev->dev, "MCBP is enabled\n");
3604 }
3605 
3606 static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
3607 {
3608 	int r;
3609 
3610 	r = amdgpu_atombios_sysfs_init(adev);
3611 	if (r)
3612 		drm_err(&adev->ddev,
3613 			"registering atombios sysfs failed (%d).\n", r);
3614 
3615 	r = amdgpu_pm_sysfs_init(adev);
3616 	if (r)
3617 		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
3618 
3619 	r = amdgpu_ucode_sysfs_init(adev);
3620 	if (r) {
3621 		adev->ucode_sysfs_en = false;
3622 		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
	} else {
		adev->ucode_sysfs_en = true;
	}
3625 
3626 	r = amdgpu_device_attr_sysfs_init(adev);
3627 	if (r)
3628 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3629 
3630 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
3631 	if (r)
3632 		dev_err(adev->dev,
3633 			"Could not create amdgpu board attributes\n");
3634 
3635 	amdgpu_fru_sysfs_init(adev);
3636 	amdgpu_reg_state_sysfs_init(adev);
3637 	amdgpu_xcp_sysfs_init(adev);
3638 	amdgpu_uma_sysfs_init(adev);
3639 
3640 	return r;
3641 }
3642 
3643 static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
3644 {
3645 	if (adev->pm.sysfs_initialized)
3646 		amdgpu_pm_sysfs_fini(adev);
3647 	if (adev->ucode_sysfs_en)
3648 		amdgpu_ucode_sysfs_fini(adev);
3649 	amdgpu_device_attr_sysfs_fini(adev);
3650 	amdgpu_fru_sysfs_fini(adev);
3651 
3652 	amdgpu_reg_state_sysfs_fini(adev);
3653 	amdgpu_xcp_sysfs_fini(adev);
3654 	amdgpu_uma_sysfs_fini(adev);
3655 }
3656 
3657 /**
3658  * amdgpu_device_init - initialize the driver
3659  *
3660  * @adev: amdgpu_device pointer
3661  * @flags: driver flags
3662  *
3663  * Initializes the driver info and hw (all asics).
3664  * Returns 0 for success or an error on failure.
3665  * Called at driver startup.
3666  */
3667 int amdgpu_device_init(struct amdgpu_device *adev,
3668 		       uint32_t flags)
3669 {
3670 	struct pci_dev *pdev = adev->pdev;
3671 	int r, i;
3672 	bool px = false;
3673 	u32 max_MBps;
3674 	int tmp;
3675 
3676 	adev->shutdown = false;
3677 	adev->flags = flags;
3678 
3679 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3680 		adev->asic_type = amdgpu_force_asic_type;
3681 	else
3682 		adev->asic_type = flags & AMD_ASIC_MASK;
3683 
3684 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3685 	if (amdgpu_emu_mode == 1)
3686 		adev->usec_timeout *= 10;
3687 	adev->gmc.gart_size = 512 * 1024 * 1024;
3688 	adev->accel_working = false;
3689 	adev->num_rings = 0;
3690 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3691 	adev->mman.buffer_funcs = NULL;
3692 	adev->mman.buffer_funcs_ring = NULL;
3693 	adev->vm_manager.vm_pte_funcs = NULL;
3694 	adev->vm_manager.vm_pte_num_scheds = 0;
3695 	adev->gmc.gmc_funcs = NULL;
3696 	adev->harvest_ip_mask = 0x0;
3697 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3698 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3699 
3700 	amdgpu_reg_access_init(adev);
3701 
3702 	dev_info(
3703 		adev->dev,
3704 		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3705 		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3706 		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3707 
	/* Mutex initialization is all done here so we can
	 * call these functions again later without locking issues.
	 */
3711 	mutex_init(&adev->firmware.mutex);
3712 	mutex_init(&adev->pm.mutex);
3713 	mutex_init(&adev->gfx.gpu_clock_mutex);
3714 	mutex_init(&adev->srbm_mutex);
3715 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3716 	mutex_init(&adev->gfx.gfx_off_mutex);
3717 	mutex_init(&adev->gfx.partition_mutex);
3718 	mutex_init(&adev->grbm_idx_mutex);
3719 	mutex_init(&adev->mn_lock);
3720 	mutex_init(&adev->virt.vf_errors.lock);
3721 	hash_init(adev->mn_hash);
3722 	mutex_init(&adev->psp.mutex);
3723 	mutex_init(&adev->notifier_lock);
3724 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3725 	mutex_init(&adev->benchmark_mutex);
3726 	mutex_init(&adev->gfx.reset_sem_mutex);
3727 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
3728 	mutex_init(&adev->enforce_isolation_mutex);
3729 	for (i = 0; i < MAX_XCP; ++i) {
3730 		adev->isolation[i].spearhead = dma_fence_get_stub();
3731 		amdgpu_sync_create(&adev->isolation[i].active);
3732 		amdgpu_sync_create(&adev->isolation[i].prev);
3733 	}
3734 	mutex_init(&adev->gfx.userq_sch_mutex);
3735 	mutex_init(&adev->gfx.workload_profile_mutex);
3736 	mutex_init(&adev->vcn.workload_profile_mutex);
3737 
3738 	amdgpu_device_init_apu_flags(adev);
3739 
3740 	r = amdgpu_device_check_arguments(adev);
3741 	if (r)
3742 		return r;
3743 
3744 	spin_lock_init(&adev->mmio_idx_lock);
3745 	spin_lock_init(&adev->mm_stats.lock);
3746 	spin_lock_init(&adev->virt.rlcg_reg_lock);
3747 	spin_lock_init(&adev->wb.lock);
3748 
3749 	xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
3750 
3751 	INIT_LIST_HEAD(&adev->reset_list);
3752 
3753 	INIT_LIST_HEAD(&adev->ras_list);
3754 
3755 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3756 
3757 	xa_init(&adev->userq_doorbell_xa);
3758 
3759 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3760 			  amdgpu_device_delayed_init_work_handler);
3761 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3762 			  amdgpu_device_delay_enable_gfx_off);
3763 	/*
3764 	 * Initialize the enforce_isolation work structures for each XCP
3765 	 * partition.  This work handler is responsible for enforcing shader
3766 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
3767 	 * each GFX and compute ring.  If there are any fences, it schedules
3768 	 * the `enforce_isolation_work` to be run after a delay.  If there are
3769 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
3770 	 * runqueue.
3771 	 */
3772 	for (i = 0; i < MAX_XCP; i++) {
3773 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
3774 				  amdgpu_gfx_enforce_isolation_handler);
3775 		adev->gfx.enforce_isolation[i].adev = adev;
3776 		adev->gfx.enforce_isolation[i].xcp_id = i;
3777 	}
3778 
3779 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3780 	INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
3781 
3782 	adev->gfx.gfx_off_req_count = 1;
3783 	adev->gfx.gfx_off_residency = 0;
3784 	adev->gfx.gfx_off_entrycount = 0;
3785 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3786 
3787 	atomic_set(&adev->throttling_logging_enabled, 1);
3788 	/*
3789 	 * If throttling continues, logging will be performed every minute
3790 	 * to avoid log flooding. "-1" is subtracted since the thermal
3791 	 * throttling interrupt comes every second. Thus, the total logging
	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
	 * for the throttling interrupt) = 60 seconds.
3794 	 */
3795 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3796 
3797 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3798 
3799 	/* Registers mapping */
3800 	/* TODO: block userspace mapping of io register */
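	/* The register BAR differs by generation: BAR 5 on CHIP_BONAIRE
	 * and newer, BAR 2 on older parts, as selected below.
	 */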
3801 	if (adev->asic_type >= CHIP_BONAIRE) {
3802 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3803 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3804 	} else {
3805 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3806 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3807 	}
3808 
3809 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3810 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3811 
3812 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3813 	if (!adev->rmmio)
3814 		return -ENOMEM;
3815 
3816 	dev_info(adev->dev, "register mmio base: 0x%08X\n",
3817 		 (uint32_t)adev->rmmio_base);
3818 	dev_info(adev->dev, "register mmio size: %u\n",
3819 		 (unsigned int)adev->rmmio_size);
3820 
3821 	/*
3822 	 * Reset domain needs to be present early, before XGMI hive discovered
3823 	 * (if any) and initialized to use reset sem and in_gpu reset flag
3824 	 * early on during init and before calling to RREG32.
3825 	 */
3826 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3827 	if (!adev->reset_domain)
3828 		return -ENOMEM;
3829 
3830 	/* detect hw virtualization here */
3831 	amdgpu_virt_init(adev);
3832 
3833 	amdgpu_device_get_pcie_info(adev);
3834 
3835 	r = amdgpu_device_get_job_timeout_settings(adev);
3836 	if (r) {
3837 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3838 		return r;
3839 	}
3840 
3841 	amdgpu_device_set_mcbp(adev);
3842 
3843 	/*
3844 	 * By default, use default mode where all blocks are expected to be
3845 	 * initialized. At present a 'swinit' of blocks is required to be
3846 	 * completed before the need for a different level is detected.
3847 	 */
3848 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
3849 	/* early init functions */
3850 	r = amdgpu_device_ip_early_init(adev);
3851 	if (r)
3852 		return r;
3853 
3854 	/*
3855 	 * No need to remove conflicting FBs for non-display class devices.
	 * This prevents the sysfb from being freed accidentally.
3857 	 */
3858 	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
3859 	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
3860 		/* Get rid of things like offb */
3861 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
3862 		if (r)
3863 			return r;
3864 	}
3865 
3866 	/* Enable TMZ based on IP_VERSION */
3867 	amdgpu_gmc_tmz_set(adev);
3868 
3869 	if (amdgpu_sriov_vf(adev) &&
3870 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
		/* VF MMIO access (except the mailbox range) from the CPU
		 * will be blocked during SR-IOV runtime
		 */
3874 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
3875 
3876 	amdgpu_gmc_noretry_set(adev);
	/* Need to get XGMI info early to decide the reset behavior */
3878 	if (adev->gmc.xgmi.supported) {
3879 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
3880 		if (r)
3881 			return r;
3882 	}
3883 
3884 	/* enable PCIE atomic ops */
3885 	if (amdgpu_sriov_vf(adev)) {
3886 		if (adev->virt.fw_reserve.p_pf2vf)
3887 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3888 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3889 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
	/* APUs with gfx9 onwards don't rely on PCIe atomics; their
	 * internal path natively supports atomics, so set
	 * have_atomics_support to true.
	 */
3893 	} else if ((adev->flags & AMD_IS_APU &&
3894 		   amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) ||
3895 		   (adev->gmc.xgmi.connected_to_cpu &&
3896 		   amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))) {
3897 		adev->have_atomics_support = true;
3898 	} else {
3899 		adev->have_atomics_support =
3900 			!pci_enable_atomic_ops_to_root(adev->pdev,
3901 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3902 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3903 	}
3904 
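	/*
	 * Note: some compute features (e.g. KFD user-mode queues on
	 * certain dGPUs) are understood to depend on CAS32/CAS64 completer
	 * support at the root port; without it those features may simply
	 * be unavailable rather than the whole device failing.
	 */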
3905 	if (!adev->have_atomics_support)
3906 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3907 
	/* doorbell BAR mapping and doorbell index init */
3909 	amdgpu_doorbell_init(adev);
3910 
3911 	if (amdgpu_emu_mode == 1) {
3912 		/* post the asic on emulation mode */
3913 		emu_soc_asic_init(adev);
3914 		goto fence_driver_init;
3915 	}
3916 
3917 	amdgpu_reset_init(adev);
3918 
3919 	/* detect if we are with an SRIOV vbios */
3920 	if (adev->bios)
3921 		amdgpu_device_detect_sriov_bios(adev);
3922 
3923 	/* check if we need to reset the asic
3924 	 *  E.g., driver was not cleanly unloaded previously, etc.
3925 	 */
3926 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3927 		if (adev->gmc.xgmi.num_physical_nodes) {
3928 			dev_info(adev->dev, "Pending hive reset.\n");
3929 			amdgpu_set_init_level(adev,
3930 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
		} else {
			tmp = amdgpu_reset_method;
			/* It should do a default reset when loading or
			 * reloading the driver, regardless of the module
			 * parameter reset_method.
			 */
			amdgpu_reset_method = AMD_RESET_METHOD_NONE;
			r = amdgpu_asic_reset(adev);
			amdgpu_reset_method = tmp;
		}
3940 
		if (r) {
			dev_err(adev->dev, "asic reset on init failed\n");
			goto failed;
		}
3945 	}
3946 
3947 	/* Post card if necessary */
3948 	if (amdgpu_device_need_post(adev)) {
3949 		if (!adev->bios) {
3950 			dev_err(adev->dev, "no vBIOS found\n");
3951 			r = -EINVAL;
3952 			goto failed;
3953 		}
3954 		dev_info(adev->dev, "GPU posting now...\n");
3955 		r = amdgpu_device_asic_init(adev);
3956 		if (r) {
3957 			dev_err(adev->dev, "gpu post error!\n");
3958 			goto failed;
3959 		}
3960 	}
3961 
3962 	if (adev->bios) {
3963 		if (adev->is_atom_fw) {
3964 			/* Initialize clocks */
3965 			r = amdgpu_atomfirmware_get_clock_info(adev);
3966 			if (r) {
3967 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3968 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3969 				goto failed;
3970 			}
3971 		} else {
3972 			/* Initialize clocks */
3973 			r = amdgpu_atombios_get_clock_info(adev);
3974 			if (r) {
3975 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3976 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3977 				goto failed;
3978 			}
3979 			/* init i2c buses */
3980 			amdgpu_i2c_init(adev);
3981 		}
3982 	}
3983 
3984 fence_driver_init:
3985 	/* Fence driver */
3986 	r = amdgpu_fence_driver_sw_init(adev);
3987 	if (r) {
3988 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3989 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3990 		goto failed;
3991 	}
3992 
3993 	/* init the mode config */
3994 	drm_mode_config_init(adev_to_drm(adev));
3995 
3996 	r = amdgpu_device_ip_init(adev);
3997 	if (r) {
3998 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3999 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4000 		goto release_ras_con;
4001 	}
4002 
4003 	amdgpu_fence_driver_hw_init(adev);
4004 
4005 	dev_info(adev->dev,
4006 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4007 			adev->gfx.config.max_shader_engines,
4008 			adev->gfx.config.max_sh_per_se,
4009 			adev->gfx.config.max_cu_per_sh,
4010 			adev->gfx.cu_info.number);
4011 
4012 	adev->accel_working = true;
4013 
4014 	amdgpu_vm_check_compute_bug(adev);
4015 
4016 	/* Initialize the buffer migration limit. */
4017 	if (amdgpu_moverate >= 0)
4018 		max_MBps = amdgpu_moverate;
4019 	else
4020 		max_MBps = 8; /* Allow 8 MB/s. */
4021 	/* Get a log2 for easy divisions. */
4022 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
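	/*
	 * For example, the default 8 MB/s gives log2_max_MBps = 3, so a
	 * rate limit can later be derived from byte counts with shifts
	 * instead of divisions.
	 */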
4023 
4024 	/*
4025 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4026 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4027 	 * gpu instance is counted less.
4028 	 */
4029 	amdgpu_register_gpu_instance(adev);
4030 
4031 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4032 	 * explicit gating rather than handling it automatically.
4033 	 */
4034 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4035 		r = amdgpu_device_ip_late_init(adev);
4036 		if (r) {
4037 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4038 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4039 			goto release_ras_con;
4040 		}
4041 		/* must succeed. */
4042 		amdgpu_ras_resume(adev);
4043 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4044 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4045 	}
4046 
4047 	if (amdgpu_sriov_vf(adev)) {
4048 		amdgpu_virt_release_full_gpu(adev, true);
4049 		flush_delayed_work(&adev->delayed_init_work);
4050 	}
4051 
4052 	/* Don't init kfd if whole hive need to be reset during init */
4053 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4054 		kgd2kfd_init_zone_device(adev);
4055 		kfd_update_svm_support_properties(adev);
4056 	}
4057 
4058 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4059 		amdgpu_xgmi_reset_on_init(adev);
4060 
4061 	/*
4062 	 * Place those sysfs registering after `late_init`. As some of those
4063 	 * operations performed in `late_init` might affect the sysfs
4064 	 * interfaces creating.
4065 	 */
4066 	r = amdgpu_device_sys_interface_init(adev);
4067 
	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
		r = amdgpu_pmu_init(adev);
		if (r)
			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
4072 
	/* Keep the stored PCI config space at hand for restore after a sudden PCI error */
4074 	if (amdgpu_device_cache_pci_state(adev->pdev))
4075 		pci_restore_state(pdev);
4076 
4077 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4078 	/* this will fail for cards that aren't VGA class devices, just
4079 	 * ignore it
4080 	 */
4081 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4082 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4083 
4084 	px = amdgpu_device_supports_px(adev);
4085 
4086 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4087 				apple_gmux_detect(NULL, NULL)))
4088 		vga_switcheroo_register_client(adev->pdev,
4089 					       &amdgpu_switcheroo_ops, px);
4090 
4091 	if (px)
4092 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4093 
4094 	amdgpu_device_check_iommu_direct_map(adev);
4095 
4096 	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4097 	r = register_pm_notifier(&adev->pm_nb);
4098 	if (r)
4099 		goto failed;
4100 
4101 	return 0;
4102 
4103 release_ras_con:
4104 	if (amdgpu_sriov_vf(adev))
4105 		amdgpu_virt_release_full_gpu(adev, true);
4106 
4107 	/* failed in exclusive mode due to timeout */
4108 	if (amdgpu_sriov_vf(adev) &&
4109 		!amdgpu_sriov_runtime(adev) &&
4110 		amdgpu_virt_mmio_blocked(adev) &&
4111 		!amdgpu_virt_wait_reset(adev)) {
4112 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4113 		/* Don't send request since VF is inactive. */
4114 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4115 		adev->virt.ops = NULL;
4116 		r = -EAGAIN;
4117 	}
4118 	amdgpu_release_ras_context(adev);
4119 
4120 failed:
4121 	amdgpu_vf_error_trans_all(adev);
4122 
4123 	return r;
4124 }
4125 
4126 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4127 {
4128 
4129 	/* Clear all CPU mappings pointing to this device */
4130 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4131 
4132 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4133 	amdgpu_doorbell_fini(adev);
4134 
4135 	iounmap(adev->rmmio);
4136 	adev->rmmio = NULL;
4137 	if (adev->mman.aper_base_kaddr)
4138 		iounmap(adev->mman.aper_base_kaddr);
4139 	adev->mman.aper_base_kaddr = NULL;
4140 
4141 	/* Memory manager related */
4142 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4143 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4144 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4145 	}
4146 }
4147 
4148 /**
4149  * amdgpu_device_fini_hw - tear down the driver
4150  *
4151  * @adev: amdgpu_device pointer
4152  *
4153  * Tear down the driver info (all asics).
4154  * Called at driver shutdown.
4155  */
4156 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4157 {
4158 	dev_info(adev->dev, "finishing device.\n");
4159 	flush_delayed_work(&adev->delayed_init_work);
4160 
4161 	if (adev->mman.initialized)
4162 		drain_workqueue(adev->mman.bdev.wq);
4163 	adev->shutdown = true;
4164 
4165 	unregister_pm_notifier(&adev->pm_nb);
4166 
4167 	/* make sure IB test finished before entering exclusive mode
4168 	 * to avoid preemption on IB test
4169 	 */
4170 	if (amdgpu_sriov_vf(adev)) {
4171 		amdgpu_virt_request_full_gpu(adev, false);
4172 		amdgpu_virt_fini_data_exchange(adev);
4173 	}
4174 
4175 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
4176 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
4177 
4178 	/* disable all interrupts */
4179 	amdgpu_irq_disable_all(adev);
4180 	if (adev->mode_info.mode_config_initialized) {
4181 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4182 			drm_helper_force_disable_all(adev_to_drm(adev));
4183 		else
4184 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4185 	}
4186 	amdgpu_fence_driver_hw_fini(adev);
4187 
4188 	amdgpu_device_sys_interface_fini(adev);
4189 
4190 	/* disable ras feature must before hw fini */
4191 	amdgpu_ras_pre_fini(adev);
4192 
4193 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4194 
4195 	/*
4196 	 * device went through surprise hotplug; we need to destroy topology
4197 	 * before ip_fini_early to prevent kfd locking refcount issues by calling
4198 	 * amdgpu_amdkfd_suspend()
4199 	 */
4200 	if (pci_dev_is_disconnected(adev->pdev))
4201 		amdgpu_amdkfd_device_fini_sw(adev);
4202 
4203 	amdgpu_device_ip_fini_early(adev);
4204 
4205 	amdgpu_irq_fini_hw(adev);
4206 
4207 	if (adev->mman.initialized)
4208 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4209 
4210 	amdgpu_gart_dummy_page_fini(adev);
4211 
4212 	if (pci_dev_is_disconnected(adev->pdev))
		amdgpu_device_unmap_mmio(adev);
}
4216 
4217 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4218 {
4219 	int i, idx;
4220 	bool px;
4221 
4222 	amdgpu_device_ip_fini(adev);
4223 	amdgpu_fence_driver_sw_fini(adev);
4224 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4225 	adev->accel_working = false;
4226 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4227 	for (i = 0; i < MAX_XCP; ++i) {
4228 		dma_fence_put(adev->isolation[i].spearhead);
4229 		amdgpu_sync_free(&adev->isolation[i].active);
4230 		amdgpu_sync_free(&adev->isolation[i].prev);
4231 	}
4232 
4233 	amdgpu_reset_fini(adev);
4234 
4235 	/* free i2c buses */
4236 	amdgpu_i2c_fini(adev);
4237 
4238 	if (adev->bios) {
4239 		if (amdgpu_emu_mode != 1)
4240 			amdgpu_atombios_fini(adev);
4241 		amdgpu_bios_release(adev);
4242 	}
4243 
4244 	kfree(adev->fru_info);
4245 	adev->fru_info = NULL;
4246 
4247 	kfree(adev->xcp_mgr);
4248 	adev->xcp_mgr = NULL;
4249 
4250 	px = amdgpu_device_supports_px(adev);
4251 
4252 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4253 				apple_gmux_detect(NULL, NULL)))
4254 		vga_switcheroo_unregister_client(adev->pdev);
4255 
4256 	if (px)
4257 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4258 
4259 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4260 		vga_client_unregister(adev->pdev);
4261 
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		iounmap(adev->rmmio);
4265 		adev->rmmio = NULL;
4266 		drm_dev_exit(idx);
4267 	}
4268 
4269 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4270 		amdgpu_pmu_fini(adev);
4271 	if (adev->discovery.bin)
4272 		amdgpu_discovery_fini(adev);
4273 
4274 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4275 	adev->reset_domain = NULL;
4276 
4277 	kfree(adev->pci_state);
4278 	kfree(adev->pcie_reset_ctx.swds_pcistate);
4279 	kfree(adev->pcie_reset_ctx.swus_pcistate);
4280 }
4281 
4282 /**
4283  * amdgpu_device_evict_resources - evict device resources
4284  * @adev: amdgpu device object
4285  *
 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4287  * of the vram memory type. Mainly used for evicting device resources
4288  * at suspend time.
4289  *
4290  */
4291 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4292 {
4293 	int ret;
4294 
4295 	/* No need to evict vram on APUs unless going to S4 */
4296 	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
4297 		return 0;
4298 
4299 	/* No need to evict when going to S5 through S4 callbacks */
4300 	if (system_state == SYSTEM_POWER_OFF)
4301 		return 0;
4302 
4303 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4304 	if (ret) {
4305 		dev_warn(adev->dev, "evicting device resources failed\n");
4306 		return ret;
4307 	}
4308 
4309 	if (adev->in_s4) {
4310 		ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
4311 		if (ret)
4312 			dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
4313 	}
4314 	return ret;
4315 }
4316 
4317 /*
4318  * Suspend & resume.
4319  */
4320 /**
4321  * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
4322  * @nb: notifier block
4323  * @mode: suspend mode
 * @data: notifier data (unused)
4325  *
4326  * This function is called when the system is about to suspend or hibernate.
4327  * It is used to set the appropriate flags so that eviction can be optimized
4328  * in the pm prepare callback.
4329  */
4330 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
4331 				     void *data)
4332 {
4333 	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
4334 
4335 	switch (mode) {
4336 	case PM_HIBERNATION_PREPARE:
4337 		adev->in_s4 = true;
4338 		break;
4339 	case PM_POST_HIBERNATION:
4340 		adev->in_s4 = false;
4341 		break;
4342 	}
4343 
4344 	return NOTIFY_DONE;
4345 }
4346 
4347 /**
4348  * amdgpu_device_prepare - prepare for device suspend
4349  *
4350  * @dev: drm dev pointer
4351  *
4352  * Prepare to put the hw in the suspend state (all asics).
4353  * Returns 0 for success or an error on failure.
4354  * Called at driver suspend.
4355  */
4356 int amdgpu_device_prepare(struct drm_device *dev)
4357 {
4358 	struct amdgpu_device *adev = drm_to_adev(dev);
4359 	int i, r;
4360 
4361 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4362 		return 0;
4363 
4364 	/* Evict the majority of BOs before starting suspend sequence */
4365 	r = amdgpu_device_evict_resources(adev);
4366 	if (r)
4367 		return r;
4368 
4369 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4370 
4371 	for (i = 0; i < adev->num_ip_blocks; i++) {
4372 		if (!adev->ip_blocks[i].status.valid)
4373 			continue;
4374 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4375 			continue;
4376 		r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4377 		if (r)
4378 			return r;
4379 	}
4380 
4381 	return 0;
4382 }
4383 
4384 /**
4385  * amdgpu_device_complete - complete power state transition
4386  *
4387  * @dev: drm dev pointer
4388  *
4389  * Undo the changes from amdgpu_device_prepare. This will be
4390  * called on all resume transitions, including those that failed.
4391  */
4392 void amdgpu_device_complete(struct drm_device *dev)
4393 {
4394 	struct amdgpu_device *adev = drm_to_adev(dev);
4395 	int i;
4396 
4397 	for (i = 0; i < adev->num_ip_blocks; i++) {
4398 		if (!adev->ip_blocks[i].status.valid)
4399 			continue;
4400 		if (!adev->ip_blocks[i].version->funcs->complete)
4401 			continue;
4402 		adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
4403 	}
4404 }
4405 
4406 /**
4407  * amdgpu_device_suspend - initiate device suspend
4408  *
4409  * @dev: drm dev pointer
4410  * @notify_clients: notify in-kernel DRM clients
4411  *
4412  * Puts the hw in the suspend state (all asics).
4413  * Returns 0 for success or an error on failure.
4414  * Called at driver suspend.
4415  */
4416 int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
4417 {
4418 	struct amdgpu_device *adev = drm_to_adev(dev);
4419 	int r, rec;
4420 
4421 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4422 		return 0;
4423 
4424 	adev->in_suspend = true;
4425 
4426 	if (amdgpu_sriov_vf(adev)) {
4427 		if (!adev->in_runpm)
4428 			amdgpu_amdkfd_suspend_process(adev);
4429 		amdgpu_virt_fini_data_exchange(adev);
4430 		r = amdgpu_virt_request_full_gpu(adev, false);
4431 		if (r)
4432 			return r;
4433 	}
4434 
4435 	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
4436 	if (r)
4437 		goto unwind_sriov;
4438 
4439 	if (notify_clients)
4440 		drm_client_dev_suspend(adev_to_drm(adev));
4441 
4442 	cancel_delayed_work_sync(&adev->delayed_init_work);
4443 
4444 	amdgpu_ras_suspend(adev);
4445 
4446 	r = amdgpu_device_ip_suspend_phase1(adev);
4447 	if (r)
4448 		goto unwind_smartshift;
4449 
4450 	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4451 	r = amdgpu_userq_suspend(adev);
4452 	if (r)
4453 		goto unwind_ip_phase1;
4454 
4455 	r = amdgpu_device_evict_resources(adev);
4456 	if (r)
4457 		goto unwind_userq;
4458 
4459 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4460 
4461 	amdgpu_fence_driver_hw_fini(adev);
4462 
4463 	r = amdgpu_device_ip_suspend_phase2(adev);
4464 	if (r)
4465 		goto unwind_evict;
4466 
4467 	if (amdgpu_sriov_vf(adev))
4468 		amdgpu_virt_release_full_gpu(adev, false);
4469 
4470 	return 0;
4471 
4472 unwind_evict:
4473 	amdgpu_ttm_set_buffer_funcs_status(adev, true);
4474 	amdgpu_fence_driver_hw_init(adev);
4475 
4476 unwind_userq:
4477 	rec = amdgpu_userq_resume(adev);
4478 	if (rec) {
4479 		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
4480 		return r;
4481 	}
4482 	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4483 	if (rec) {
4484 		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
4485 		return r;
4486 	}
4487 
4488 unwind_ip_phase1:
4489 	/* suspend phase 1 = resume phase 3 */
4490 	rec = amdgpu_device_ip_resume_phase3(adev);
4491 	if (rec) {
4492 		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
4493 		return r;
4494 	}
4495 
4496 unwind_smartshift:
4497 	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
4498 	if (rec) {
4499 		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
4500 		return r;
4501 	}
4502 
4503 	if (notify_clients)
4504 		drm_client_dev_resume(adev_to_drm(adev));
4505 
4506 	amdgpu_ras_resume(adev);
4507 
4508 unwind_sriov:
4509 	if (amdgpu_sriov_vf(adev)) {
4510 		rec = amdgpu_virt_request_full_gpu(adev, true);
4511 		if (rec) {
4512 			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
4513 			return r;
4514 		}
4515 	}
4516 
4517 	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
4518 
4519 	return r;
4520 }
4521 
4522 static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
4523 {
4524 	int r;
4525 	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
4526 
4527 	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
4528 	 * may not work. The access could be blocked by nBIF protection as VF isn't in
4529 	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
4530 	 * so that QEMU reprograms MSIX table.
4531 	 */
4532 	amdgpu_restore_msix(adev);
4533 
4534 	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4535 	if (r)
4536 		return r;
4537 
4538 	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
4539 		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
4540 
4541 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
4542 	adev->vm_manager.vram_base_offset +=
4543 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
4544 
4545 	return 0;
4546 }
4547 
4548 /**
4549  * amdgpu_device_resume - initiate device resume
4550  *
4551  * @dev: drm dev pointer
4552  * @notify_clients: notify in-kernel DRM clients
4553  *
4554  * Bring the hw back to operating state (all asics).
4555  * Returns 0 for success or an error on failure.
4556  * Called at driver resume.
4557  */
4558 int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
4559 {
4560 	struct amdgpu_device *adev = drm_to_adev(dev);
4561 	int r = 0;
4562 
4563 	if (amdgpu_sriov_vf(adev)) {
4564 		r = amdgpu_virt_request_full_gpu(adev, true);
4565 		if (r)
4566 			return r;
4567 	}
4568 
4569 	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
4570 		r = amdgpu_virt_resume(adev);
4571 		if (r)
4572 			goto exit;
4573 	}
4574 
4575 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4576 		return 0;
4577 
4578 	if (adev->in_s0ix)
4579 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4580 
4581 	/* post card */
4582 	if (amdgpu_device_need_post(adev)) {
4583 		r = amdgpu_device_asic_init(adev);
4584 		if (r)
4585 			dev_err(adev->dev, "amdgpu asic init failed\n");
4586 	}
4587 
4588 	r = amdgpu_device_ip_resume(adev);
4589 
4590 	if (r) {
4591 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4592 		goto exit;
4593 	}
4594 
4595 	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4596 	if (r)
4597 		goto exit;
4598 
4599 	r = amdgpu_userq_resume(adev);
4600 	if (r)
4601 		goto exit;
4602 
4603 	r = amdgpu_device_ip_late_init(adev);
4604 	if (r)
4605 		goto exit;
4606 
4607 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4608 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4609 exit:
4610 	if (amdgpu_sriov_vf(adev)) {
4611 		amdgpu_virt_init_data_exchange(adev);
4612 		amdgpu_virt_release_full_gpu(adev, true);
4613 
4614 		if (!r && !adev->in_runpm)
4615 			r = amdgpu_amdkfd_resume_process(adev);
4616 	}
4617 
4618 	if (r)
4619 		return r;
4620 
4621 	/* Make sure IB tests flushed */
4622 	flush_delayed_work(&adev->delayed_init_work);
4623 
4624 	if (notify_clients)
4625 		drm_client_dev_resume(adev_to_drm(adev));
4626 
4627 	amdgpu_ras_resume(adev);
4628 
4629 	if (adev->mode_info.num_crtc) {
4630 		/*
4631 		 * Most of the connector probing functions try to acquire runtime pm
4632 		 * refs to ensure that the GPU is powered on when connector polling is
4633 		 * performed. Since we're calling this from a runtime PM callback,
4634 		 * trying to acquire rpm refs will cause us to deadlock.
4635 		 *
4636 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4637 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4638 		 */
4639 #ifdef CONFIG_PM
4640 		dev->dev->power.disable_depth++;
4641 #endif
4642 		if (!adev->dc_enabled)
4643 			drm_helper_hpd_irq_event(dev);
4644 		else
4645 			drm_kms_helper_hotplug_event(dev);
4646 #ifdef CONFIG_PM
4647 		dev->dev->power.disable_depth--;
4648 #endif
4649 	}
4650 
4651 	amdgpu_vram_mgr_clear_reset_blocks(adev);
4652 	adev->in_suspend = false;
4653 
4654 	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
4655 		dev_warn(adev->dev, "smart shift update failed\n");
4656 
4657 	return 0;
4658 }
4659 
4660 /**
4661  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4662  *
4663  * @adev: amdgpu_device pointer
4664  *
4665  * The list of all the hardware IPs that make up the asic is walked and
4666  * the check_soft_reset callbacks are run.  check_soft_reset determines
4667  * if the asic is still hung or not.
4668  * Returns true if any of the IPs are still in a hung state, false if not.
4669  */
4670 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4671 {
4672 	int i;
4673 	bool asic_hang = false;
4674 
4675 	if (amdgpu_sriov_vf(adev))
4676 		return true;
4677 
4678 	if (amdgpu_asic_need_full_reset(adev))
4679 		return true;
4680 
4681 	for (i = 0; i < adev->num_ip_blocks; i++) {
4682 		if (!adev->ip_blocks[i].status.valid)
4683 			continue;
4684 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4685 			adev->ip_blocks[i].status.hang =
4686 				adev->ip_blocks[i].version->funcs->check_soft_reset(
4687 					&adev->ip_blocks[i]);
4688 		if (adev->ip_blocks[i].status.hang) {
4689 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4690 			asic_hang = true;
4691 		}
4692 	}
4693 	return asic_hang;
4694 }
4695 
4696 /**
4697  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4698  *
4699  * @adev: amdgpu_device pointer
4700  *
4701  * The list of all the hardware IPs that make up the asic is walked and the
4702  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4703  * handles any IP specific hardware or software state changes that are
4704  * necessary for a soft reset to succeed.
4705  * Returns 0 on success, negative error code on failure.
4706  */
4707 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4708 {
4709 	int i, r = 0;
4710 
4711 	for (i = 0; i < adev->num_ip_blocks; i++) {
4712 		if (!adev->ip_blocks[i].status.valid)
4713 			continue;
4714 		if (adev->ip_blocks[i].status.hang &&
4715 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4716 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
4717 			if (r)
4718 				return r;
4719 		}
4720 	}
4721 
4722 	return 0;
4723 }
4724 
4725 /**
4726  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4727  *
4728  * @adev: amdgpu_device pointer
4729  *
4730  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4731  * reset is necessary to recover.
4732  * Returns true if a full asic reset is required, false if not.
4733  */
4734 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4735 {
4736 	int i;
4737 
4738 	if (amdgpu_asic_need_full_reset(adev))
4739 		return true;
4740 
4741 	for (i = 0; i < adev->num_ip_blocks; i++) {
4742 		if (!adev->ip_blocks[i].status.valid)
4743 			continue;
4744 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4745 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4746 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4747 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4748 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4749 			if (adev->ip_blocks[i].status.hang) {
4750 				dev_info(adev->dev, "Some block need full reset!\n");
4751 				return true;
4752 			}
4753 		}
4754 	}
4755 	return false;
4756 }
4757 
4758 /**
4759  * amdgpu_device_ip_soft_reset - do a soft reset
4760  *
4761  * @adev: amdgpu_device pointer
4762  *
4763  * The list of all the hardware IPs that make up the asic is walked and the
4764  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4765  * IP specific hardware or software state changes that are necessary to soft
4766  * reset the IP.
4767  * Returns 0 on success, negative error code on failure.
4768  */
4769 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4770 {
4771 	int i, r = 0;
4772 
4773 	for (i = 0; i < adev->num_ip_blocks; i++) {
4774 		if (!adev->ip_blocks[i].status.valid)
4775 			continue;
4776 		if (adev->ip_blocks[i].status.hang &&
4777 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4778 			r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
4779 			if (r)
4780 				return r;
4781 		}
4782 	}
4783 
4784 	return 0;
4785 }
4786 
4787 /**
4788  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4789  *
4790  * @adev: amdgpu_device pointer
4791  *
4792  * The list of all the hardware IPs that make up the asic is walked and the
4793  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4794  * handles any IP specific hardware or software state changes that are
4795  * necessary after the IP has been soft reset.
4796  * Returns 0 on success, negative error code on failure.
4797  */
4798 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4799 {
4800 	int i, r = 0;
4801 
4802 	for (i = 0; i < adev->num_ip_blocks; i++) {
4803 		if (!adev->ip_blocks[i].status.valid)
4804 			continue;
4805 		if (adev->ip_blocks[i].status.hang &&
4806 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4807 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
4808 		if (r)
4809 			return r;
4810 	}
4811 
4812 	return 0;
4813 }
4814 
4815 /**
4816  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4817  *
4818  * @adev: amdgpu_device pointer
4819  * @reset_context: amdgpu reset context pointer
4820  *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, otherwise an error.
4823  */
4824 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4825 				     struct amdgpu_reset_context *reset_context)
4826 {
4827 	int r;
4828 	struct amdgpu_hive_info *hive = NULL;
4829 
4830 	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
4831 		if (!amdgpu_ras_get_fed_status(adev))
4832 			amdgpu_virt_ready_to_reset(adev);
4833 		amdgpu_virt_wait_reset(adev);
4834 		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
4835 		r = amdgpu_virt_request_full_gpu(adev, true);
4836 	} else {
4837 		r = amdgpu_virt_reset_gpu(adev);
4838 	}
4839 	if (r)
4840 		return r;
4841 
4842 	amdgpu_ras_clear_err_state(adev);
4843 	amdgpu_irq_gpu_reset_resume_helper(adev);
4844 
	/* some SW cleanup the VF needs to do before recovery */
4846 	amdgpu_virt_post_reset(adev);
4847 
4848 	/* Resume IP prior to SMC */
4849 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4850 	if (r)
4851 		return r;
4852 
4853 	amdgpu_virt_init_data_exchange(adev);
4854 
4855 	r = amdgpu_device_fw_loading(adev);
4856 	if (r)
4857 		return r;
4858 
4859 	/* now we are okay to resume SMC/CP/SDMA */
4860 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4861 	if (r)
4862 		return r;
4863 
4864 	hive = amdgpu_get_xgmi_hive(adev);
4865 	/* Update PSP FW topology after reset */
4866 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4867 		r = amdgpu_xgmi_update_topology(hive, adev);
4868 	if (hive)
4869 		amdgpu_put_xgmi_hive(hive);
4870 	if (r)
4871 		return r;
4872 
4873 	r = amdgpu_ib_ring_tests(adev);
4874 	if (r)
4875 		return r;
4876 
4877 	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
4878 		amdgpu_inc_vram_lost(adev);
4879 
4880 	/* need to be called during full access so we can't do it later like
4881 	 * bare-metal does.
4882 	 */
4883 	amdgpu_amdkfd_post_reset(adev);
4884 	amdgpu_virt_release_full_gpu(adev, true);
4885 
	/* Aldebaran and gfx_11_0_3 support RAS in SR-IOV, so we need to resume RAS during reset */
4887 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
4888 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
4889 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
4890 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
4891 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
4892 		amdgpu_ras_resume(adev);
4893 
4894 	amdgpu_virt_ras_telemetry_post_reset(adev);
4895 
4896 	return 0;
4897 }
4898 
4899 /**
4900  * amdgpu_device_has_job_running - check if there is any unfinished job
4901  *
4902  * @adev: amdgpu_device pointer
4903  *
 * Check if there is any job running on the device when the guest driver
 * receives an FLR notification from the host driver. If there are still
 * jobs running, the guest driver will not respond to the FLR reset.
 * Instead, it lets the job hit the timeout and then issues the reset request.
4908  */
4909 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4910 {
4911 	int i;
4912 
4913 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4914 		struct amdgpu_ring *ring = adev->rings[i];
4915 
4916 		if (!amdgpu_ring_sched_ready(ring))
4917 			continue;
4918 
4919 		if (amdgpu_fence_count_emitted(ring))
4920 			return true;
4921 	}
4922 	return false;
4923 }
4924 
4925 /**
4926  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4927  *
4928  * @adev: amdgpu_device pointer
4929  *
4930  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4931  * a hung GPU.
4932  */
4933 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4934 {
4935 
4936 	if (amdgpu_gpu_recovery == 0)
4937 		goto disabled;
4938 
4939 	/* Skip soft reset check in fatal error mode */
4940 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4941 		return true;
4942 
4943 	if (amdgpu_sriov_vf(adev))
4944 		return true;
4945 
4946 	if (amdgpu_gpu_recovery == -1) {
4947 		switch (adev->asic_type) {
4948 #ifdef CONFIG_DRM_AMDGPU_SI
4949 		case CHIP_VERDE:
4950 		case CHIP_TAHITI:
4951 		case CHIP_PITCAIRN:
4952 		case CHIP_OLAND:
4953 		case CHIP_HAINAN:
4954 #endif
4955 #ifdef CONFIG_DRM_AMDGPU_CIK
4956 		case CHIP_KAVERI:
4957 		case CHIP_KABINI:
4958 		case CHIP_MULLINS:
4959 #endif
4960 		case CHIP_CARRIZO:
4961 		case CHIP_STONEY:
4962 		case CHIP_CYAN_SKILLFISH:
4963 			goto disabled;
4964 		default:
4965 			break;
4966 		}
4967 	}
4968 
4969 	return true;
4970 
disabled:
	dev_info(adev->dev, "GPU recovery disabled.\n");
	return false;
4974 }
4975 
4976 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4977 {
4978 	u32 i;
4979 	int ret = 0;
4980 
4981 	if (adev->bios)
4982 		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4983 
4984 	dev_info(adev->dev, "GPU mode1 reset\n");
4985 
4986 	/* Cache the state before bus master disable. The saved config space
4987 	 * values are used in other cases like restore after mode-2 reset.
4988 	 */
4989 	amdgpu_device_cache_pci_state(adev->pdev);
4990 
4991 	/* disable BM */
4992 	pci_clear_master(adev->pdev);
4993 
4994 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4995 		dev_info(adev->dev, "GPU smu mode1 reset\n");
4996 		ret = amdgpu_dpm_mode1_reset(adev);
4997 	} else {
4998 		dev_info(adev->dev, "GPU psp mode1 reset\n");
4999 		ret = psp_gpu_reset(adev);
5000 	}
5001 
5002 	if (ret)
5003 		goto mode1_reset_failed;
5004 
5005 	/* enable mmio access after mode 1 reset completed */
5006 	adev->no_hw_access = false;
5007 
5008 	/* ensure no_hw_access is updated before we access hw */
5009 	smp_mb();
5010 
5011 	amdgpu_device_load_pci_state(adev->pdev);
5012 	ret = amdgpu_psp_wait_for_bootloader(adev);
5013 	if (ret)
5014 		goto mode1_reset_failed;
5015 
5016 	/* wait for asic to come out of reset */
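	/*
	 * While the ASIC is held in reset, MMIO reads are generally
	 * expected to return all ones, hence the 0xffffffff check on
	 * memsize below.
	 */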
5017 	for (i = 0; i < adev->usec_timeout; i++) {
5018 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5019 
5020 		if (memsize != 0xffffffff)
5021 			break;
5022 		udelay(1);
5023 	}
5024 
5025 	if (i >= adev->usec_timeout) {
5026 		ret = -ETIMEDOUT;
5027 		goto mode1_reset_failed;
5028 	}
5029 
5030 	if (adev->bios)
5031 		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5032 
5033 	return 0;
5034 
5035 mode1_reset_failed:
5036 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5037 	return ret;
5038 }
5039 
5040 int amdgpu_device_link_reset(struct amdgpu_device *adev)
5041 {
5042 	int ret = 0;
5043 
5044 	dev_info(adev->dev, "GPU link reset\n");
5045 
5046 	if (!amdgpu_reset_in_dpc(adev))
5047 		ret = amdgpu_dpm_link_reset(adev);
5048 
5049 	if (ret)
5050 		goto link_reset_failed;
5051 
5052 	ret = amdgpu_psp_wait_for_bootloader(adev);
5053 	if (ret)
5054 		goto link_reset_failed;
5055 
5056 	return 0;
5057 
5058 link_reset_failed:
5059 	dev_err(adev->dev, "GPU link reset failed\n");
5060 	return ret;
5061 }
5062 
5063 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5064 				 struct amdgpu_reset_context *reset_context)
5065 {
5066 	int i, r = 0;
5067 	struct amdgpu_job *job = NULL;
5068 	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5069 	bool need_full_reset =
5070 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5071 
5072 	if (reset_context->reset_req_dev == adev)
5073 		job = reset_context->job;
5074 
5075 	if (amdgpu_sriov_vf(adev))
5076 		amdgpu_virt_pre_reset(adev);
5077 
5078 	amdgpu_fence_driver_isr_toggle(adev, true);
5079 
5080 	/* block all schedulers and reset given job's ring */
5081 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5082 		struct amdgpu_ring *ring = adev->rings[i];
5083 
5084 		if (!amdgpu_ring_sched_ready(ring))
5085 			continue;
5086 
5087 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5088 		amdgpu_fence_driver_force_completion(ring);
5089 	}
5090 
5091 	amdgpu_fence_driver_isr_toggle(adev, false);
5092 
5093 	if (job && job->vm)
5094 		drm_sched_increase_karma(&job->base);
5095 
5096 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5097 	/* If reset handler not implemented, continue; otherwise return */
5098 	if (r == -EOPNOTSUPP)
5099 		r = 0;
5100 	else
5101 		return r;
5102 
5103 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {
5106 		if (!need_full_reset)
5107 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5108 
5109 		if (!need_full_reset && amdgpu_gpu_recovery &&
5110 		    amdgpu_device_ip_check_soft_reset(adev)) {
5111 			amdgpu_device_ip_pre_soft_reset(adev);
5112 			r = amdgpu_device_ip_soft_reset(adev);
5113 			amdgpu_device_ip_post_soft_reset(adev);
5114 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5115 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5116 				need_full_reset = true;
5117 			}
5118 		}
5119 
5120 		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5121 			dev_info(tmp_adev->dev, "Dumping IP State\n");
5122 			/* Trigger ip dump before we reset the asic */
5123 			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5124 				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5125 					tmp_adev->ip_blocks[i].version->funcs
5126 						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5127 			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5128 		}
5129 
		if (need_full_reset) {
			r = amdgpu_device_ip_suspend(adev);
			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
		} else {
			clear_bit(AMDGPU_NEED_FULL_RESET,
				  &reset_context->flags);
		}
5137 	}
5138 
5139 	return r;
5140 }
5141 
5142 int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5143 {
5144 	struct list_head *device_list_handle;
5145 	bool full_reset, vram_lost = false;
5146 	struct amdgpu_device *tmp_adev;
5147 	int r, init_level;
5148 
5149 	device_list_handle = reset_context->reset_device_list;
5150 
5151 	if (!device_list_handle)
5152 		return -EINVAL;
5153 
5154 	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5155 
5156 	/**
5157 	 * If it's reset on init, it's default init level, otherwise keep level
5158 	 * as recovery level.
5159 	 */
5160 	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5161 			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5162 	else
5163 			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5164 
5165 	r = 0;
5166 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5167 		amdgpu_set_init_level(tmp_adev, init_level);
5168 		if (full_reset) {
5169 			/* post card */
5170 			amdgpu_reset_set_dpc_status(tmp_adev, false);
5171 			amdgpu_ras_clear_err_state(tmp_adev);
5172 			r = amdgpu_device_asic_init(tmp_adev);
5173 			if (r) {
5174 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5175 			} else {
5176 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5177 
5178 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5179 				if (r)
5180 					goto out;
5181 
5182 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5183 
5184 				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5185 					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5186 
5187 				if (vram_lost) {
5188 					dev_info(
5189 						tmp_adev->dev,
5190 						"VRAM is lost due to GPU reset!\n");
5191 					amdgpu_inc_vram_lost(tmp_adev);
5192 				}
5193 
5194 				r = amdgpu_device_fw_loading(tmp_adev);
5195 				if (r)
5196 					return r;
5197 
5198 				r = amdgpu_xcp_restore_partition_mode(
5199 					tmp_adev->xcp_mgr);
5200 				if (r)
5201 					goto out;
5202 
5203 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5204 				if (r)
5205 					goto out;
5206 
5207 				amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5208 
5209 				r = amdgpu_device_ip_resume_phase3(tmp_adev);
5210 				if (r)
5211 					goto out;
5212 
5213 				if (vram_lost)
5214 					amdgpu_device_fill_reset_magic(tmp_adev);
5215 
5216 				/*
5217 				 * Add this ASIC as tracked as reset was already
5218 				 * complete successfully.
5219 				 */
5220 				amdgpu_register_gpu_instance(tmp_adev);
5221 
5222 				if (!reset_context->hive &&
5223 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5224 					amdgpu_xgmi_add_device(tmp_adev);
5225 
5226 				r = amdgpu_device_ip_late_init(tmp_adev);
5227 				if (r)
5228 					goto out;
5229 
5230 				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
5231 				if (r)
5232 					goto out;
5233 
5234 				drm_client_dev_resume(adev_to_drm(tmp_adev));
5235 
5236 				/*
5237 				 * The GPU enters bad state once faulty pages
5238 				 * by ECC has reached the threshold, and ras
5239 				 * recovery is scheduled next. So add one check
5240 				 * here to break recovery if it indeed exceeds
5241 				 * bad page threshold, and remind user to
5242 				 * retire this GPU or setting one bigger
5243 				 * bad_page_threshold value to fix this once
5244 				 * probing driver again.
5245 				 */
5246 				if (!amdgpu_ras_is_rma(tmp_adev)) {
5247 					/* must succeed. */
5248 					amdgpu_ras_resume(tmp_adev);
5249 				} else {
5250 					r = -EINVAL;
5251 					goto out;
5252 				}
5253 
5254 				/* Update PSP FW topology after reset */
5255 				if (reset_context->hive &&
5256 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5257 					r = amdgpu_xgmi_update_topology(
5258 						reset_context->hive, tmp_adev);
5259 			}
5260 		}
5261 
5262 out:
5263 		if (!r) {
5264 			/* IP init is complete now, set level as default */
5265 			amdgpu_set_init_level(tmp_adev,
5266 					      AMDGPU_INIT_LEVEL_DEFAULT);
5267 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5268 			r = amdgpu_ib_ring_tests(tmp_adev);
5269 			if (r) {
5270 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5271 				r = -EAGAIN;
5272 				goto end;
5273 			}
5274 		}
5275 
5276 		if (r)
5277 			tmp_adev->asic_reset_res = r;
5278 	}
5279 
5280 end:
5281 	return r;
5282 }
5283 
5284 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5285 			 struct amdgpu_reset_context *reset_context)
5286 {
5287 	struct amdgpu_device *tmp_adev = NULL;
5288 	bool need_full_reset, skip_hw_reset;
5289 	int r = 0;
5290 
5291 	/* Try reset handler method first */
5292 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5293 				    reset_list);
5294 
5295 	reset_context->reset_device_list = device_list_handle;
5296 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5297 	/* If reset handler not implemented, continue; otherwise return */
5298 	if (r == -EOPNOTSUPP)
5299 		r = 0;
5300 	else
5301 		return r;
5302 
5303 	/* Reset handler not implemented, use the default method */
5304 	need_full_reset =
5305 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5306 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5307 
5308 	/*
5309 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper link negotiation in FW (within 1 sec)
5311 	 */
5312 	if (!skip_hw_reset && need_full_reset) {
5313 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5314 			/* For XGMI run all resets in parallel to speed up the process */
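			/* queue_work() returns false when the work item is
			 * already queued, hence -EALREADY below.
			 */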
5315 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5316 				if (!queue_work(system_unbound_wq,
5317 						&tmp_adev->xgmi_reset_work))
5318 					r = -EALREADY;
			} else {
				r = amdgpu_asic_reset(tmp_adev);
			}
5321 
5322 			if (r) {
				dev_err(tmp_adev->dev,
					"ASIC reset failed with error %d for drm dev %s\n",
					r, adev_to_drm(tmp_adev)->unique);
5326 				goto out;
5327 			}
5328 		}
5329 
5330 		/* For XGMI wait for all resets to complete before proceed */
5331 		if (!r) {
5332 			list_for_each_entry(tmp_adev, device_list_handle,
5333 					    reset_list) {
5334 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5335 					flush_work(&tmp_adev->xgmi_reset_work);
5336 					r = tmp_adev->asic_reset_res;
5337 					if (r)
5338 						break;
5339 				}
5340 			}
5341 		}
5342 	}
5343 
5344 	if (!r && amdgpu_ras_intr_triggered()) {
5345 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5346 			amdgpu_ras_reset_error_count(tmp_adev,
5347 						     AMDGPU_RAS_BLOCK__MMHUB);
5348 		}
5349 
5350 		amdgpu_ras_intr_cleared();
5351 	}
5352 
5353 	r = amdgpu_device_reinit_after_reset(reset_context);
5354 	if (r == -EAGAIN)
5355 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5356 	else
5357 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5358 
5359 out:
5360 	return r;
5361 }
5362 
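/* Set the MP1 (SMU) state expected for the pending reset method */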
5363 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5364 {
5366 	switch (amdgpu_asic_reset_method(adev)) {
5367 	case AMD_RESET_METHOD_MODE1:
5368 	case AMD_RESET_METHOD_LINK:
5369 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5370 		break;
5371 	case AMD_RESET_METHOD_MODE2:
5372 		adev->mp1_state = PP_MP1_STATE_RESET;
5373 		break;
5374 	default:
5375 		adev->mp1_state = PP_MP1_STATE_NONE;
5376 		break;
5377 	}
5378 }
5379 
5380 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5381 {
5382 	amdgpu_vf_error_trans_all(adev);
5383 	adev->mp1_state = PP_MP1_STATE_NONE;
5384 }
5385 
5386 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5387 {
5388 	struct pci_dev *p = NULL;
5389 
5390 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5391 			adev->pdev->bus->number, 1);
5392 	if (p) {
5393 		pm_runtime_enable(&(p->dev));
5394 		pm_runtime_resume(&(p->dev));
5395 	}
5396 
5397 	pci_dev_put(p);
5398 }
5399 
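/*
 * Runtime-suspend the audio function (function 1) of the GPU's PCI device
 * before a BACO or mode1 reset, waiting until it is actually suspended or
 * the autosuspend deadline expires.
 */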
5400 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5401 {
5402 	enum amd_reset_method reset_method;
5403 	struct pci_dev *p = NULL;
5404 	u64 expires;
5405 
5406 	/*
5407 	 * For now, only BACO and mode1 reset are confirmed to
5408 	 * suffer from the audio issue when not properly suspended.
5409 	 */
5410 	reset_method = amdgpu_asic_reset_method(adev);
5411 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5412 	     (reset_method != AMD_RESET_METHOD_MODE1))
5413 		return -EINVAL;
5414 
5415 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5416 			adev->pdev->bus->number, 1);
5417 	if (!p)
5418 		return -ENODEV;
5419 
5420 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5421 	if (!expires)
5422 		/*
5423 		 * If we cannot get the audio device autosuspend delay,
5424 		 * fall back to a fixed 4s timeout. The audio controller's
5425 		 * default autosuspend delay is 3s, so 4s is guaranteed
5426 		 * to cover it.
5427 		 */
5428 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5429 
5430 	while (!pm_runtime_status_suspended(&(p->dev))) {
5431 		if (!pm_runtime_suspend(&(p->dev)))
5432 			break;
5433 
5434 		if (expires < ktime_get_mono_fast_ns()) {
5435 			dev_warn(adev->dev, "failed to suspend display audio\n");
5436 			pci_dev_put(p);
5437 			/* TODO: abort the succeeding gpu reset? */
5438 			return -ETIMEDOUT;
5439 		}
5440 	}
5441 
5442 	pm_runtime_disable(&(p->dev));
5443 
5444 	pci_dev_put(p);
5445 	return 0;
5446 }
5447 
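/* Cancel any non-scheduler reset work still queued for this device */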
5448 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5449 {
5450 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5451 
5452 #if defined(CONFIG_DEBUG_FS)
5453 	if (!amdgpu_sriov_vf(adev))
5454 		cancel_work(&adev->reset_work);
5455 #endif
5456 	cancel_work(&adev->userq_reset_work);
5457 
5458 	if (adev->kfd.dev)
5459 		cancel_work(&adev->kfd.reset_work);
5460 
5461 	if (amdgpu_sriov_vf(adev))
5462 		cancel_work(&adev->virt.flr_work);
5463 
5464 	if (con && adev->ras_enabled)
5465 		cancel_work(&con->recovery_work);
5467 }
5468 
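/* Returns non-zero if any device in the list fails its PCI bus status check */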
5469 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5470 {
5471 	struct amdgpu_device *tmp_adev;
5472 	int ret = 0;
5473 
5474 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5475 		ret |= amdgpu_device_bus_status_check(tmp_adev);
5476 	}
5477 
5478 	return ret;
5479 }
5480 
5481 static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
5482 					  struct list_head *device_list,
5483 					  struct amdgpu_hive_info *hive)
5484 {
5485 	struct amdgpu_device *tmp_adev = NULL;
5486 
5487 	/*
5488 	 * Build list of devices to reset.
5489 	 * In case we are in XGMI hive mode, resort the device list
5490 	 * to put adev in the 1st position.
5491 	 */
5492 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5493 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5494 			list_add_tail(&tmp_adev->reset_list, device_list);
5495 			if (adev->shutdown)
5496 				tmp_adev->shutdown = true;
5497 			if (amdgpu_reset_in_dpc(adev))
5498 				tmp_adev->pcie_reset_ctx.in_link_reset = true;
5499 		}
5500 		if (!list_is_first(&adev->reset_list, device_list))
5501 			list_rotate_to_front(&adev->reset_list, device_list);
5502 	} else {
5503 		list_add_tail(&adev->reset_list, device_list);
5504 	}
5505 }
5506 
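/* Lock the shared reset domain once, via the first device in the list */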
5507 static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
5508 						  struct list_head *device_list)
5509 {
5510 	struct amdgpu_device *tmp_adev = NULL;
5511 
5512 	if (list_empty(device_list))
5513 		return;
5514 	tmp_adev =
5515 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5516 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5517 }
5518 
5519 static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
5520 						  struct list_head *device_list)
5521 {
5522 	struct amdgpu_device *tmp_adev = NULL;
5523 
5524 	if (list_empty(device_list))
5525 		return;
5526 	tmp_adev =
5527 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5528 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5529 }
5530 
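/*
 * Quiesce all devices in the list before the reset: suspend display audio,
 * notify KFD and user queues, suspend RAS, and stop the ring schedulers
 * (killing off the remaining jobs for an emergency restart).
 */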
5531 static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
5532 					  struct amdgpu_job *job,
5533 					  struct amdgpu_reset_context *reset_context,
5534 					  struct list_head *device_list,
5535 					  struct amdgpu_hive_info *hive,
5536 					  bool need_emergency_restart)
5537 {
5538 	struct amdgpu_device *tmp_adev = NULL;
5539 	int i;
5540 
5541 	/* block all schedulers and reset given job's ring */
5542 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5543 		amdgpu_device_set_mp1_state(tmp_adev);
5544 
5545 		/*
5546 		 * Try to put the audio codec into suspend state
5547 		 * before the gpu reset starts.
5548 		 *
5549 		 * The power domain of the graphics device is
5550 		 * shared with the AZ power domain. Without this,
5551 		 * we may change the audio hardware from behind
5552 		 * the audio driver's back, which triggers
5553 		 * audio codec errors.
5554 		 */
5555 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5556 			tmp_adev->pcie_reset_ctx.audio_suspended = true;
5557 
5558 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5559 
5560 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5561 
5562 		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5563 
5564 		/*
5565 		 * Mark these ASICs to be reset as untracked first,
5566 		 * and add them back after the reset completes.
5567 		 */
5568 		amdgpu_unregister_gpu_instance(tmp_adev);
5569 
5570 		drm_client_dev_suspend(adev_to_drm(tmp_adev));
5571 
5572 		/* disable ras on ALL IPs */
5573 		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
5574 		    amdgpu_device_ip_need_full_reset(tmp_adev))
5575 			amdgpu_ras_suspend(tmp_adev);
5576 
5577 		amdgpu_userq_pre_reset(tmp_adev);
5578 
5579 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5580 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5581 
5582 			if (!amdgpu_ring_sched_ready(ring))
5583 				continue;
5584 
5585 			drm_sched_wqueue_stop(&ring->sched);
5586 
5587 			if (need_emergency_restart)
5588 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5589 		}
5590 		atomic_inc(&tmp_adev->gpu_reset_counter);
5591 	}
5592 }
5593 
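/*
 * Run the pre-reset steps on every device, then perform the actual reset:
 * for SR-IOV VFs go through amdgpu_device_reset_sriov() (retrying a limited
 * number of times), otherwise through amdgpu_do_asic_reset(). Any pending
 * non-scheduler resets queued before this point are dropped afterwards.
 */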
5594 static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
5595 			      struct list_head *device_list,
5596 			      struct amdgpu_reset_context *reset_context)
5597 {
5598 	struct amdgpu_device *tmp_adev = NULL;
5599 	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5600 	int r = 0;
5601 
5602 retry:	/* Pre ASIC reset for the rest of the adevs in the XGMI hive. */
5603 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5604 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5605 		/* TODO: Should we stop? */
5606 		if (r) {
5607 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with error %d for drm dev %s\n",
5608 				  r, adev_to_drm(tmp_adev)->unique);
5609 			tmp_adev->asic_reset_res = r;
5610 		}
5611 	}
5612 
5613 	/* Actual ASIC resets if needed. */
5614 	/* Host driver will handle XGMI hive reset for SRIOV */
5615 	if (amdgpu_sriov_vf(adev)) {
5617 		/* Bail out of reset early */
5618 		if (amdgpu_ras_is_rma(adev))
5619 			return -ENODEV;
5620 
5621 		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5622 			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5623 			amdgpu_ras_set_fed(adev, true);
5624 			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5625 		}
5626 
5627 		r = amdgpu_device_reset_sriov(adev, reset_context);
5628 		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5629 			amdgpu_virt_release_full_gpu(adev, true);
5630 			goto retry;
5631 		}
5632 		if (r)
5633 			adev->asic_reset_res = r;
5634 	} else {
5635 		r = amdgpu_do_asic_reset(device_list, reset_context);
5636 		if (r == -EAGAIN)
5637 			goto retry;
5638 	}
5639 
5640 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5641 		/*
5642 		 * Drop any pending non-scheduler resets queued before the reset is done.
5643 		 * Any reset scheduled after this point would be valid. Scheduler resets
5644 		 * were already dropped during drm_sched_stop and no new ones can come
5645 		 * in before drm_sched_start.
5646 		 */
5647 		amdgpu_device_stop_pending_resets(tmp_adev);
5648 	}
5649 
5650 	return r;
5651 }
5652 
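/*
 * Restart the ring schedulers after the reset and report the per-device
 * outcome; returns the first non-zero asic_reset_res, if any.
 */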
5653 static int amdgpu_device_sched_resume(struct list_head *device_list,
5654 			      struct amdgpu_reset_context *reset_context,
5655 			      bool   job_signaled)
5656 {
5657 	struct amdgpu_device *tmp_adev = NULL;
5658 	int i, r = 0;
5659 
5660 	/* Post ASIC reset for all devs. */
5661 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5663 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5664 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5665 
5666 			if (!amdgpu_ring_sched_ready(ring))
5667 				continue;
5668 
5669 			drm_sched_wqueue_start(&ring->sched);
5670 		}
5671 
5672 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5673 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5674 
5675 		if (tmp_adev->asic_reset_res) {
5676 			/* Bad news: how do we tell userspace?
5677 			 * For a RAS error, report the GPU's bad status
5678 			 * instead of a reset failure.
5679 			 */
5680 			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5681 			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5682 				dev_info(
5683 					tmp_adev->dev,
5684 					"GPU reset(%d) failed with error %d\n",
5685 					atomic_read(
5686 						&tmp_adev->gpu_reset_counter),
5687 					tmp_adev->asic_reset_res);
5688 			amdgpu_vf_error_put(tmp_adev,
5689 					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
5690 					    tmp_adev->asic_reset_res);
5691 			if (!r)
5692 				r = tmp_adev->asic_reset_res;
5693 			tmp_adev->asic_reset_res = 0;
5694 		} else {
5695 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
5696 				 atomic_read(&tmp_adev->gpu_reset_counter));
5697 			if (amdgpu_acpi_smart_shift_update(tmp_adev,
5698 							   AMDGPU_SS_DEV_D0))
5699 				dev_warn(tmp_adev->dev,
5700 					 "smart shift update failed\n");
5701 		}
5702 	}
5703 
5704 	return r;
5705 }
5706 
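/*
 * Final per-device recovery steps: bring KFD back up, resume display audio,
 * clear the MP1 state and re-enable RAS error queries.
 */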
5707 static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
5708 			      struct list_head *device_list,
5709 			      bool   need_emergency_restart)
5710 {
5711 	struct amdgpu_device *tmp_adev = NULL;
5712 
5713 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5714 		/* unlock kfd: SRIOV would do it separately */
5715 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5716 			amdgpu_amdkfd_post_reset(tmp_adev);
5717 
5718 		/* kfd_post_reset will do nothing if the kfd device is not initialized,
5719 		 * so bring up kfd here if it was not initialized before
5720 		 */
5721 		if (!tmp_adev->kfd.init_complete)
5722 			amdgpu_amdkfd_device_init(tmp_adev);
5723 
5724 		if (tmp_adev->pcie_reset_ctx.audio_suspended)
5725 			amdgpu_device_resume_display_audio(tmp_adev);
5726 
5727 		amdgpu_device_unset_mp1_state(tmp_adev);
5728 
5729 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5731 	}
5732 }
5733 
5734 
5735 /**
5736  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5737  *
5738  * @adev: amdgpu_device pointer
5739  * @job: the job that triggered the hang
5740  * @reset_context: amdgpu reset context pointer
5741  *
5742  * Attempt to reset the GPU if it has hung (all ASICs),
5743  * performing a soft reset or a full reset and reinitializing the ASIC.
5744  * Returns 0 for success or an error on failure.
5745  */
5747 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5748 			      struct amdgpu_job *job,
5749 			      struct amdgpu_reset_context *reset_context)
5750 {
5751 	struct list_head device_list;
5752 	bool job_signaled = false;
5753 	struct amdgpu_hive_info *hive = NULL;
5754 	int r = 0;
5755 	bool need_emergency_restart = false;
5756 	/* save the pasid here as the job may be freed before the end of the reset */
5757 	int pasid = job ? job->pasid : -EINVAL;
5758 
5759 	/*
5760 	 * If it reaches here because of hang/timeout and a RAS error is
5761 	 * detected at the same time, let RAS recovery take care of it.
5762 	 */
5763 	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
5764 	    !amdgpu_sriov_vf(adev) &&
5765 	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
5766 		dev_dbg(adev->dev,
5767 			"GPU recovery from source %d yielding to RAS error recovery handling\n",
5768 			reset_context->src);
5769 		return 0;
5770 	}
5771 
5772 	/*
5773 	 * Special case: RAS triggered and full reset isn't supported
5774 	 */
5775 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5776 
5777 	/*
5778 	 * Flush RAM to disk so that after reboot
5779 	 * the user can read the log and see why the system rebooted.
5780 	 */
5781 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5782 		amdgpu_ras_get_context(adev)->reboot) {
5783 		dev_warn(adev->dev, "Emergency reboot.\n");
5784 
5785 		ksys_sync_helper();
5786 		emergency_restart();
5787 	}
5788 
5789 	dev_info(adev->dev, "GPU %s begin! Source: %d\n",
5790 		 need_emergency_restart ? "jobs stop" : "reset",
5791 		 reset_context->src);
5792 
5793 	if (!amdgpu_sriov_vf(adev))
5794 		hive = amdgpu_get_xgmi_hive(adev);
5795 	if (hive)
5796 		mutex_lock(&hive->hive_lock);
5797 
5798 	reset_context->job = job;
5799 	reset_context->hive = hive;
5800 	INIT_LIST_HEAD(&device_list);
5801 
5802 	amdgpu_device_recovery_prepare(adev, &device_list, hive);
5803 
5804 	if (!amdgpu_sriov_vf(adev)) {
5805 		r = amdgpu_device_health_check(&device_list);
5806 		if (r)
5807 			goto end_reset;
5808 	}
5809 
5810 	/* Cannot be called after locking reset domain */
5811 	amdgpu_ras_pre_reset(adev, &device_list);
5812 
5813 	/* We need to lock reset domain only once both for XGMI and single device */
5814 	amdgpu_device_recovery_get_reset_lock(adev, &device_list);
5815 
5816 	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
5817 				      hive, need_emergency_restart);
5818 	if (need_emergency_restart)
5819 		goto skip_sched_resume;
5820 	/*
5821 	 * Must check guilty signal here since after this point all old
5822 	 * HW fences are force signaled.
5823 	 *
5824 	 * job->base holds a reference to parent fence
5825 	 */
5826 	if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) {
5827 		job_signaled = true;
5828 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset\n");
5829 		goto skip_hw_reset;
5830 	}
5831 
5832 	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
5833 	if (r)
5834 		goto reset_unlock;
5835 skip_hw_reset:
5836 	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
5837 	if (r)
5838 		goto reset_unlock;
5839 skip_sched_resume:
5840 	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
5841 reset_unlock:
5842 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
5843 	amdgpu_ras_post_reset(adev, &device_list);
5844 end_reset:
5845 	if (hive) {
5846 		mutex_unlock(&hive->hive_lock);
5847 		amdgpu_put_xgmi_hive(hive);
5848 	}
5849 
5850 	if (r)
5851 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5852 
5853 	atomic_set(&adev->reset_domain->reset_res, r);
5854 
5855 	if (!r) {
5856 		struct amdgpu_task_info *ti = NULL;
5857 
5858 		/*
5859 		 * The job may already be freed at this point via the sched tdr workqueue so
5860 		 * use the cached pasid.
5861 		 */
5862 		if (pasid >= 0)
5863 			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
5864 
5865 		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
5866 				     ti ? &ti->task : NULL);
5867 
5868 		amdgpu_vm_put_task_info(ti);
5869 	}
5870 
5871 	return r;
5872 }
5873 
5874 /**
5875  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5876  *
5877  * @adev: amdgpu_device pointer
5878  * @speed: pointer to the speed of the link
5879  * @width: pointer to the width of the link
5880  *
5881  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5882  * first physical partner to an AMD dGPU.
5883  * This will exclude any virtual switches and links.
5884  */
5885 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5886 					    enum pci_bus_speed *speed,
5887 					    enum pcie_link_width *width)
5888 {
5889 	struct pci_dev *parent = adev->pdev;
5890 
5891 	if (!speed || !width)
5892 		return;
5893 
5894 	*speed = PCI_SPEED_UNKNOWN;
5895 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5896 
5897 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5898 		while ((parent = pci_upstream_bridge(parent))) {
5899 			/* skip upstream/downstream switches internal to the dGPU */
5900 			if (parent->vendor == PCI_VENDOR_ID_ATI)
5901 				continue;
5902 			*speed = pcie_get_speed_cap(parent);
5903 			*width = pcie_get_width_cap(parent);
5904 			break;
5905 		}
5906 	} else {
5907 		/* use the current speeds rather than max if switching is not supported */
5908 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5909 	}
5910 }
5911 
5912 /**
5913  * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
5914  *
5915  * @adev: amdgpu_device pointer
5916  * @speed: pointer to the speed of the link
5917  * @width: pointer to the width of the link
5918  *
5919  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5920  * AMD dGPU which may be a virtual upstream bridge.
5921  */
5922 static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
5923 					enum pci_bus_speed *speed,
5924 					enum pcie_link_width *width)
5925 {
5926 	struct pci_dev *parent = adev->pdev;
5927 
5928 	if (!speed || !width)
5929 		return;
5930 
5931 	parent = pci_upstream_bridge(parent);
5932 	if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
5933 		/* use the upstream/downstream switches internal to dGPU */
5934 		*speed = pcie_get_speed_cap(parent);
5935 		*width = pcie_get_width_cap(parent);
5936 		while ((parent = pci_upstream_bridge(parent))) {
5937 			if (parent->vendor == PCI_VENDOR_ID_ATI) {
5938 				/* use the upstream/downstream switches internal to dGPU */
5939 				*speed = pcie_get_speed_cap(parent);
5940 				*width = pcie_get_width_cap(parent);
5941 			}
5942 		}
5943 	} else {
5944 		/* use the device itself */
5945 		*speed = pcie_get_speed_cap(adev->pdev);
5946 		*width = pcie_get_width_cap(adev->pdev);
5947 	}
5948 }
5949 
5950 /**
5951  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5952  *
5953  * @adev: amdgpu_device pointer
5954  *
5955  * Fetches and stores in the driver the PCIE capabilities (gen speed
5956  * and lanes) of the slot the device is in. Handles APUs and
5957  * virtualized environments where PCIE config space may not be available.
5958  */
5959 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5960 {
5961 	enum pci_bus_speed speed_cap, platform_speed_cap;
5962 	enum pcie_link_width platform_link_width, link_width;
5963 
5964 	if (amdgpu_pcie_gen_cap)
5965 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5966 
5967 	if (amdgpu_pcie_lane_cap)
5968 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5969 
5970 	/* covers APUs as well */
5971 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5972 		if (adev->pm.pcie_gen_mask == 0)
5973 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5974 		if (adev->pm.pcie_mlw_mask == 0)
5975 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5976 		return;
5977 	}
5978 
5979 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5980 		return;
5981 
5982 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5983 					&platform_link_width);
5984 	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
5985 
5986 	if (adev->pm.pcie_gen_mask == 0) {
5987 		/* asic caps */
5988 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5989 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5990 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5991 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5992 		} else {
5993 			if (speed_cap == PCIE_SPEED_32_0GT)
5994 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5995 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5996 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5997 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5998 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5999 			else if (speed_cap == PCIE_SPEED_16_0GT)
6000 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6001 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6002 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6003 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6004 			else if (speed_cap == PCIE_SPEED_8_0GT)
6005 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6006 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6007 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6008 			else if (speed_cap == PCIE_SPEED_5_0GT)
6009 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6010 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6011 			else
6012 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6013 		}
6014 		/* platform caps */
6015 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6016 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6017 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6018 		} else {
6019 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6020 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6021 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6022 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6023 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6024 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6025 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6026 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6027 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6028 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6029 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6030 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6031 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6032 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6033 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6034 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6035 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6036 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6037 			else
6038 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6040 		}
6041 	}
6042 	if (adev->pm.pcie_mlw_mask == 0) {
6043 		/* asic caps */
6044 		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6045 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6046 		} else {
6047 			switch (link_width) {
6048 			case PCIE_LNK_X32:
6049 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6050 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6051 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6052 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6053 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6054 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6055 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6056 				break;
6057 			case PCIE_LNK_X16:
6058 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6059 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6060 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6061 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6062 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6063 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6064 				break;
6065 			case PCIE_LNK_X12:
6066 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6067 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6068 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6069 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6070 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6071 				break;
6072 			case PCIE_LNK_X8:
6073 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6074 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6075 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6076 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6077 				break;
6078 			case PCIE_LNK_X4:
6079 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6080 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6081 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6082 				break;
6083 			case PCIE_LNK_X2:
6084 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6085 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6086 				break;
6087 			case PCIE_LNK_X1:
6088 				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6089 				break;
6090 			default:
6091 				break;
6092 			}
6093 		}
6094 		/* platform caps */
6095 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6096 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6097 		} else {
6098 			switch (platform_link_width) {
6099 			case PCIE_LNK_X32:
6100 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6101 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6102 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6103 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6104 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6105 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6106 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6107 				break;
6108 			case PCIE_LNK_X16:
6109 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6110 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6111 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6112 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6113 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6114 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6115 				break;
6116 			case PCIE_LNK_X12:
6117 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6118 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6119 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6120 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6121 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6122 				break;
6123 			case PCIE_LNK_X8:
6124 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6125 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6126 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6127 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6128 				break;
6129 			case PCIE_LNK_X4:
6130 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6131 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6132 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6133 				break;
6134 			case PCIE_LNK_X2:
6135 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6136 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6137 				break;
6138 			case PCIE_LNK_X1:
6139 				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6140 				break;
6141 			default:
6142 				break;
6143 			}
6144 		}
6145 	}
6146 }
6147 
6148 /**
6149  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6150  *
6151  * @adev: amdgpu_device pointer
6152  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6153  *
6154  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6155  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6156  * @peer_adev.
6157  */
6158 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6159 				      struct amdgpu_device *peer_adev)
6160 {
6161 #ifdef CONFIG_HSA_AMD_P2P
6162 	bool p2p_access =
6163 		!adev->gmc.xgmi.connected_to_cpu &&
6164 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6165 	if (!p2p_access)
6166 		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6167 			pci_name(peer_adev->pdev));
6168 
6169 	bool is_large_bar = adev->gmc.visible_vram_size &&
6170 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6171 	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6172 
6173 	if (!p2p_addressable) {
6174 		uint64_t address_mask = peer_adev->dev->dma_mask ?
6175 			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6176 		resource_size_t aper_limit =
6177 			adev->gmc.aper_base + adev->gmc.aper_size - 1;
6178 
6179 		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6180 				     aper_limit & address_mask);
6181 	}
6182 	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6183 #else
6184 	return false;
6185 #endif
6186 }
6187 
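/**
 * amdgpu_device_baco_enter - enter BACO (Bus Active, Chip Off)
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the RAS doorbell interrupt if necessary and ask the SMU to put
 * the device into the BACO state.
 * Returns 0 on success or a negative error code on failure.
 */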
6188 int amdgpu_device_baco_enter(struct amdgpu_device *adev)
6189 {
6190 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6191 
6192 	if (!amdgpu_device_supports_baco(adev))
6193 		return -ENOTSUPP;
6194 
6195 	if (ras && adev->ras_enabled &&
6196 	    adev->nbio.funcs->enable_doorbell_interrupt)
6197 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6198 
6199 	return amdgpu_dpm_baco_enter(adev);
6200 }
6201 
6202 int amdgpu_device_baco_exit(struct amdgpu_device *adev)
6203 {
6204 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6205 	int ret = 0;
6206 
6207 	if (!amdgpu_device_supports_baco(adev))
6208 		return -ENOTSUPP;
6209 
6210 	ret = amdgpu_dpm_baco_exit(adev);
6211 	if (ret)
6212 		return ret;
6213 
6214 	if (ras && adev->ras_enabled &&
6215 	    adev->nbio.funcs->enable_doorbell_interrupt)
6216 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6217 
6218 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6219 	    adev->nbio.funcs->clear_doorbell_interrupt)
6220 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6221 
6222 	return 0;
6223 }
6224 
6225 /**
6226  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6227  * @pdev: PCI device struct
6228  * @state: PCI channel state
6229  *
6230  * Description: Called when a PCI error is detected.
6231  *
6232  * Return: PCI_ERS_RESULT_CAN_RECOVER, PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6233  */
6234 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6235 {
6236 	struct drm_device *dev = pci_get_drvdata(pdev);
6237 	struct amdgpu_device *adev = drm_to_adev(dev);
6238 	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
6239 		amdgpu_get_xgmi_hive(adev);
6240 	struct amdgpu_reset_context reset_context;
6241 	struct list_head device_list;
6242 
6243 	dev_info(adev->dev, "PCI error: detected callback!!\n");
6244 
6245 	adev->pci_channel_state = state;
6246 
6247 	switch (state) {
6248 	case pci_channel_io_normal:
6249 		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
6250 		return PCI_ERS_RESULT_CAN_RECOVER;
6251 	case pci_channel_io_frozen:
6252 		/* Fatal error, prepare for slot reset */
6253 		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
6254 		if (hive) {
6255 			/* Hive devices should be able to support FW based
6256 			 * link reset on other devices; if not, return.
6257 			 */
6258 			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
6259 				dev_warn(adev->dev,
6260 					 "No support for XGMI hive yet...\n");
6261 				return PCI_ERS_RESULT_DISCONNECT;
6262 			}
6263 			/* Set dpc status only if the device is part of a hive.
6264 			 * Non-hive devices should be able to recover after
6265 			 * link reset.
6266 			 */
6267 			amdgpu_reset_set_dpc_status(adev, true);
6268 
6269 			mutex_lock(&hive->hive_lock);
6270 		}
6271 		memset(&reset_context, 0, sizeof(reset_context));
6272 		INIT_LIST_HEAD(&device_list);
6273 
6274 		amdgpu_device_recovery_prepare(adev, &device_list, hive);
6275 		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
6276 		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
6277 					      hive, false);
6278 		if (hive)
6279 			mutex_unlock(&hive->hive_lock);
6280 		return PCI_ERS_RESULT_NEED_RESET;
6281 	case pci_channel_io_perm_failure:
6282 		/* Permanent error, prepare for device removal */
6283 		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
6284 		return PCI_ERS_RESULT_DISCONNECT;
6285 	}
6286 
6287 	return PCI_ERS_RESULT_NEED_RESET;
6288 }
6289 
6290 /**
6291  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6292  * @pdev: pointer to PCI device
6293  */
6294 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6295 {
6296 	struct drm_device *dev = pci_get_drvdata(pdev);
6297 	struct amdgpu_device *adev = drm_to_adev(dev);
6298 
6299 	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6300 
6301 	/* TODO - dump whatever for debugging purposes */
6302 
6303 	/* This is called only if amdgpu_pci_error_detected returns
6304 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6305 	 * works, no need to reset slot.
6306 	 */
6307 
6308 	return PCI_ERS_RESULT_RECOVERED;
6309 }
6310 
6311 /**
6312  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6313  * @pdev: PCI device struct
6314  *
6315  * Description: This routine is called by the pci error recovery
6316  * code after the PCI slot has been reset, just before we
6317  * should resume normal operations.
6318  */
6319 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6320 {
6321 	struct drm_device *dev = pci_get_drvdata(pdev);
6322 	struct amdgpu_device *adev = drm_to_adev(dev);
6323 	struct amdgpu_reset_context reset_context;
6324 	struct amdgpu_device *tmp_adev;
6325 	struct amdgpu_hive_info *hive;
6326 	struct list_head device_list;
6327 	struct pci_dev *link_dev;
6328 	int r = 0, i, timeout;
6329 	u32 memsize;
6330 	u16 status;
6331 
6332 	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
6333 
6334 	memset(&reset_context, 0, sizeof(reset_context));
6335 	INIT_LIST_HEAD(&device_list);
6336 	hive = amdgpu_get_xgmi_hive(adev);
6337 	if (hive) {
6338 		mutex_lock(&hive->hive_lock);
6339 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6340 			list_add_tail(&tmp_adev->reset_list, &device_list);
6341 	} else {
6342 		list_add_tail(&adev->reset_list, &device_list);
6343 	}
6344 
6345 	if (adev->pcie_reset_ctx.swus)
6346 		link_dev = adev->pcie_reset_ctx.swus;
6347 	else
6348 		link_dev = adev->pdev;
6349 	/* wait for asic to come out of reset, timeout = 10s */
6350 	timeout = 10000;
6351 	do {
6352 		usleep_range(10000, 10500);
6353 		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
6354 		timeout -= 10;
6355 	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
6356 		 (status != PCI_VENDOR_ID_AMD));
6357 
6358 	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
6359 		r = -ETIME;
6360 		goto out;
6361 	}
6362 
6363 	amdgpu_device_load_switch_state(adev);
6364 	/* Restore PCI config space */
6365 	amdgpu_device_load_pci_state(pdev);
6366 
6367 	/* confirm ASIC came out of reset */
6368 	for (i = 0; i < adev->usec_timeout; i++) {
6369 		memsize = amdgpu_asic_get_config_memsize(adev);
6370 
6371 		if (memsize != 0xffffffff)
6372 			break;
6373 		udelay(1);
6374 	}
6375 	if (memsize == 0xffffffff) {
6376 		r = -ETIME;
6377 		goto out;
6378 	}
6379 
6380 	reset_context.method = AMD_RESET_METHOD_NONE;
6381 	reset_context.reset_req_dev = adev;
6382 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6383 	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
6384 
6385 	if (hive) {
6386 		reset_context.hive = hive;
6387 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6388 			tmp_adev->pcie_reset_ctx.in_link_reset = true;
6389 	} else {
6390 		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6391 	}
6392 
6393 	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
6394 out:
6395 	if (!r) {
6396 		if (amdgpu_device_cache_pci_state(adev->pdev))
6397 			pci_restore_state(adev->pdev);
6398 		dev_info(adev->dev, "PCIe error recovery succeeded\n");
6399 	} else {
6400 		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
6401 		if (hive) {
6402 			list_for_each_entry(tmp_adev, &device_list, reset_list)
6403 				amdgpu_device_unset_mp1_state(tmp_adev);
6404 		}
6405 		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6406 	}
6407 
6408 	if (hive) {
6409 		mutex_unlock(&hive->hive_lock);
6410 		amdgpu_put_xgmi_hive(hive);
6411 	}
6412 
6413 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6414 }
6415 
6416 /**
6417  * amdgpu_pci_resume() - resume normal ops after PCI reset
6418  * @pdev: pointer to PCI device
6419  *
6420  * Called when the error recovery driver tells us that its
6421  * OK to resume normal operation.
6422  */
6423 void amdgpu_pci_resume(struct pci_dev *pdev)
6424 {
6425 	struct drm_device *dev = pci_get_drvdata(pdev);
6426 	struct amdgpu_device *adev = drm_to_adev(dev);
6427 	struct list_head device_list;
6428 	struct amdgpu_hive_info *hive = NULL;
6429 	struct amdgpu_device *tmp_adev = NULL;
6430 
6431 	dev_info(adev->dev, "PCI error: resume callback!!\n");
6432 
6433 	/* Only continue execution for the case of pci_channel_io_frozen */
6434 	if (adev->pci_channel_state != pci_channel_io_frozen)
6435 		return;
6436 
6437 	INIT_LIST_HEAD(&device_list);
6438 
6439 	hive = amdgpu_get_xgmi_hive(adev);
6440 	if (hive) {
6441 		mutex_lock(&hive->hive_lock);
6442 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6443 			tmp_adev->pcie_reset_ctx.in_link_reset = false;
6444 			list_add_tail(&tmp_adev->reset_list, &device_list);
6445 		}
6446 	} else
6447 		list_add_tail(&adev->reset_list, &device_list);
6448 
6449 	amdgpu_device_sched_resume(&device_list, NULL, false);
6450 	amdgpu_device_gpu_resume(adev, &device_list, false);
6451 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6452 
6453 	if (hive) {
6454 		mutex_unlock(&hive->hive_lock);
6455 		amdgpu_put_xgmi_hive(hive);
6456 	}
6457 }
6458 
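/*
 * Cache the config space of the internal PCIe switch (SWUS/SWDS) above the
 * dGPU, if present, so it can be restored after a link reset.
 */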
6459 static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
6460 {
6461 	struct pci_dev *swus, *swds;
6462 	int r;
6463 
6464 	swds = pci_upstream_bridge(adev->pdev);
6465 	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
6466 	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
6467 		return;
6468 	swus = pci_upstream_bridge(swds);
6469 	if (!swus ||
6470 	    (swus->vendor != PCI_VENDOR_ID_ATI &&
6471 	     swus->vendor != PCI_VENDOR_ID_AMD) ||
6472 	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
6473 		return;
6474 
6475 	/* If already saved, return */
6476 	if (adev->pcie_reset_ctx.swus)
6477 		return;
6478 	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
6479 	r = pci_save_state(swds);
6480 	if (r)
6481 		return;
6482 	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
6483 
6484 	r = pci_save_state(swus);
6485 	if (r)
6486 		return;
6487 	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
6488 
6489 	adev->pcie_reset_ctx.swus = swus;
6490 }
6491 
6492 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
6493 {
6494 	struct pci_dev *pdev;
6495 	int r;
6496 
6497 	if (!adev->pcie_reset_ctx.swds_pcistate ||
6498 	    !adev->pcie_reset_ctx.swus_pcistate)
6499 		return;
6500 
6501 	pdev = adev->pcie_reset_ctx.swus;
6502 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
6503 	if (!r) {
6504 		pci_restore_state(pdev);
6505 	} else {
6506 		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
6507 		return;
6508 	}
6509 
6510 	pdev = pci_upstream_bridge(adev->pdev);
6511 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
6512 	if (!r)
6513 		pci_restore_state(pdev);
6514 	else
6515 		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
6516 }
6517 
6518 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6519 {
6520 	struct drm_device *dev = pci_get_drvdata(pdev);
6521 	struct amdgpu_device *adev = drm_to_adev(dev);
6522 	int r;
6523 
6524 	if (amdgpu_sriov_vf(adev))
6525 		return false;
6526 
6527 	r = pci_save_state(pdev);
6528 	if (!r) {
6529 		kfree(adev->pci_state);
6530 
6531 		adev->pci_state = pci_store_saved_state(pdev);
6532 
6533 		if (!adev->pci_state) {
6534 			dev_err(adev->dev, "Failed to store PCI saved state\n");
6535 			return false;
6536 		}
6537 	} else {
6538 		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
6539 		return false;
6540 	}
6541 
6542 	amdgpu_device_cache_switch_state(adev);
6543 
6544 	return true;
6545 }
6546 
6547 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6548 {
6549 	struct drm_device *dev = pci_get_drvdata(pdev);
6550 	struct amdgpu_device *adev = drm_to_adev(dev);
6551 	int r;
6552 
6553 	if (!adev->pci_state)
6554 		return false;
6555 
6556 	r = pci_load_saved_state(pdev, adev->pci_state);
6557 
6558 	if (!r) {
6559 		pci_restore_state(pdev);
6560 	} else {
6561 		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
6562 		return false;
6563 	}
6564 
6565 	return true;
6566 }
6567 
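/*
 * Flush the HDP (Host Data Path) cache so that CPU writes to VRAM become
 * visible to the GPU: via the ring's HDP flush packet when available, via
 * the KIQ under SR-IOV runtime, or through a direct register write. No-op
 * for APUs (unless passthrough) and devices connected to the CPU via XGMI.
 */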
6568 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6569 		struct amdgpu_ring *ring)
6570 {
6571 #ifdef CONFIG_X86_64
6572 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6573 		return;
6574 #endif
6575 	if (adev->gmc.xgmi.connected_to_cpu)
6576 		return;
6577 
6578 	if (ring && ring->funcs->emit_hdp_flush) {
6579 		amdgpu_ring_emit_hdp_flush(ring);
6580 		return;
6581 	}
6582 
6583 	if (!ring && amdgpu_sriov_runtime(adev)) {
6584 		if (!amdgpu_kiq_hdp_flush(adev))
6585 			return;
6586 	}
6587 
6588 	amdgpu_hdp_flush(adev, ring);
6589 }
6590 
6591 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6592 		struct amdgpu_ring *ring)
6593 {
6594 #ifdef CONFIG_X86_64
6595 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6596 		return;
6597 #endif
6598 	if (adev->gmc.xgmi.connected_to_cpu)
6599 		return;
6600 
6601 	amdgpu_hdp_invalidate(adev, ring);
6602 }
6603 
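/* Return non-zero while a GPU reset is in progress on this device's reset domain */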
6604 int amdgpu_in_reset(struct amdgpu_device *adev)
6605 {
6606 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6607 }
6608 
6609 /**
6610  * amdgpu_device_halt() - bring hardware to some kind of halt state
6611  *
6612  * @adev: amdgpu_device pointer
6613  *
6614  * Bring hardware to some kind of halt state so that no one can touch it
6615  * any more. It helps to maintain the error context when an error occurs.
6616  * Compared to a simple hang, the system will stay stable at least for SSH
6617  * access. Then it should be trivial to inspect the hardware state and
6618  * see what's going on. Implemented as follows:
6619  *
6620  * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
6621  *    clears all CPU mappings to device, disallows remappings through page faults
6622  * 2. amdgpu_irq_disable_all() disables all interrupts
6623  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6624  * 4. set adev->no_hw_access to avoid potential crashes after step 5
6625  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6626  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6627  *    flush any in flight DMA operations
6628  */
6629 void amdgpu_device_halt(struct amdgpu_device *adev)
6630 {
6631 	struct pci_dev *pdev = adev->pdev;
6632 	struct drm_device *ddev = adev_to_drm(adev);
6633 
6634 	amdgpu_xcp_dev_unplug(adev);
6635 	drm_dev_unplug(ddev);
6636 
6637 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
6638 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
6639 
6640 	amdgpu_irq_disable_all(adev);
6641 
6642 	amdgpu_fence_driver_hw_fini(adev);
6643 
6644 	adev->no_hw_access = true;
6645 
6646 	amdgpu_device_unmap_mmio(adev);
6647 
6648 	pci_disable_device(pdev);
6649 	pci_wait_for_pending_transaction(pdev);
6650 }
6651 
6652 /**
6653  * amdgpu_device_get_gang - return a reference to the current gang
6654  * @adev: amdgpu_device pointer
6655  *
6656  * Returns: A new reference to the current gang leader.
6657  */
6658 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6659 {
6660 	struct dma_fence *fence;
6661 
6662 	rcu_read_lock();
6663 	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6664 	rcu_read_unlock();
6665 	return fence;
6666 }
6667 
6668 /**
6669  * amdgpu_device_switch_gang - switch to a new gang
6670  * @adev: amdgpu_device pointer
6671  * @gang: the gang to switch to
6672  *
6673  * Try to switch to a new gang.
6674  * Returns: NULL if we switched to the new gang or a reference to the current
6675  * gang leader.
6676  */
6677 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6678 					    struct dma_fence *gang)
6679 {
6680 	struct dma_fence *old = NULL;
6681 
6682 	dma_fence_get(gang);
6683 	do {
6684 		dma_fence_put(old);
6685 		old = amdgpu_device_get_gang(adev);
6686 		if (old == gang)
6687 			break;
6688 
6689 		if (!dma_fence_is_signaled(old)) {
6690 			dma_fence_put(gang);
6691 			return old;
6692 		}
6693 
6694 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6695 			 old, gang) != old);
6696 
6697 	/*
6698 	 * Drop it once for the exchanged reference in adev and once for the
6699 	 * thread local reference acquired in amdgpu_device_get_gang().
6700 	 */
6701 	dma_fence_put(old);
6702 	dma_fence_put(old);
6703 	return NULL;
6704 }
6705 
6706 /**
6707  * amdgpu_device_enforce_isolation - enforce HW isolation
6708  * @adev: the amdgpu device pointer
6709  * @ring: the HW ring the job is supposed to run on
6710  * @job: the job which is about to be pushed to the HW ring
6711  *
6712  * Makes sure that only one client at a time can use the GFX block.
6713  * Returns: The dependency to wait on before the job can be pushed to the HW.
6714  * The function is called multiple times until NULL is returned.
6715  */
6716 struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
6717 						  struct amdgpu_ring *ring,
6718 						  struct amdgpu_job *job)
6719 {
6720 	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
6721 	struct drm_sched_fence *f = job->base.s_fence;
6722 	struct dma_fence *dep;
6723 	void *owner;
6724 	int r;
6725 
6726 	/*
6727 	 * For now enforce isolation only for the GFX block since we only need
6728 	 * the cleaner shader on those rings.
6729 	 */
6730 	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
6731 	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6732 		return NULL;
6733 
6734 	/*
6735 	 * All submissions where enforce isolation is false are handled as if
6736 	 * they come from a single client. Use ~0l as the owner to distinguish it
6737 	 * from kernel submissions where the owner is NULL.
6738 	 */
6739 	owner = job->enforce_isolation ? f->owner : (void *)~0l;
6740 
6741 	mutex_lock(&adev->enforce_isolation_mutex);
6742 
6743 	/*
6744 	 * The "spearhead" submission is the first one which changes the
6745 	 * ownership to its client. We always need to wait for it to be
6746 	 * pushed to the HW before proceeding with anything.
6747 	 */
6748 	if (&f->scheduled != isolation->spearhead &&
6749 	    !dma_fence_is_signaled(isolation->spearhead)) {
6750 		dep = isolation->spearhead;
6751 		goto out_grab_ref;
6752 	}
6753 
6754 	if (isolation->owner != owner) {
6756 		/*
6757 		 * Wait for any gang to be assembled before switching to a
6758 		 * different owner or otherwise we could deadlock the
6759 		 * submissions.
6760 		 */
6761 		if (!job->gang_submit) {
6762 			dep = amdgpu_device_get_gang(adev);
6763 			if (!dma_fence_is_signaled(dep))
6764 				goto out_return_dep;
6765 			dma_fence_put(dep);
6766 		}
6767 
6768 		dma_fence_put(isolation->spearhead);
6769 		isolation->spearhead = dma_fence_get(&f->scheduled);
6770 		amdgpu_sync_move(&isolation->active, &isolation->prev);
6771 		trace_amdgpu_isolation(isolation->owner, owner);
6772 		isolation->owner = owner;
6773 	}
6774 
6775 	/*
6776 	 * Specifying the ring here helps to pipeline submissions even when
6777 	 * isolation is enabled. If that is not desired for testing NULL can be
6778 	 * used instead of the ring to enforce a CPU round trip while switching
6779 	 * between clients.
6780 	 */
6781 	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
6782 	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
6783 	if (r)
6784 		dev_warn(adev->dev, "OOM tracking isolation\n");
6785 
6786 out_grab_ref:
6787 	dma_fence_get(dep);
6788 out_return_dep:
6789 	mutex_unlock(&adev->enforce_isolation_mutex);
6790 	return dep;
6791 }
6792 
6793 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6794 {
6795 	switch (adev->asic_type) {
6796 #ifdef CONFIG_DRM_AMDGPU_SI
6797 	case CHIP_HAINAN:
6798 #endif
6799 	case CHIP_TOPAZ:
6800 		/* chips with no display hardware */
6801 		return false;
6802 #ifdef CONFIG_DRM_AMDGPU_SI
6803 	case CHIP_TAHITI:
6804 	case CHIP_PITCAIRN:
6805 	case CHIP_VERDE:
6806 	case CHIP_OLAND:
6807 #endif
6808 #ifdef CONFIG_DRM_AMDGPU_CIK
6809 	case CHIP_BONAIRE:
6810 	case CHIP_HAWAII:
6811 	case CHIP_KAVERI:
6812 	case CHIP_KABINI:
6813 	case CHIP_MULLINS:
6814 #endif
6815 	case CHIP_TONGA:
6816 	case CHIP_FIJI:
6817 	case CHIP_POLARIS10:
6818 	case CHIP_POLARIS11:
6819 	case CHIP_POLARIS12:
6820 	case CHIP_VEGAM:
6821 	case CHIP_CARRIZO:
6822 	case CHIP_STONEY:
6823 		/* chips with display hardware */
6824 		return true;
6825 	default:
6826 		/* IP discovery */
6827 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6828 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6829 			return false;
6830 		return true;
6831 	}
6832 }
6833 
6834 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
6835 {
6836 	ssize_t size = 0;
6837 
6838 	if (!ring || !ring->adev)
6839 		return size;
6840 
6841 	if (amdgpu_device_should_recover_gpu(ring->adev))
6842 		size |= AMDGPU_RESET_TYPE_FULL;
6843 
6844 	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
6845 	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
6846 		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
6847 
6848 	return size;
6849 }
6850 
6851 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
6852 {
6853 	ssize_t size = 0;
6854 
6855 	if (supported_reset == 0) {
6856 		size += sysfs_emit_at(buf, size, "unsupported\n");
6857 		return size;
6858 	}
6861 
6862 	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
6863 		size += sysfs_emit_at(buf, size, "soft ");
6864 
6865 	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
6866 		size += sysfs_emit_at(buf, size, "queue ");
6867 
6868 	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
6869 		size += sysfs_emit_at(buf, size, "pipe ");
6870 
6871 	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
6872 		size += sysfs_emit_at(buf, size, "full ");
6873 
6874 	size += sysfs_emit_at(buf, size, "\n");
6875 	return size;
6876 }
6877 
6878 void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
6879 			   enum amdgpu_uid_type type, uint8_t inst,
6880 			   uint64_t uid)
6881 {
6882 	if (!uid_info)
6883 		return;
6884 
6885 	if (type >= AMDGPU_UID_TYPE_MAX) {
6886 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6887 			     type);
6888 		return;
6889 	}
6890 
6891 	if (inst >= AMDGPU_UID_INST_MAX) {
6892 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6893 			     inst);
6894 		return;
6895 	}
6896 
6897 	if (uid_info->uid[type][inst] != 0) {
6898 		dev_warn_once(
6899 			uid_info->adev->dev,
6900 			"Overwriting existing UID %llu for type %d instance %d\n",
6901 			uid_info->uid[type][inst], type, inst);
6902 	}
6903 
6904 	uid_info->uid[type][inst] = uid;
6905 }
6906 
6907 u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
6908 			  enum amdgpu_uid_type type, uint8_t inst)
6909 {
6910 	if (!uid_info)
6911 		return 0;
6912 
6913 	if (type >= AMDGPU_UID_TYPE_MAX) {
6914 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6915 			     type);
6916 		return 0;
6917 	}
6918 
6919 	if (inst >= AMDGPU_UID_INST_MAX) {
6920 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6921 			     inst);
6922 		return 0;
6923 	}
6924 
6925 	return uid_info->uid[type][inst];
6926 }
6927