xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision a2b270c0ecf6d95bcd14ef4c20d0301a88143ff5)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/aperture.h>
30 #include <linux/power_supply.h>
31 #include <linux/kthread.h>
32 #include <linux/module.h>
33 #include <linux/console.h>
34 #include <linux/slab.h>
35 #include <linux/iommu.h>
36 #include <linux/pci.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39 #include <linux/nospec.h>
40 
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_client_event.h>
43 #include <drm/drm_crtc_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/device.h>
47 #include <linux/vgaarb.h>
48 #include <linux/vga_switcheroo.h>
49 #include <linux/efi.h>
50 #include "amdgpu.h"
51 #include "amdgpu_trace.h"
52 #include "amdgpu_i2c.h"
53 #include "atom.h"
54 #include "amdgpu_atombios.h"
55 #include "amdgpu_atomfirmware.h"
56 #include "amd_pcie.h"
57 #ifdef CONFIG_DRM_AMDGPU_SI
58 #include "si.h"
59 #endif
60 #ifdef CONFIG_DRM_AMDGPU_CIK
61 #include "cik.h"
62 #endif
63 #include "vi.h"
64 #include "soc15.h"
65 #include "nv.h"
66 #include "bif/bif_4_1_d.h"
67 #include <linux/firmware.h>
68 #include "amdgpu_vf_error.h"
69 
70 #include "amdgpu_amdkfd.h"
71 #include "amdgpu_pm.h"
72 
73 #include "amdgpu_xgmi.h"
74 #include "amdgpu_ras.h"
75 #include "amdgpu_ras_mgr.h"
76 #include "amdgpu_pmu.h"
77 #include "amdgpu_fru_eeprom.h"
78 #include "amdgpu_reset.h"
79 #include "amdgpu_virt.h"
80 #include "amdgpu_dev_coredump.h"
81 
82 #include <linux/suspend.h>
83 #include <drm/task_barrier.h>
84 #include <linux/pm_runtime.h>
85 
86 #include <drm/drm_drv.h>
87 
88 #if IS_ENABLED(CONFIG_X86)
89 #include <asm/intel-family.h>
90 #include <asm/cpu_device_id.h>
91 #endif
92 
/*
 * gpu_info firmware images this driver may request; MODULE_FIRMWARE()
 * records each file name in the module information.
 */
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");

/* NOTE(review): presumably a resume-related interval in milliseconds; its user is outside this part of the file - confirm */
#define AMDGPU_RESUME_MS		2000
/* NOTE(review): presumably the cap on reset retries; its user is outside this part of the file - confirm */
#define AMDGPU_MAX_RETRY_LIMIT		2
/* True when @r is one of -EBUSY, -ETIMEDOUT or -EINVAL */
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
/*
 * Offsets 0x38, 0x44 and 0x3C shifted right by two (i.e. divided by four).
 * NOTE(review): presumably dword indices of fallback PCIE index/data
 * registers - confirm against the users of these macros.
 */
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

/* Bit flags returned by amdgpu_device_get_vbios_flags() */
#define AMDGPU_VBIOS_SKIP (1U << 0)
#define AMDGPU_VBIOS_OPTIONAL (1U << 1)

/* Forward declaration; the definition is not in this part of the file */
static const struct drm_driver amdgpu_kms_driver;
113 
/*
 * Printable ASIC names.
 * NOTE(review): presumably indexed by the ASIC type enumeration, in which
 * case the order here must match that enum - confirm before reordering.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
154 
/* Mask with bits 0 .. AMD_IP_BLOCK_TYPE_NUM - 1 set, i.e. one bit per IP block type */
#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM  - 1, 0)
/*
 * Default init level where all blocks are expected to be initialized. This is
 * the level of initialization expected by default and also after a full reset
 * of the device.
 */
struct amdgpu_init_level amdgpu_init_default = {
	.level = AMDGPU_INIT_LEVEL_DEFAULT,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};
165 
/*
 * Init level selected by amdgpu_set_init_level() for
 * AMDGPU_INIT_LEVEL_RESET_RECOVERY. Like the default level, its mask covers
 * every IP block type.
 */
struct amdgpu_init_level amdgpu_init_recovery = {
	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};
170 
/*
 * Minimal set of blocks that must be initialized before an XGMI hive can be
 * reset. This is used for cases like reset on initialization, where the
 * entire hive needs to be reset before first use. Only the GMC, SMC, COMMON,
 * IH and PSP block types are part of the mask.
 */
struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
	.hwini_ip_block_mask =
		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
		BIT(AMD_IP_BLOCK_TYPE_PSP)
};
183 
/* Forward declarations of helpers defined outside this part of the file */
static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);

static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
189 
190 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
191 					     enum amd_ip_block_type block)
192 {
193 	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
194 }
195 
196 void amdgpu_set_init_level(struct amdgpu_device *adev,
197 			   enum amdgpu_init_lvl_id lvl)
198 {
199 	switch (lvl) {
200 	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
201 		adev->init_lvl = &amdgpu_init_minimal_xgmi;
202 		break;
203 	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
204 		adev->init_lvl = &amdgpu_init_recovery;
205 		break;
206 	case AMDGPU_INIT_LEVEL_DEFAULT:
207 		fallthrough;
208 	default:
209 		adev->init_lvl = &amdgpu_init_default;
210 		break;
211 	}
212 }
213 
/* Forward declarations of helpers defined outside this part of the file */
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
				     void *data);
217 
218 /**
219  * DOC: pcie_replay_count
220  *
221  * The amdgpu driver provides a sysfs API for reporting the total number
222  * of PCIe replays (NAKs).
223  * The file pcie_replay_count is used for this and returns the total
224  * number of replays as a sum of the NAKs generated and NAKs received.
225  */
226 
227 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
228 		struct device_attribute *attr, char *buf)
229 {
230 	struct drm_device *ddev = dev_get_drvdata(dev);
231 	struct amdgpu_device *adev = drm_to_adev(ddev);
232 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
233 
234 	return sysfs_emit(buf, "%llu\n", cnt);
235 }
236 
237 static DEVICE_ATTR(pcie_replay_count, 0444,
238 		amdgpu_device_get_pcie_replay_count, NULL);
239 
240 static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
241 {
242 	int ret = 0;
243 
244 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
245 		ret = sysfs_create_file(&adev->dev->kobj,
246 					&dev_attr_pcie_replay_count.attr);
247 
248 	return ret;
249 }
250 
251 static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
252 {
253 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
254 		sysfs_remove_file(&adev->dev->kobj,
255 				  &dev_attr_pcie_replay_count.attr);
256 }
257 
258 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
259 					  const struct bin_attribute *attr, char *buf,
260 					  loff_t ppos, size_t count)
261 {
262 	struct device *dev = kobj_to_dev(kobj);
263 	struct drm_device *ddev = dev_get_drvdata(dev);
264 	struct amdgpu_device *adev = drm_to_adev(ddev);
265 	ssize_t bytes_read;
266 
267 	switch (ppos) {
268 	case AMDGPU_SYS_REG_STATE_XGMI:
269 		bytes_read = amdgpu_asic_get_reg_state(
270 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
271 		break;
272 	case AMDGPU_SYS_REG_STATE_WAFL:
273 		bytes_read = amdgpu_asic_get_reg_state(
274 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
275 		break;
276 	case AMDGPU_SYS_REG_STATE_PCIE:
277 		bytes_read = amdgpu_asic_get_reg_state(
278 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
279 		break;
280 	case AMDGPU_SYS_REG_STATE_USR:
281 		bytes_read = amdgpu_asic_get_reg_state(
282 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
283 		break;
284 	case AMDGPU_SYS_REG_STATE_USR_1:
285 		bytes_read = amdgpu_asic_get_reg_state(
286 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
287 		break;
288 	default:
289 		return -EINVAL;
290 	}
291 
292 	return bytes_read;
293 }
294 
295 static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
296 		      AMDGPU_SYS_REG_STATE_END);
297 
298 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
299 {
300 	int ret;
301 
302 	if (!amdgpu_asic_get_reg_state_supported(adev))
303 		return 0;
304 
305 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
306 
307 	return ret;
308 }
309 
310 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
311 {
312 	if (!amdgpu_asic_get_reg_state_supported(adev))
313 		return;
314 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
315 }
316 
317 /**
318  * DOC: board_info
319  *
320  * The amdgpu driver provides a sysfs API for giving board related information.
321  * It provides the form factor information in the format
322  *
323  *   type : form factor
324  *
325  * Possible form factor values
326  *
327  * - "cem"		- PCIE CEM card
328  * - "oam"		- Open Compute Accelerator Module
329  * - "unknown"	- Not known
330  *
331  */
332 
333 static ssize_t amdgpu_device_get_board_info(struct device *dev,
334 					    struct device_attribute *attr,
335 					    char *buf)
336 {
337 	struct drm_device *ddev = dev_get_drvdata(dev);
338 	struct amdgpu_device *adev = drm_to_adev(ddev);
339 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
340 	const char *pkg;
341 
342 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
343 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
344 
345 	switch (pkg_type) {
346 	case AMDGPU_PKG_TYPE_CEM:
347 		pkg = "cem";
348 		break;
349 	case AMDGPU_PKG_TYPE_OAM:
350 		pkg = "oam";
351 		break;
352 	default:
353 		pkg = "unknown";
354 		break;
355 	}
356 
357 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
358 }
359 
360 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
361 
/* NULL-terminated attribute list of the board attribute group (board_info only) */
static struct attribute *amdgpu_board_attrs[] = {
	&dev_attr_board_info.attr,
	NULL,
};
366 
367 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
368 					     struct attribute *attr, int n)
369 {
370 	struct device *dev = kobj_to_dev(kobj);
371 	struct drm_device *ddev = dev_get_drvdata(dev);
372 	struct amdgpu_device *adev = drm_to_adev(ddev);
373 
374 	if (adev->flags & AMD_IS_APU)
375 		return 0;
376 
377 	return attr->mode;
378 }
379 
/*
 * Board attribute group; amdgpu_board_attrs_is_visible() decides per device
 * whether its attributes are exposed.
 */
static const struct attribute_group amdgpu_board_attrs_group = {
	.attrs = amdgpu_board_attrs,
	.is_visible = amdgpu_board_attrs_is_visible
};
384 
385 /**
386  * DOC: uma/carveout_options
387  *
388  * This is a read-only file that lists all available UMA allocation
389  * options and their corresponding indices. Example output::
390  *
391  *     $ cat uma/carveout_options
392  *     0: Minimum (512 MB)
393  *     1:  (1 GB)
394  *     2:  (2 GB)
395  *     3:  (4 GB)
396  *     4:  (6 GB)
397  *     5:  (8 GB)
398  *     6:  (12 GB)
399  *     7: Medium (16 GB)
400  *     8:  (24 GB)
401  *     9: High (32 GB)
402  */
403 static ssize_t carveout_options_show(struct device *dev,
404 				     struct device_attribute *attr,
405 				     char *buf)
406 {
407 	struct drm_device *ddev = dev_get_drvdata(dev);
408 	struct amdgpu_device *adev = drm_to_adev(ddev);
409 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
410 	uint32_t memory_carved;
411 	ssize_t size = 0;
412 
413 	if (!uma_info || !uma_info->num_entries)
414 		return -ENODEV;
415 
416 	for (int i = 0; i < uma_info->num_entries; i++) {
417 		memory_carved = uma_info->entries[i].memory_carved_mb;
418 		if (memory_carved >= SZ_1G/SZ_1M) {
419 			size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n",
420 					      i,
421 					      uma_info->entries[i].name,
422 					      memory_carved >> 10);
423 		} else {
424 			size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n",
425 					      i,
426 					      uma_info->entries[i].name,
427 					      memory_carved);
428 		}
429 	}
430 
431 	return size;
432 }
433 static DEVICE_ATTR_RO(carveout_options);
434 
435 /**
436  * DOC: uma/carveout
437  *
438  * This file is both readable and writable. When read, it shows the
439  * index of the current setting. Writing a valid index to this file
440  * allows users to change the UMA carveout size to the selected option
441  * on the next boot.
442  *
443  * The available options and their corresponding indices can be read
444  * from the uma/carveout_options file.
445  */
446 static ssize_t carveout_show(struct device *dev,
447 			     struct device_attribute *attr,
448 			     char *buf)
449 {
450 	struct drm_device *ddev = dev_get_drvdata(dev);
451 	struct amdgpu_device *adev = drm_to_adev(ddev);
452 
453 	return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index);
454 }
455 
456 static ssize_t carveout_store(struct device *dev,
457 			      struct device_attribute *attr,
458 			      const char *buf, size_t count)
459 {
460 	struct drm_device *ddev = dev_get_drvdata(dev);
461 	struct amdgpu_device *adev = drm_to_adev(ddev);
462 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
463 	struct amdgpu_uma_carveout_option *opt;
464 	unsigned long val;
465 	uint8_t flags;
466 	int r;
467 
468 	r = kstrtoul(buf, 10, &val);
469 	if (r)
470 		return r;
471 
472 	if (val >= uma_info->num_entries)
473 		return -EINVAL;
474 
475 	val = array_index_nospec(val, uma_info->num_entries);
476 	opt = &uma_info->entries[val];
477 
478 	if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) &&
479 	    !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) {
480 		drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val);
481 		return -EINVAL;
482 	}
483 
484 	flags = opt->flags;
485 	flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1);
486 
487 	guard(mutex)(&uma_info->update_lock);
488 
489 	r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags);
490 	if (r)
491 		return r;
492 
493 	uma_info->uma_option_index = val;
494 
495 	return count;
496 }
497 static DEVICE_ATTR_RW(carveout);
498 
/* NULL-terminated attribute list of the "uma" sysfs group */
static struct attribute *amdgpu_uma_attrs[] = {
	&dev_attr_carveout.attr,
	&dev_attr_carveout_options.attr,
	NULL
};

/* Named group: its attributes appear in an "uma" subdirectory of the device */
const struct attribute_group amdgpu_uma_attr_group = {
	.name = "uma",
	.attrs = amdgpu_uma_attrs
};
509 
510 static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev)
511 {
512 	int rc;
513 
514 	if (!(adev->flags & AMD_IS_APU))
515 		return;
516 
517 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
518 		return;
519 
520 	rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info);
521 	if (rc) {
522 		drm_dbg(adev_to_drm(adev),
523 			"Failed to parse UMA carveout info from VBIOS: %d\n", rc);
524 		goto out_info;
525 	}
526 
527 	mutex_init(&adev->uma_info.update_lock);
528 
529 	rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group);
530 	if (rc) {
531 		drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc);
532 		goto out_attr;
533 	}
534 
535 	return;
536 
537 out_attr:
538 	mutex_destroy(&adev->uma_info.update_lock);
539 out_info:
540 	return;
541 }
542 
543 static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev)
544 {
545 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
546 
547 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
548 		return;
549 
550 	mutex_destroy(&uma_info->update_lock);
551 	uma_info->num_entries = 0;
552 }
553 
/* Forward declaration; the definition is not in this part of the file */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
555 
556 /**
557  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
558  *
559  * @adev: amdgpu device pointer
560  *
561  * Returns true if the device is a dGPU with ATPX power control,
562  * otherwise return false.
563  */
564 bool amdgpu_device_supports_px(struct amdgpu_device *adev)
565 {
566 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
567 		return true;
568 	return false;
569 }
570 
571 /**
572  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
573  *
574  * @adev: amdgpu device pointer
575  *
576  * Returns true if the device is a dGPU with ACPI power control,
577  * otherwise return false.
578  */
579 bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
580 {
581 	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
582 		return false;
583 
584 	if (adev->has_pr3 ||
585 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
586 		return true;
587 	return false;
588 }
589 
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @adev: amdgpu device pointer
 *
 * Thin wrapper around amdgpu_asic_supports_baco().
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported);
 * 0 otherwise.
 */
int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
	int support = amdgpu_asic_supports_baco(adev);

	return support;
}
604 
/**
 * amdgpu_device_detect_runtime_pm_mode - choose the runtime PM mode
 *
 * @adev: amdgpu device pointer
 *
 * Sets adev->pm.rpm_mode according to the amdgpu_runtime_pm setting and the
 * BACO/MACO support reported by amdgpu_device_supports_baco():
 *
 * - 2: BAMACO when MACO is supported, BACO when only BACO is supported
 * - 1: BACO when supported
 * - -1/-2: automatic selection; PX first, then BOCO, then BACO/BAMACO
 *   subject to per-ASIC restrictions
 * - 0: runtime PM disabled
 *
 * The mode stays AMDGPU_RUNPM_NONE when nothing applies, which is logged.
 */
void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
{
	int bamaco_support;

	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
	bamaco_support = amdgpu_device_supports_baco(adev);

	switch (amdgpu_runtime_pm) {
	case 2:
		/* BAMACO requested; fall back to BACO when MACO is missing */
		if (bamaco_support & MACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
		} else if (bamaco_support == BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
		}
		break;
	case 1:
		/* BACO requested */
		if (bamaco_support & BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
		}
		break;
	case -1:
	case -2:
		/* automatic selection */
		if (amdgpu_device_supports_px(adev)) {
			/* enable PX as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
			dev_info(adev->dev, "Using ATPX for runtime pm\n");
		} else if (amdgpu_device_supports_boco(adev)) {
			/* enable boco as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
			dev_info(adev->dev, "Using BOCO for runtime pm\n");
		} else {
			if (!bamaco_support)
				goto no_runtime_pm;

			switch (adev->asic_type) {
			case CHIP_VEGA20:
			case CHIP_ARCTURUS:
				/* BACO is not supported on Vega20 and Arcturus */
				break;
			case CHIP_VEGA10:
				/* enable BACO as runpm mode if noretry=0 */
				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			default:
				/* enable BACO as runpm mode on CI+ */
				if (!amdgpu_passthrough(adev))
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			}

			/* upgrade BACO to BAMACO when MACO is also supported */
			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
				if (bamaco_support & MACO_SUPPORT) {
					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
				} else {
					dev_info(adev->dev, "Using BACO for runtime pm\n");
				}
			}
		}
		break;
	case 0:
		dev_info(adev->dev, "runtime pm is manually disabled\n");
		break;
	default:
		break;
	}

no_runtime_pm:
	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
		dev_info(adev->dev, "Runtime PM not available\n");
}
680 /**
681  * amdgpu_device_supports_smart_shift - Is the device dGPU with
682  * smart shift support
683  *
684  * @adev: amdgpu device pointer
685  *
686  * Returns true if the device is a dGPU with Smart Shift support,
687  * otherwise returns false.
688  */
689 bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
690 {
691 	return (amdgpu_device_supports_boco(adev) &&
692 		amdgpu_acpi_is_power_shift_control_supported());
693 }
694 
695 /*
696  * VRAM access helper functions
697  */
698 
699 /**
700  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
701  *
702  * @adev: amdgpu_device pointer
703  * @pos: offset of the buffer in vram
704  * @buf: virtual address of the buffer in system memory
705  * @size: read/write size, sizeof(@buf) must > @size
706  * @write: true - write to vram, otherwise - read from vram
707  */
708 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
709 			     void *buf, size_t size, bool write)
710 {
711 	unsigned long flags;
712 	uint32_t hi = ~0, tmp = 0;
713 	uint32_t *data = buf;
714 	uint64_t last;
715 	int idx;
716 
717 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
718 		return;
719 
720 	if (!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4)) {
721 		dev_err(adev->dev, "unaligned pos/size (pos=0x%llx, size=0x%zx)\n",
722 			pos, size);
723 		drm_dev_exit(idx);
724 		return;
725 	}
726 
727 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
728 	for (last = pos + size; pos < last; pos += 4) {
729 		tmp = pos >> 31;
730 
731 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
732 		if (tmp != hi) {
733 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
734 			hi = tmp;
735 		}
736 		if (write)
737 			WREG32_NO_KIQ(mmMM_DATA, *data++);
738 		else
739 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
740 	}
741 
742 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
743 	drm_dev_exit(idx);
744 }
745 
746 /**
747  * amdgpu_device_aper_access - access vram by vram aperture
748  *
749  * @adev: amdgpu_device pointer
750  * @pos: offset of the buffer in vram
751  * @buf: virtual address of the buffer in system memory
752  * @size: read/write size, sizeof(@buf) must > @size
753  * @write: true - write to vram, otherwise - read from vram
754  *
755  * The return value means how many bytes have been transferred.
756  */
757 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
758 				 void *buf, size_t size, bool write)
759 {
760 #ifdef CONFIG_64BIT
761 	void __iomem *addr;
762 	size_t count = 0;
763 	uint64_t last;
764 
765 	if (!adev->mman.aper_base_kaddr)
766 		return 0;
767 
768 	last = min(pos + size, adev->gmc.visible_vram_size);
769 	if (last > pos) {
770 		addr = adev->mman.aper_base_kaddr + pos;
771 		count = last - pos;
772 
773 		if (write) {
774 			memcpy_toio(addr, buf, count);
775 			/* Make sure HDP write cache flush happens without any reordering
776 			 * after the system memory contents are sent over PCIe device
777 			 */
778 			mb();
779 			amdgpu_device_flush_hdp(adev, NULL);
780 		} else {
781 			amdgpu_device_invalidate_hdp(adev, NULL);
782 			/* Make sure HDP read cache is invalidated before issuing a read
783 			 * to the PCIe device
784 			 */
785 			mb();
786 			memcpy_fromio(buf, addr, count);
787 		}
788 
789 	}
790 
791 	return count;
792 #else
793 	return 0;
794 #endif
795 }
796 
797 /**
798  * amdgpu_device_vram_access - read/write a buffer in vram
799  *
800  * @adev: amdgpu_device pointer
801  * @pos: offset of the buffer in vram
802  * @buf: virtual address of the buffer in system memory
803  * @size: read/write size, sizeof(@buf) must > @size
804  * @write: true - write to vram, otherwise - read from vram
805  */
806 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
807 			       void *buf, size_t size, bool write)
808 {
809 	size_t count;
810 
811 	/* try to using vram apreature to access vram first */
812 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
813 	size -= count;
814 	if (size) {
815 		/* using MM to access rest vram */
816 		pos += count;
817 		buf += count;
818 		amdgpu_device_mm_access(adev, pos, buf, size, write);
819 	}
820 }
821 
822 /*
823  * register access helper functions.
824  */
825 
/*
 * Check if hw access should be skipped because of hotplug or device error.
 *
 * Returns true when adev->no_hw_access is set, false otherwise. With
 * CONFIG_LOCKDEP enabled and when called from task context, it additionally
 * asserts the locking rules around the reset semaphore described below.
 */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}
853 
854 /**
855  * amdgpu_device_get_rev_id - query device rev_id
856  *
857  * @adev: amdgpu_device pointer
858  *
859  * Return device rev_id
860  */
861 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
862 {
863 	return adev->nbio.funcs->get_rev_id(adev);
864 }
865 
866 static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
867 {
868 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
869 		return AMDGPU_VBIOS_SKIP;
870 
871 	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
872 		return AMDGPU_VBIOS_OPTIONAL;
873 
874 	return 0;
875 }
876 
877 /**
878  * amdgpu_device_asic_init - Wrapper for atom asic_init
879  *
880  * @adev: amdgpu_device pointer
881  *
882  * Does any asic specific work and then calls atom asic init.
883  */
884 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
885 {
886 	uint32_t flags;
887 	bool optional;
888 	int ret;
889 
890 	amdgpu_asic_pre_asic_init(adev);
891 	flags = amdgpu_device_get_vbios_flags(adev);
892 	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
893 
894 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
895 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
896 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
897 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
898 		amdgpu_psp_wait_for_bootloader(adev);
899 		if (optional && !adev->bios)
900 			return 0;
901 
902 		ret = amdgpu_atomfirmware_asic_init(adev, true);
903 		return ret;
904 	} else {
905 		if (optional && !adev->bios)
906 			return 0;
907 
908 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
909 	}
910 
911 	return 0;
912 }
913 
914 /**
915  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
916  *
917  * @adev: amdgpu_device pointer
918  *
919  * Allocates a scratch page of VRAM for use by various things in the
920  * driver.
921  */
922 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
923 {
924 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
925 				       AMDGPU_GEM_DOMAIN_VRAM |
926 				       AMDGPU_GEM_DOMAIN_GTT,
927 				       &adev->mem_scratch.robj,
928 				       &adev->mem_scratch.gpu_addr,
929 				       (void **)&adev->mem_scratch.ptr);
930 }
931 
932 /**
933  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
934  *
935  * @adev: amdgpu_device pointer
936  *
937  * Frees the VRAM scratch page.
938  */
939 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
940 {
941 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
942 }
943 
944 /**
945  * amdgpu_device_program_register_sequence - program an array of registers.
946  *
947  * @adev: amdgpu_device pointer
948  * @registers: pointer to the register array
949  * @array_size: size of the register array
950  *
951  * Programs an array or registers with and or masks.
952  * This is a helper for setting golden registers.
953  */
954 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
955 					     const u32 *registers,
956 					     const u32 array_size)
957 {
958 	u32 tmp, reg, and_mask, or_mask;
959 	int i;
960 
961 	if (array_size % 3)
962 		return;
963 
964 	for (i = 0; i < array_size; i += 3) {
965 		reg = registers[i + 0];
966 		and_mask = registers[i + 1];
967 		or_mask = registers[i + 2];
968 
969 		if (and_mask == 0xffffffff) {
970 			tmp = or_mask;
971 		} else {
972 			tmp = RREG32(reg);
973 			tmp &= ~and_mask;
974 			if (adev->family >= AMDGPU_FAMILY_AI)
975 				tmp |= (or_mask & and_mask);
976 			else
977 				tmp |= or_mask;
978 		}
979 		WREG32(reg, tmp);
980 	}
981 }
982 
983 /**
984  * amdgpu_device_pci_config_reset - reset the GPU
985  *
986  * @adev: amdgpu_device pointer
987  *
988  * Resets the GPU using the pci config reset sequence.
989  * Only applicable to asics prior to vega10.
990  */
991 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
992 {
993 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
994 }
995 
996 /**
997  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
998  *
999  * @adev: amdgpu_device pointer
1000  *
1001  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1002  */
1003 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1004 {
1005 	return pci_reset_function(adev->pdev);
1006 }
1007 
1008 /*
1009  * amdgpu_device_wb_*()
1010  * Writeback is the method by which the GPU updates special pages in memory
1011  * with the status of certain GPU events (fences, ring pointers,etc.).
1012  */
1013 
1014 /**
1015  * amdgpu_device_wb_fini - Disable Writeback and free memory
1016  *
1017  * @adev: amdgpu_device pointer
1018  *
1019  * Disables Writeback and frees the Writeback memory (all asics).
1020  * Used at driver shutdown.
1021  */
1022 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1023 {
1024 	if (adev->wb.wb_obj) {
1025 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1026 				      &adev->wb.gpu_addr,
1027 				      (void **)&adev->wb.wb);
1028 		adev->wb.wb_obj = NULL;
1029 	}
1030 }
1031 
1032 /**
1033  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1034  *
1035  * @adev: amdgpu_device pointer
1036  *
1037  * Initializes writeback and allocates writeback memory (all asics).
1038  * Used at driver startup.
1039  * Returns 0 on success or an -error on failure.
1040  */
1041 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1042 {
1043 	int r;
1044 
1045 	if (adev->wb.wb_obj == NULL) {
1046 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1047 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1048 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1049 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1050 					    (void **)&adev->wb.wb);
1051 		if (r) {
1052 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1053 			return r;
1054 		}
1055 
1056 		adev->wb.num_wb = AMDGPU_MAX_WB;
1057 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1058 
1059 		/* clear wb memory */
1060 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1061 	}
1062 
1063 	return 0;
1064 }
1065 
1066 /**
1067  * amdgpu_device_wb_get - Allocate a wb entry
1068  *
1069  * @adev: amdgpu_device pointer
1070  * @wb: wb index
1071  *
1072  * Allocate a wb slot for use by the driver (all asics).
1073  * Returns 0 on success or -EINVAL on failure.
1074  */
1075 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1076 {
1077 	unsigned long flags, offset;
1078 
1079 	spin_lock_irqsave(&adev->wb.lock, flags);
1080 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1081 	if (offset < adev->wb.num_wb) {
1082 		__set_bit(offset, adev->wb.used);
1083 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1084 		*wb = offset << 3; /* convert to dw offset */
1085 		return 0;
1086 	} else {
1087 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1088 		return -EINVAL;
1089 	}
1090 }
1091 
1092 /**
1093  * amdgpu_device_wb_free - Free a wb entry
1094  *
1095  * @adev: amdgpu_device pointer
1096  * @wb: wb index
1097  *
1098  * Free a wb slot allocated for use by the driver (all asics)
1099  */
1100 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1101 {
1102 	unsigned long flags;
1103 
1104 	wb >>= 3;
1105 	spin_lock_irqsave(&adev->wb.lock, flags);
1106 	if (wb < adev->wb.num_wb)
1107 		__clear_bit(wb, adev->wb.used);
1108 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1109 }
1110 
1111 /**
1112  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1113  *
1114  * @adev: amdgpu_device pointer
1115  *
1116  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1117  * to fail, but if any of the BARs is not accessible after the size we abort
1118  * driver loading by returning -ENODEV.
1119  */
1120 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1121 {
1122 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1123 	struct pci_bus *root;
1124 	struct resource *res;
1125 	int max_size, r;
1126 	unsigned int i;
1127 	u16 cmd;
1128 
1129 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1130 		return 0;
1131 
1132 	/* Bypass for VF */
1133 	if (amdgpu_sriov_vf(adev))
1134 		return 0;
1135 
1136 	if (!amdgpu_rebar)
1137 		return 0;
1138 
1139 	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1140 	if ((amdgpu_runtime_pm != 0) &&
1141 	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1142 	    adev->pdev->device == 0x731f &&
1143 	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1144 		return 0;
1145 
1146 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1147 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1148 		dev_warn(
1149 			adev->dev,
1150 			"System can't access extended configuration space, please check!!\n");
1151 
1152 	/* skip if the bios has already enabled large BAR */
1153 	if (adev->gmc.real_vram_size &&
1154 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1155 		return 0;
1156 
1157 	/* Check if the root BUS has 64bit memory resources */
1158 	root = adev->pdev->bus;
1159 	while (root->parent)
1160 		root = root->parent;
1161 
1162 	pci_bus_for_each_resource(root, res, i) {
1163 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1164 		    res->start > 0x100000000ull)
1165 			break;
1166 	}
1167 
1168 	/* Trying to resize is pointless without a root hub window above 4GB */
1169 	if (!res)
1170 		return 0;
1171 
1172 	/* Limit the BAR size to what is available */
1173 	max_size = pci_rebar_get_max_size(adev->pdev, 0);
1174 	if (max_size < 0)
1175 		return 0;
1176 	rbar_size = min(max_size, rbar_size);
1177 
1178 	/* Disable memory decoding while we change the BAR addresses and size */
1179 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1180 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1181 			      cmd & ~PCI_COMMAND_MEMORY);
1182 
1183 	/* Tear down doorbell as resizing will release BARs */
1184 	amdgpu_doorbell_fini(adev);
1185 
1186 	r = pci_resize_resource(adev->pdev, 0, rbar_size,
1187 				(adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
1188 								  : 1 << 2);
1189 	if (r == -ENOSPC)
1190 		dev_info(adev->dev,
1191 			 "Not enough PCI address space for a large BAR.");
1192 	else if (r && r != -ENOTSUPP)
1193 		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1194 
1195 	/* When the doorbell or fb BAR isn't available we have no chance of
1196 	 * using the device.
1197 	 */
1198 	r = amdgpu_doorbell_init(adev);
1199 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1200 		return -ENODEV;
1201 
1202 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1203 
1204 	return 0;
1205 }
1206 
/*
 * GPU helper functions.
 */
1210 /**
1211  * amdgpu_device_need_post - check if the hw need post or not
1212  *
1213  * @adev: amdgpu_device pointer
1214  *
1215  * Check if the asic has been initialized (all asics) at driver startup
1216  * or post is needed if  hw reset is performed.
1217  * Returns true if need or false if not.
1218  */
1219 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1220 {
1221 	uint32_t reg, flags;
1222 
1223 	if (amdgpu_sriov_vf(adev))
1224 		return false;
1225 
1226 	flags = amdgpu_device_get_vbios_flags(adev);
1227 	if (flags & AMDGPU_VBIOS_SKIP)
1228 		return false;
1229 	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1230 		return false;
1231 
1232 	if (amdgpu_passthrough(adev)) {
1233 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1234 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1235 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1236 		 * vpost executed for smc version below 22.15
1237 		 */
1238 		if (adev->asic_type == CHIP_FIJI) {
1239 			int err;
1240 			uint32_t fw_ver;
1241 
1242 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1243 			/* force vPost if error occurred */
1244 			if (err)
1245 				return true;
1246 
1247 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1248 			release_firmware(adev->pm.fw);
1249 			if (fw_ver < 0x00160e00)
1250 				return true;
1251 		}
1252 	}
1253 
1254 	/* Don't post if we need to reset whole hive on init */
1255 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1256 		return false;
1257 
1258 	if (adev->has_hw_reset) {
1259 		adev->has_hw_reset = false;
1260 		return true;
1261 	}
1262 
1263 	/* bios scratch used on CIK+ */
1264 	if (adev->asic_type >= CHIP_BONAIRE)
1265 		return amdgpu_atombios_scratch_need_asic_init(adev);
1266 
1267 	/* check MEM_SIZE for older asics */
1268 	reg = amdgpu_asic_get_config_memsize(adev);
1269 
1270 	if ((reg != 0) && (reg != 0xffffffff))
1271 		return false;
1272 
1273 	return true;
1274 }
1275 
1276 /*
1277  * Check whether seamless boot is supported.
1278  *
1279  * So far we only support seamless boot on DCE 3.0 or later.
1280  * If users report that it works on older ASICS as well, we may
1281  * loosen this.
1282  */
1283 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1284 {
1285 	switch (amdgpu_seamless) {
1286 	case -1:
1287 		break;
1288 	case 1:
1289 		return true;
1290 	case 0:
1291 		return false;
1292 	default:
1293 		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1294 			amdgpu_seamless);
1295 		return false;
1296 	}
1297 
1298 	if (!(adev->flags & AMD_IS_APU))
1299 		return false;
1300 
1301 	if (adev->mman.keep_stolen_vga_memory)
1302 		return false;
1303 
1304 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1305 }
1306 
1307 /*
1308  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1309  * don't support dynamic speed switching. Until we have confirmation from Intel
1310  * that a specific host supports it, it's safer that we keep it disabled for all.
1311  *
1312  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1313  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1314  */
1315 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1316 {
1317 #if IS_ENABLED(CONFIG_X86)
1318 	struct cpuinfo_x86 *c = &cpu_data(0);
1319 
1320 	/* eGPU change speeds based on USB4 fabric conditions */
1321 	if (dev_is_removable(adev->dev))
1322 		return true;
1323 
1324 	if (c->x86_vendor == X86_VENDOR_INTEL)
1325 		return false;
1326 #endif
1327 	return true;
1328 }
1329 
1330 static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1331 {
1332 	/* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
1333 	 * It's unclear if this is a platform-specific or GPU-specific issue.
1334 	 * Disable ASPM on SI for the time being.
1335 	 */
1336 	if (adev->family == AMDGPU_FAMILY_SI)
1337 		return true;
1338 
1339 #if IS_ENABLED(CONFIG_X86)
1340 	struct cpuinfo_x86 *c = &cpu_data(0);
1341 
1342 	if (c->x86_vendor == X86_VENDOR_INTEL) {
1343 		switch (c->x86_model) {
1344 		case VFM_MODEL(INTEL_ALDERLAKE):
1345 		case VFM_MODEL(INTEL_ALDERLAKE_L):
1346 		case VFM_MODEL(INTEL_RAPTORLAKE):
1347 		case VFM_MODEL(INTEL_RAPTORLAKE_P):
1348 		case VFM_MODEL(INTEL_RAPTORLAKE_S):
1349 		case VFM_MODEL(INTEL_TIGERLAKE):
1350 		case VFM_MODEL(INTEL_TIGERLAKE_L):
1351 			return true;
1352 		default:
1353 			return false;
1354 		}
1355 	} else {
1356 		return false;
1357 	}
1358 #else
1359 	return false;
1360 #endif
1361 }
1362 
1363 /**
1364  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1365  *
1366  * @adev: amdgpu_device pointer
1367  *
1368  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1369  * be set for this device.
1370  *
1371  * Returns true if it should be used or false if not.
1372  */
1373 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1374 {
1375 	switch (amdgpu_aspm) {
1376 	case -1:
1377 		break;
1378 	case 0:
1379 		return false;
1380 	case 1:
1381 		return true;
1382 	default:
1383 		return false;
1384 	}
1385 	if (adev->flags & AMD_IS_APU)
1386 		return false;
1387 	if (amdgpu_device_aspm_support_quirk(adev))
1388 		return false;
1389 	return pcie_aspm_enabled(adev->pdev);
1390 }
1391 
1392 /* if we get transitioned to only one device, take VGA back */
1393 /**
1394  * amdgpu_device_vga_set_decode - enable/disable vga decode
1395  *
1396  * @pdev: PCI device pointer
1397  * @state: enable/disable vga decode
1398  *
1399  * Enable/disable vga decode (all asics).
1400  * Returns VGA resource flags.
1401  */
1402 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1403 		bool state)
1404 {
1405 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1406 
1407 	amdgpu_asic_set_vga_state(adev, state);
1408 	if (state)
1409 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1410 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1411 	else
1412 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1413 }
1414 
1415 /**
1416  * amdgpu_device_check_block_size - validate the vm block size
1417  *
1418  * @adev: amdgpu_device pointer
1419  *
1420  * Validates the vm block size specified via module parameter.
1421  * The vm block size defines number of bits in page table versus page directory,
1422  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1423  * page table and the remaining bits are in the page directory.
1424  */
1425 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1426 {
1427 	/* defines number of bits in page table versus page directory,
1428 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1429 	 * page table and the remaining bits are in the page directory
1430 	 */
1431 	if (amdgpu_vm_block_size == -1)
1432 		return;
1433 
1434 	if (amdgpu_vm_block_size < 9) {
1435 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1436 			 amdgpu_vm_block_size);
1437 		amdgpu_vm_block_size = -1;
1438 	}
1439 }
1440 
1441 /**
1442  * amdgpu_device_check_vm_size - validate the vm size
1443  *
1444  * @adev: amdgpu_device pointer
1445  *
1446  * Validates the vm size in GB specified via module parameter.
1447  * The VM size is the size of the GPU virtual memory space in GB.
1448  */
1449 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1450 {
1451 	/* no need to check the default value */
1452 	if (amdgpu_vm_size == -1)
1453 		return;
1454 
1455 	if (amdgpu_vm_size < 1) {
1456 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1457 			 amdgpu_vm_size);
1458 		amdgpu_vm_size = -1;
1459 	}
1460 }
1461 
1462 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1463 {
1464 	struct sysinfo si;
1465 	bool is_os_64 = (sizeof(void *) == 8);
1466 	uint64_t total_memory;
1467 	uint64_t dram_size_seven_GB = 0x1B8000000;
1468 	uint64_t dram_size_three_GB = 0xB8000000;
1469 
1470 	if (amdgpu_smu_memory_pool_size == 0)
1471 		return;
1472 
1473 	if (!is_os_64) {
1474 		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
1475 		goto def_value;
1476 	}
1477 	si_meminfo(&si);
1478 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1479 
1480 	if ((amdgpu_smu_memory_pool_size == 1) ||
1481 		(amdgpu_smu_memory_pool_size == 2)) {
1482 		if (total_memory < dram_size_three_GB)
1483 			goto def_value1;
1484 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1485 		(amdgpu_smu_memory_pool_size == 8)) {
1486 		if (total_memory < dram_size_seven_GB)
1487 			goto def_value1;
1488 	} else {
1489 		dev_warn(adev->dev, "Smu memory pool size not supported\n");
1490 		goto def_value;
1491 	}
1492 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1493 
1494 	return;
1495 
1496 def_value1:
1497 	dev_warn(adev->dev, "No enough system memory\n");
1498 def_value:
1499 	adev->pm.smu_prv_buffer_size = 0;
1500 }
1501 
1502 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1503 {
1504 	if (!(adev->flags & AMD_IS_APU) ||
1505 	    adev->asic_type < CHIP_RAVEN)
1506 		return 0;
1507 
1508 	switch (adev->asic_type) {
1509 	case CHIP_RAVEN:
1510 		if (adev->pdev->device == 0x15dd)
1511 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1512 		if (adev->pdev->device == 0x15d8)
1513 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1514 		break;
1515 	case CHIP_RENOIR:
1516 		if ((adev->pdev->device == 0x1636) ||
1517 		    (adev->pdev->device == 0x164c))
1518 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1519 		else
1520 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1521 		break;
1522 	case CHIP_VANGOGH:
1523 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1524 		break;
1525 	case CHIP_YELLOW_CARP:
1526 		break;
1527 	case CHIP_CYAN_SKILLFISH:
1528 		if ((adev->pdev->device == 0x13FE) ||
1529 		    (adev->pdev->device == 0x143F))
1530 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1531 		break;
1532 	default:
1533 		break;
1534 	}
1535 
1536 	return 0;
1537 }
1538 
1539 /**
1540  * amdgpu_device_check_arguments - validate module params
1541  *
1542  * @adev: amdgpu_device pointer
1543  *
1544  * Validates certain module parameters and updates
1545  * the associated values used by the driver (all asics).
1546  */
1547 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1548 {
1549 	int i;
1550 
1551 	if (amdgpu_sched_jobs < 4) {
1552 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1553 			 amdgpu_sched_jobs);
1554 		amdgpu_sched_jobs = 4;
1555 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1556 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1557 			 amdgpu_sched_jobs);
1558 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1559 	}
1560 
1561 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1562 		/* gart size must be greater or equal to 32M */
1563 		dev_warn(adev->dev, "gart size (%d) too small\n",
1564 			 amdgpu_gart_size);
1565 		amdgpu_gart_size = -1;
1566 	}
1567 
1568 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1569 		/* gtt size must be greater or equal to 32M */
1570 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1571 				 amdgpu_gtt_size);
1572 		amdgpu_gtt_size = -1;
1573 	}
1574 
1575 	/* valid range is between 4 and 9 inclusive */
1576 	if (amdgpu_vm_fragment_size != -1 &&
1577 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1578 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1579 		amdgpu_vm_fragment_size = -1;
1580 	}
1581 
1582 	if (amdgpu_sched_hw_submission < 2) {
1583 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1584 			 amdgpu_sched_hw_submission);
1585 		amdgpu_sched_hw_submission = 2;
1586 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1587 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1588 			 amdgpu_sched_hw_submission);
1589 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1590 	}
1591 
1592 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1593 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1594 		amdgpu_reset_method = -1;
1595 	}
1596 
1597 	amdgpu_device_check_smu_prv_buffer_size(adev);
1598 
1599 	amdgpu_device_check_vm_size(adev);
1600 
1601 	amdgpu_device_check_block_size(adev);
1602 
1603 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1604 
1605 	for (i = 0; i < MAX_XCP; i++) {
1606 		switch (amdgpu_enforce_isolation) {
1607 		case -1:
1608 		case 0:
1609 		default:
1610 			/* disable */
1611 			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
1612 			break;
1613 		case 1:
1614 			/* enable */
1615 			adev->enforce_isolation[i] =
1616 				AMDGPU_ENFORCE_ISOLATION_ENABLE;
1617 			break;
1618 		case 2:
1619 			/* enable legacy mode */
1620 			adev->enforce_isolation[i] =
1621 				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
1622 			break;
1623 		case 3:
1624 			/* enable only process isolation without submitting cleaner shader */
1625 			adev->enforce_isolation[i] =
1626 				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
1627 			break;
1628 		}
1629 	}
1630 
1631 	return 0;
1632 }
1633 
1634 /**
1635  * amdgpu_switcheroo_set_state - set switcheroo state
1636  *
1637  * @pdev: pci dev pointer
1638  * @state: vga_switcheroo state
1639  *
1640  * Callback for the switcheroo driver.  Suspends or resumes
1641  * the asics before or after it is powered up using ACPI methods.
1642  */
1643 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1644 					enum vga_switcheroo_state state)
1645 {
1646 	struct drm_device *dev = pci_get_drvdata(pdev);
1647 	int r;
1648 
1649 	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
1650 	    state == VGA_SWITCHEROO_OFF)
1651 		return;
1652 
1653 	if (state == VGA_SWITCHEROO_ON) {
1654 		pr_info("switched on\n");
1655 		/* don't suspend or resume card normally */
1656 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1657 
1658 		pci_set_power_state(pdev, PCI_D0);
1659 		amdgpu_device_load_pci_state(pdev);
1660 		r = pci_enable_device(pdev);
1661 		if (r)
1662 			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
1663 				 r);
1664 		amdgpu_device_resume(dev, true);
1665 
1666 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1667 	} else {
1668 		dev_info(&pdev->dev, "switched off\n");
1669 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1670 		amdgpu_device_prepare(dev);
1671 		amdgpu_device_suspend(dev, true);
1672 		amdgpu_device_cache_pci_state(pdev);
1673 		/* Shut down the device */
1674 		pci_disable_device(pdev);
1675 		pci_set_power_state(pdev, PCI_D3cold);
1676 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1677 	}
1678 }
1679 
1680 /**
1681  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1682  *
1683  * @pdev: pci dev pointer
1684  *
1685  * Callback for the switcheroo driver.  Check of the switcheroo
1686  * state can be changed.
1687  * Returns true if the state can be changed, false if not.
1688  */
1689 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1690 {
1691 	struct drm_device *dev = pci_get_drvdata(pdev);
1692 
1693        /*
1694 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1695 	* locking inversion with the driver load path. And the access here is
1696 	* completely racy anyway. So don't bother with locking for now.
1697 	*/
1698 	return atomic_read(&dev->open_count) == 0;
1699 }
1700 
1701 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1702 	.set_gpu_state = amdgpu_switcheroo_set_state,
1703 	.reprobe = NULL,
1704 	.can_switch = amdgpu_switcheroo_can_switch,
1705 };
1706 
1707 /**
1708  * amdgpu_device_enable_virtual_display - enable virtual display feature
1709  *
1710  * @adev: amdgpu_device pointer
1711  *
1712  * Enabled the virtual display feature if the user has enabled it via
1713  * the module parameter virtual_display.  This feature provides a virtual
1714  * display hardware on headless boards or in virtualized environments.
1715  * This function parses and validates the configuration string specified by
1716  * the user and configures the virtual display configuration (number of
1717  * virtual connectors, crtcs, etc.) specified.
1718  */
1719 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1720 {
1721 	adev->enable_virtual_display = false;
1722 
1723 	if (amdgpu_virtual_display) {
1724 		const char *pci_address_name = pci_name(adev->pdev);
1725 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1726 
1727 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1728 		pciaddstr_tmp = pciaddstr;
1729 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1730 			pciaddname = strsep(&pciaddname_tmp, ",");
1731 			if (!strcmp("all", pciaddname)
1732 			    || !strcmp(pci_address_name, pciaddname)) {
1733 				long num_crtc;
1734 				int res = -1;
1735 
1736 				adev->enable_virtual_display = true;
1737 
1738 				if (pciaddname_tmp)
1739 					res = kstrtol(pciaddname_tmp, 10,
1740 						      &num_crtc);
1741 
1742 				if (!res) {
1743 					if (num_crtc < 1)
1744 						num_crtc = 1;
1745 					if (num_crtc > 6)
1746 						num_crtc = 6;
1747 					adev->mode_info.num_crtc = num_crtc;
1748 				} else {
1749 					adev->mode_info.num_crtc = 1;
1750 				}
1751 				break;
1752 			}
1753 		}
1754 
1755 		dev_info(
1756 			adev->dev,
1757 			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1758 			amdgpu_virtual_display, pci_address_name,
1759 			adev->enable_virtual_display, adev->mode_info.num_crtc);
1760 
1761 		kfree(pciaddstr);
1762 	}
1763 }
1764 
1765 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1766 {
1767 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1768 		adev->mode_info.num_crtc = 1;
1769 		adev->enable_virtual_display = true;
1770 		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
1771 			 adev->enable_virtual_display,
1772 			 adev->mode_info.num_crtc);
1773 	}
1774 }
1775 
1776 /**
1777  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1778  *
1779  * @adev: amdgpu_device pointer
1780  *
1781  * Parses the asic configuration parameters specified in the gpu info
1782  * firmware and makes them available to the driver for use in configuring
1783  * the asic.
1784  * Returns 0 on success, -EINVAL on failure.
1785  */
1786 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1787 {
1788 	const char *chip_name;
1789 	int err;
1790 	const struct gpu_info_firmware_header_v1_0 *hdr;
1791 
1792 	adev->firmware.gpu_info_fw = NULL;
1793 
1794 	switch (adev->asic_type) {
1795 	default:
1796 		return 0;
1797 	case CHIP_VEGA10:
1798 		chip_name = "vega10";
1799 		break;
1800 	case CHIP_VEGA12:
1801 		chip_name = "vega12";
1802 		break;
1803 	case CHIP_RAVEN:
1804 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1805 			chip_name = "raven2";
1806 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1807 			chip_name = "picasso";
1808 		else
1809 			chip_name = "raven";
1810 		break;
1811 	case CHIP_ARCTURUS:
1812 		chip_name = "arcturus";
1813 		break;
1814 	case CHIP_NAVI12:
1815 		if (adev->discovery.bin)
1816 			return 0;
1817 		chip_name = "navi12";
1818 		break;
1819 	case CHIP_CYAN_SKILLFISH:
1820 		if (adev->discovery.bin)
1821 			return 0;
1822 		chip_name = "cyan_skillfish";
1823 		break;
1824 	}
1825 
1826 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
1827 				   AMDGPU_UCODE_OPTIONAL,
1828 				   "amdgpu/%s_gpu_info.bin", chip_name);
1829 	if (err) {
1830 		dev_err(adev->dev,
1831 			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
1832 			chip_name);
1833 		goto out;
1834 	}
1835 
1836 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1837 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1838 
1839 	switch (hdr->version_major) {
1840 	case 1:
1841 	{
1842 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1843 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1844 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1845 
1846 		/*
1847 		 * Should be dropped when DAL no longer needs it.
1848 		 */
1849 		if (adev->asic_type == CHIP_NAVI12)
1850 			goto parse_soc_bounding_box;
1851 
1852 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1853 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1854 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1855 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1856 		adev->gfx.config.max_texture_channel_caches =
1857 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1858 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1859 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1860 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1861 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1862 		adev->gfx.config.double_offchip_lds_buf =
1863 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1864 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1865 		adev->gfx.cu_info.max_waves_per_simd =
1866 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1867 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1868 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1869 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1870 		if (hdr->version_minor >= 1) {
1871 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1872 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1873 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1874 			adev->gfx.config.num_sc_per_sh =
1875 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1876 			adev->gfx.config.num_packer_per_sc =
1877 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1878 		}
1879 
1880 parse_soc_bounding_box:
1881 		/*
1882 		 * soc bounding box info is not integrated in disocovery table,
1883 		 * we always need to parse it from gpu info firmware if needed.
1884 		 */
1885 		if (hdr->version_minor == 2) {
1886 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1887 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1888 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1889 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1890 		}
1891 		break;
1892 	}
1893 	default:
1894 		dev_err(adev->dev,
1895 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1896 		err = -EINVAL;
1897 		goto out;
1898 	}
1899 out:
1900 	return err;
1901 }
1902 
1903 static void amdgpu_uid_init(struct amdgpu_device *adev)
1904 {
1905 	/* Initialize the UID for the device */
1906 	adev->uid_info = kzalloc_obj(struct amdgpu_uid);
1907 	if (!adev->uid_info) {
1908 		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
1909 		return;
1910 	}
1911 	adev->uid_info->adev = adev;
1912 }
1913 
1914 static void amdgpu_uid_fini(struct amdgpu_device *adev)
1915 {
1916 	/* Free the UID memory */
1917 	kfree(adev->uid_info);
1918 	adev->uid_info = NULL;
1919 }
1920 
1921 static struct pci_dev *amdgpu_device_find_parent(struct amdgpu_device *adev)
1922 {
1923 	struct pci_dev *parent = adev->pdev;
1924 
1925 	/* skip upstream/downstream switches internal to dGPU */
1926 	while ((parent = pci_upstream_bridge(parent))) {
1927 		if (parent->vendor == PCI_VENDOR_ID_ATI)
1928 			continue;
1929 		break;
1930 	}
1931 
1932 	return parent;
1933 }
1934 
1935 /**
1936  * amdgpu_device_ip_early_init - run early init for hardware IPs
1937  *
1938  * @adev: amdgpu_device pointer
1939  *
1940  * Early initialization pass for hardware IPs.  The hardware IPs that make
1941  * up each asic are discovered each IP's early_init callback is run.  This
1942  * is the first stage in initializing the asic.
1943  * Returns 0 on success, negative error code on failure.
1944  */
1945 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1946 {
1947 	struct amdgpu_ip_block *ip_block;
1948 	struct pci_dev *parent;
1949 	bool total, skip_bios;
1950 	uint32_t bios_flags;
1951 	int i, r;
1952 
1953 	amdgpu_device_enable_virtual_display(adev);
1954 
1955 	if (amdgpu_sriov_vf(adev)) {
1956 		r = amdgpu_virt_request_full_gpu(adev, true);
1957 		if (r)
1958 			return r;
1959 
1960 		r = amdgpu_virt_init_critical_region(adev);
1961 		if (r)
1962 			return r;
1963 	}
1964 
1965 	switch (adev->asic_type) {
1966 #ifdef CONFIG_DRM_AMDGPU_SI
1967 	case CHIP_VERDE:
1968 	case CHIP_TAHITI:
1969 	case CHIP_PITCAIRN:
1970 	case CHIP_OLAND:
1971 	case CHIP_HAINAN:
1972 		adev->family = AMDGPU_FAMILY_SI;
1973 		r = si_set_ip_blocks(adev);
1974 		if (r)
1975 			return r;
1976 		break;
1977 #endif
1978 #ifdef CONFIG_DRM_AMDGPU_CIK
1979 	case CHIP_BONAIRE:
1980 	case CHIP_HAWAII:
1981 	case CHIP_KAVERI:
1982 	case CHIP_KABINI:
1983 	case CHIP_MULLINS:
1984 		if (adev->flags & AMD_IS_APU)
1985 			adev->family = AMDGPU_FAMILY_KV;
1986 		else
1987 			adev->family = AMDGPU_FAMILY_CI;
1988 
1989 		r = cik_set_ip_blocks(adev);
1990 		if (r)
1991 			return r;
1992 		break;
1993 #endif
1994 	case CHIP_TOPAZ:
1995 	case CHIP_TONGA:
1996 	case CHIP_FIJI:
1997 	case CHIP_POLARIS10:
1998 	case CHIP_POLARIS11:
1999 	case CHIP_POLARIS12:
2000 	case CHIP_VEGAM:
2001 	case CHIP_CARRIZO:
2002 	case CHIP_STONEY:
2003 		if (adev->flags & AMD_IS_APU)
2004 			adev->family = AMDGPU_FAMILY_CZ;
2005 		else
2006 			adev->family = AMDGPU_FAMILY_VI;
2007 
2008 		r = vi_set_ip_blocks(adev);
2009 		if (r)
2010 			return r;
2011 		break;
2012 	default:
2013 		r = amdgpu_discovery_set_ip_blocks(adev);
2014 		if (r) {
2015 			adev->num_ip_blocks = 0;
2016 			return r;
2017 		}
2018 		break;
2019 	}
2020 
2021 	/* Check for IP version 9.4.3 with A0 hardware */
2022 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2023 	    !amdgpu_device_get_rev_id(adev)) {
2024 		dev_err(adev->dev, "Unsupported A0 hardware\n");
2025 		return -ENODEV;	/* device unsupported - no device error */
2026 	}
2027 
2028 	if (amdgpu_has_atpx() &&
2029 	    (amdgpu_is_atpx_hybrid() ||
2030 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2031 	    ((adev->flags & AMD_IS_APU) == 0) &&
2032 	    !dev_is_removable(&adev->pdev->dev))
2033 		adev->flags |= AMD_IS_PX;
2034 
2035 	if (!(adev->flags & AMD_IS_APU)) {
2036 		parent = amdgpu_device_find_parent(adev);
2037 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2038 	}
2039 
2040 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2041 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2042 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2043 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2044 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2045 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2046 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2047 
2048 	adev->virt.is_xgmi_node_migrate_enabled = false;
2049 	if (amdgpu_sriov_vf(adev)) {
2050 		adev->virt.is_xgmi_node_migrate_enabled =
2051 			amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2052 	}
2053 
2054 	total = true;
2055 	for (i = 0; i < adev->num_ip_blocks; i++) {
2056 		ip_block = &adev->ip_blocks[i];
2057 
2058 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2059 			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2060 				 adev->ip_blocks[i].version->funcs->name);
2061 			adev->ip_blocks[i].status.valid = false;
2062 		} else if (ip_block->version->funcs->early_init) {
2063 			r = ip_block->version->funcs->early_init(ip_block);
2064 			if (r == -ENOENT) {
2065 				adev->ip_blocks[i].status.valid = false;
2066 			} else if (r) {
2067 				dev_err(adev->dev,
2068 					"early_init of IP block <%s> failed %d\n",
2069 					adev->ip_blocks[i].version->funcs->name,
2070 					r);
2071 				total = false;
2072 			} else {
2073 				adev->ip_blocks[i].status.valid = true;
2074 			}
2075 		} else {
2076 			adev->ip_blocks[i].status.valid = true;
2077 		}
2078 		/* get the vbios after the asic_funcs are set up */
2079 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2080 			r = amdgpu_device_parse_gpu_info_fw(adev);
2081 			if (r)
2082 				return r;
2083 
2084 			bios_flags = amdgpu_device_get_vbios_flags(adev);
2085 			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2086 			/* Read BIOS */
2087 			if (!skip_bios) {
2088 				bool optional =
2089 					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2090 				if (!amdgpu_get_bios(adev) && !optional)
2091 					return -EINVAL;
2092 
2093 				if (optional && !adev->bios)
2094 					dev_info(
2095 						adev->dev,
2096 						"VBIOS image optional, proceeding without VBIOS image");
2097 
2098 				if (adev->bios) {
2099 					r = amdgpu_atombios_init(adev);
2100 					if (r) {
2101 						dev_err(adev->dev,
2102 							"amdgpu_atombios_init failed\n");
2103 						amdgpu_vf_error_put(
2104 							adev,
2105 							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2106 							0, 0);
2107 						return r;
2108 					}
2109 				}
2110 			}
2111 
2112 			/*get pf2vf msg info at it's earliest time*/
2113 			if (amdgpu_sriov_vf(adev))
2114 				amdgpu_virt_init_data_exchange(adev);
2115 
2116 		}
2117 	}
2118 	if (!total)
2119 		return -ENODEV;
2120 
2121 	if (adev->gmc.xgmi.supported)
2122 		amdgpu_xgmi_early_init(adev);
2123 
2124 	if (amdgpu_is_multi_aid(adev))
2125 		amdgpu_uid_init(adev);
2126 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2127 	if (ip_block->status.valid != false)
2128 		amdgpu_amdkfd_device_probe(adev);
2129 
2130 	adev->cg_flags &= amdgpu_cg_mask;
2131 	adev->pg_flags &= amdgpu_pg_mask;
2132 
2133 	return 0;
2134 }
2135 
2136 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2137 {
2138 	int i, r;
2139 
2140 	for (i = 0; i < adev->num_ip_blocks; i++) {
2141 		if (!adev->ip_blocks[i].status.sw)
2142 			continue;
2143 		if (adev->ip_blocks[i].status.hw)
2144 			continue;
2145 		if (!amdgpu_ip_member_of_hwini(
2146 			    adev, adev->ip_blocks[i].version->type))
2147 			continue;
2148 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2149 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2150 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2151 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2152 			if (r) {
2153 				dev_err(adev->dev,
2154 					"hw_init of IP block <%s> failed %d\n",
2155 					adev->ip_blocks[i].version->funcs->name,
2156 					r);
2157 				return r;
2158 			}
2159 			adev->ip_blocks[i].status.hw = true;
2160 		}
2161 	}
2162 
2163 	return 0;
2164 }
2165 
2166 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2167 {
2168 	int i, r;
2169 
2170 	for (i = 0; i < adev->num_ip_blocks; i++) {
2171 		if (!adev->ip_blocks[i].status.sw)
2172 			continue;
2173 		if (adev->ip_blocks[i].status.hw)
2174 			continue;
2175 		if (!amdgpu_ip_member_of_hwini(
2176 			    adev, adev->ip_blocks[i].version->type))
2177 			continue;
2178 		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2179 		if (r) {
2180 			dev_err(adev->dev,
2181 				"hw_init of IP block <%s> failed %d\n",
2182 				adev->ip_blocks[i].version->funcs->name, r);
2183 			return r;
2184 		}
2185 		adev->ip_blocks[i].status.hw = true;
2186 	}
2187 
2188 	return 0;
2189 }
2190 
2191 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2192 {
2193 	int r = 0;
2194 	int i;
2195 	uint32_t smu_version;
2196 
2197 	if (adev->asic_type >= CHIP_VEGA10) {
2198 		for (i = 0; i < adev->num_ip_blocks; i++) {
2199 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2200 				continue;
2201 
2202 			if (!amdgpu_ip_member_of_hwini(adev,
2203 						       AMD_IP_BLOCK_TYPE_PSP))
2204 				break;
2205 
2206 			if (!adev->ip_blocks[i].status.sw)
2207 				continue;
2208 
2209 			/* no need to do the fw loading again if already done*/
2210 			if (adev->ip_blocks[i].status.hw == true)
2211 				break;
2212 
2213 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2214 				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2215 				if (r)
2216 					return r;
2217 			} else {
2218 				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2219 				if (r) {
2220 					dev_err(adev->dev,
2221 						"hw_init of IP block <%s> failed %d\n",
2222 						adev->ip_blocks[i]
2223 							.version->funcs->name,
2224 						r);
2225 					return r;
2226 				}
2227 				adev->ip_blocks[i].status.hw = true;
2228 			}
2229 			break;
2230 		}
2231 	}
2232 
2233 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2234 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2235 
2236 	return r;
2237 }
2238 
2239 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2240 {
2241 	struct drm_sched_init_args args = {
2242 		.ops = &amdgpu_sched_ops,
2243 		.timeout_wq = adev->reset_domain->wq,
2244 		.dev = adev->dev,
2245 	};
2246 	long timeout;
2247 	int r, i;
2248 
2249 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2250 		struct amdgpu_ring *ring = adev->rings[i];
2251 
2252 		/* No need to setup the GPU scheduler for rings that don't need it */
2253 		if (!ring || ring->no_scheduler)
2254 			continue;
2255 
2256 		switch (ring->funcs->type) {
2257 		case AMDGPU_RING_TYPE_GFX:
2258 			timeout = adev->gfx_timeout;
2259 			break;
2260 		case AMDGPU_RING_TYPE_COMPUTE:
2261 			timeout = adev->compute_timeout;
2262 			break;
2263 		case AMDGPU_RING_TYPE_SDMA:
2264 			timeout = adev->sdma_timeout;
2265 			break;
2266 		default:
2267 			timeout = adev->video_timeout;
2268 			break;
2269 		}
2270 
2271 		args.timeout = timeout;
2272 		args.credit_limit = ring->num_hw_submission;
2273 		args.score = ring->sched_score;
2274 		args.name = ring->name;
2275 
2276 		r = drm_sched_init(&ring->sched, &args);
2277 		if (r) {
2278 			dev_err(adev->dev,
2279 				"Failed to create scheduler on ring %s.\n",
2280 				ring->name);
2281 			return r;
2282 		}
2283 		r = amdgpu_uvd_entity_init(adev, ring);
2284 		if (r) {
2285 			dev_err(adev->dev,
2286 				"Failed to create UVD scheduling entity on ring %s.\n",
2287 				ring->name);
2288 			return r;
2289 		}
2290 		r = amdgpu_vce_entity_init(adev, ring);
2291 		if (r) {
2292 			dev_err(adev->dev,
2293 				"Failed to create VCE scheduling entity on ring %s.\n",
2294 				ring->name);
2295 			return r;
2296 		}
2297 	}
2298 
2299 	if (adev->xcp_mgr)
2300 		amdgpu_xcp_update_partition_sched_list(adev);
2301 
2302 	return 0;
2303 }
2304 
2305 
2306 /**
2307  * amdgpu_device_ip_init - run init for hardware IPs
2308  *
2309  * @adev: amdgpu_device pointer
2310  *
2311  * Main initialization pass for hardware IPs.  The list of all the hardware
2312  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2313  * are run.  sw_init initializes the software state associated with each IP
2314  * and hw_init initializes the hardware associated with each IP.
2315  * Returns 0 on success, negative error code on failure.
2316  */
2317 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2318 {
2319 	bool init_badpage;
2320 	int i, r;
2321 
2322 	r = amdgpu_ras_init(adev);
2323 	if (r)
2324 		return r;
2325 
2326 	for (i = 0; i < adev->num_ip_blocks; i++) {
2327 		if (!adev->ip_blocks[i].status.valid)
2328 			continue;
2329 		if (adev->ip_blocks[i].version->funcs->sw_init) {
2330 			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2331 			if (r) {
2332 				dev_err(adev->dev,
2333 					"sw_init of IP block <%s> failed %d\n",
2334 					adev->ip_blocks[i].version->funcs->name,
2335 					r);
2336 				goto init_failed;
2337 			}
2338 		}
2339 		adev->ip_blocks[i].status.sw = true;
2340 
2341 		if (!amdgpu_ip_member_of_hwini(
2342 			    adev, adev->ip_blocks[i].version->type))
2343 			continue;
2344 
2345 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2346 			/* need to do common hw init early so everything is set up for gmc */
2347 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2348 			if (r) {
2349 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2350 					r);
2351 				goto init_failed;
2352 			}
2353 			adev->ip_blocks[i].status.hw = true;
2354 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2355 			/* need to do gmc hw init early so we can allocate gpu mem */
2356 			/* Try to reserve bad pages early */
2357 			if (amdgpu_sriov_vf(adev))
2358 				amdgpu_virt_exchange_data(adev);
2359 
2360 			r = amdgpu_device_mem_scratch_init(adev);
2361 			if (r) {
2362 				dev_err(adev->dev,
2363 					"amdgpu_mem_scratch_init failed %d\n",
2364 					r);
2365 				goto init_failed;
2366 			}
2367 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2368 			if (r) {
2369 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2370 					r);
2371 				goto init_failed;
2372 			}
2373 			r = amdgpu_device_wb_init(adev);
2374 			if (r) {
2375 				dev_err(adev->dev,
2376 					"amdgpu_device_wb_init failed %d\n", r);
2377 				goto init_failed;
2378 			}
2379 			adev->ip_blocks[i].status.hw = true;
2380 
2381 			/* right after GMC hw init, we create CSA */
2382 			if (adev->gfx.mcbp) {
2383 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2384 							       AMDGPU_GEM_DOMAIN_VRAM |
2385 							       AMDGPU_GEM_DOMAIN_GTT,
2386 							       AMDGPU_CSA_SIZE);
2387 				if (r) {
2388 					dev_err(adev->dev,
2389 						"allocate CSA failed %d\n", r);
2390 					goto init_failed;
2391 				}
2392 			}
2393 
2394 			r = amdgpu_seq64_init(adev);
2395 			if (r) {
2396 				dev_err(adev->dev, "allocate seq64 failed %d\n",
2397 					r);
2398 				goto init_failed;
2399 			}
2400 		}
2401 	}
2402 
2403 	if (amdgpu_sriov_vf(adev))
2404 		amdgpu_virt_init_data_exchange(adev);
2405 
2406 	r = amdgpu_ib_pool_init(adev);
2407 	if (r) {
2408 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2409 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2410 		goto init_failed;
2411 	}
2412 
2413 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2414 	if (r)
2415 		goto init_failed;
2416 
2417 	r = amdgpu_device_ip_hw_init_phase1(adev);
2418 	if (r)
2419 		goto init_failed;
2420 
2421 	r = amdgpu_device_fw_loading(adev);
2422 	if (r)
2423 		goto init_failed;
2424 
2425 	r = amdgpu_device_ip_hw_init_phase2(adev);
2426 	if (r)
2427 		goto init_failed;
2428 
2429 	/*
2430 	 * retired pages will be loaded from eeprom and reserved here,
2431 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2432 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2433 	 * for I2C communication which only true at this point.
2434 	 *
2435 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2436 	 * failure from bad gpu situation and stop amdgpu init process
2437 	 * accordingly. For other failed cases, it will still release all
2438 	 * the resource and print error message, rather than returning one
2439 	 * negative value to upper level.
2440 	 *
2441 	 * Note: theoretically, this should be called before all vram allocations
2442 	 * to protect retired page from abusing
2443 	 */
2444 	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2445 	r = amdgpu_ras_recovery_init(adev, init_badpage);
2446 	if (r)
2447 		goto init_failed;
2448 
2449 	/**
2450 	 * In case of XGMI grab extra reference for reset domain for this device
2451 	 */
2452 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2453 		if (amdgpu_xgmi_add_device(adev) == 0) {
2454 			if (!amdgpu_sriov_vf(adev)) {
2455 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2456 
2457 				if (WARN_ON(!hive)) {
2458 					r = -ENOENT;
2459 					goto init_failed;
2460 				}
2461 
2462 				if (!hive->reset_domain ||
2463 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2464 					r = -ENOENT;
2465 					amdgpu_put_xgmi_hive(hive);
2466 					goto init_failed;
2467 				}
2468 
2469 				/* Drop the early temporary reset domain we created for device */
2470 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2471 				adev->reset_domain = hive->reset_domain;
2472 				amdgpu_put_xgmi_hive(hive);
2473 			}
2474 		}
2475 	}
2476 
2477 	r = amdgpu_device_init_schedulers(adev);
2478 	if (r)
2479 		goto init_failed;
2480 
2481 	amdgpu_ttm_enable_buffer_funcs(adev);
2482 
2483 	/* Don't init kfd if whole hive need to be reset during init */
2484 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
2485 		amdgpu_amdkfd_device_init(adev);
2486 	}
2487 
2488 	amdgpu_fru_get_product_info(adev);
2489 
2490 	r = amdgpu_cper_init(adev);
2491 
2492 init_failed:
2493 
2494 	return r;
2495 }
2496 
2497 /**
2498  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2499  *
2500  * @adev: amdgpu_device pointer
2501  *
2502  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2503  * this function before a GPU reset.  If the value is retained after a
2504  * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2505  */
2506 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2507 {
2508 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2509 }
2510 
2511 /**
2512  * amdgpu_device_check_vram_lost - check if vram is valid
2513  *
2514  * @adev: amdgpu_device pointer
2515  *
2516  * Checks the reset magic value written to the gart pointer in VRAM.
2517  * The driver calls this after a GPU reset to see if the contents of
2518  * VRAM is lost or now.
2519  * returns true if vram is lost, false if not.
2520  */
2521 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2522 {
2523 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2524 			AMDGPU_RESET_MAGIC_NUM))
2525 		return true;
2526 
2527 	if (!amdgpu_in_reset(adev))
2528 		return false;
2529 
2530 	/*
2531 	 * For all ASICs with baco/mode1 reset, the VRAM is
2532 	 * always assumed to be lost.
2533 	 */
2534 	switch (amdgpu_asic_reset_method(adev)) {
2535 	case AMD_RESET_METHOD_LEGACY:
2536 	case AMD_RESET_METHOD_LINK:
2537 	case AMD_RESET_METHOD_BACO:
2538 	case AMD_RESET_METHOD_MODE1:
2539 		return true;
2540 	default:
2541 		return false;
2542 	}
2543 }
2544 
2545 /**
2546  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2547  *
2548  * @adev: amdgpu_device pointer
2549  * @state: clockgating state (gate or ungate)
2550  *
2551  * The list of all the hardware IPs that make up the asic is walked and the
2552  * set_clockgating_state callbacks are run.
2553  * Late initialization pass enabling clockgating for hardware IPs.
2554  * Fini or suspend, pass disabling clockgating for hardware IPs.
2555  * Returns 0 on success, negative error code on failure.
2556  */
2557 
2558 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2559 			       enum amd_clockgating_state state)
2560 {
2561 	int i, j, r;
2562 
2563 	if (amdgpu_emu_mode == 1)
2564 		return 0;
2565 
2566 	for (j = 0; j < adev->num_ip_blocks; j++) {
2567 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2568 		if (!adev->ip_blocks[i].status.late_initialized)
2569 			continue;
2570 		if (!adev->ip_blocks[i].version)
2571 			continue;
2572 		/* skip CG for GFX, SDMA on S0ix */
2573 		if (adev->in_s0ix &&
2574 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2575 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2576 			continue;
2577 		/* skip CG for VCE/UVD, it's handled specially */
2578 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2579 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2580 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2581 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2582 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2583 			/* enable clockgating to save power */
2584 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
2585 										     state);
2586 			if (r) {
2587 				dev_err(adev->dev,
2588 					"set_clockgating_state(gate) of IP block <%s> failed %d\n",
2589 					adev->ip_blocks[i].version->funcs->name,
2590 					r);
2591 				return r;
2592 			}
2593 		}
2594 	}
2595 
2596 	return 0;
2597 }
2598 
2599 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2600 			       enum amd_powergating_state state)
2601 {
2602 	int i, j, r;
2603 
2604 	if (amdgpu_emu_mode == 1)
2605 		return 0;
2606 
2607 	for (j = 0; j < adev->num_ip_blocks; j++) {
2608 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2609 		if (!adev->ip_blocks[i].status.late_initialized)
2610 			continue;
2611 		if (!adev->ip_blocks[i].version)
2612 			continue;
2613 		/* skip PG for GFX, SDMA on S0ix */
2614 		if (adev->in_s0ix &&
2615 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2616 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2617 			continue;
2618 		/* skip CG for VCE/UVD, it's handled specially */
2619 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2620 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2621 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2622 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2623 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2624 			/* enable powergating to save power */
2625 			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
2626 											state);
2627 			if (r) {
2628 				dev_err(adev->dev,
2629 					"set_powergating_state(gate) of IP block <%s> failed %d\n",
2630 					adev->ip_blocks[i].version->funcs->name,
2631 					r);
2632 				return r;
2633 			}
2634 		}
2635 	}
2636 	return 0;
2637 }
2638 
2639 static int amdgpu_device_enable_mgpu_fan_boost(void)
2640 {
2641 	struct amdgpu_gpu_instance *gpu_ins;
2642 	struct amdgpu_device *adev;
2643 	int i, ret = 0;
2644 
2645 	mutex_lock(&mgpu_info.mutex);
2646 
2647 	/*
2648 	 * MGPU fan boost feature should be enabled
2649 	 * only when there are two or more dGPUs in
2650 	 * the system
2651 	 */
2652 	if (mgpu_info.num_dgpu < 2)
2653 		goto out;
2654 
2655 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2656 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2657 		adev = gpu_ins->adev;
2658 		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
2659 		    !gpu_ins->mgpu_fan_enabled) {
2660 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2661 			if (ret)
2662 				break;
2663 
2664 			gpu_ins->mgpu_fan_enabled = 1;
2665 		}
2666 	}
2667 
2668 out:
2669 	mutex_unlock(&mgpu_info.mutex);
2670 
2671 	return ret;
2672 }
2673 
2674 /**
2675  * amdgpu_device_ip_late_init - run late init for hardware IPs
2676  *
2677  * @adev: amdgpu_device pointer
2678  *
2679  * Late initialization pass for hardware IPs.  The list of all the hardware
2680  * IPs that make up the asic is walked and the late_init callbacks are run.
2681  * late_init covers any special initialization that an IP requires
2682  * after all of the have been initialized or something that needs to happen
2683  * late in the init process.
2684  * Returns 0 on success, negative error code on failure.
2685  */
2686 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2687 {
2688 	struct amdgpu_gpu_instance *gpu_instance;
2689 	int i = 0, r;
2690 
2691 	for (i = 0; i < adev->num_ip_blocks; i++) {
2692 		if (!adev->ip_blocks[i].status.hw)
2693 			continue;
2694 		if (adev->ip_blocks[i].version->funcs->late_init) {
2695 			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
2696 			if (r) {
2697 				dev_err(adev->dev,
2698 					"late_init of IP block <%s> failed %d\n",
2699 					adev->ip_blocks[i].version->funcs->name,
2700 					r);
2701 				return r;
2702 			}
2703 		}
2704 		adev->ip_blocks[i].status.late_initialized = true;
2705 	}
2706 
2707 	r = amdgpu_ras_late_init(adev);
2708 	if (r) {
2709 		dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
2710 		return r;
2711 	}
2712 
2713 	if (!amdgpu_reset_in_recovery(adev))
2714 		amdgpu_ras_set_error_query_ready(adev, true);
2715 
2716 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2717 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2718 
2719 	amdgpu_device_fill_reset_magic(adev);
2720 
2721 	r = amdgpu_device_enable_mgpu_fan_boost();
2722 	if (r)
2723 		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
2724 
2725 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2726 	if (amdgpu_passthrough(adev) &&
2727 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2728 	     adev->asic_type == CHIP_ALDEBARAN))
2729 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2730 
2731 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2732 		mutex_lock(&mgpu_info.mutex);
2733 
2734 		/*
2735 		 * Reset device p-state to low as this was booted with high.
2736 		 *
2737 		 * This should be performed only after all devices from the same
2738 		 * hive get initialized.
2739 		 *
2740 		 * However, it's unknown how many device in the hive in advance.
2741 		 * As this is counted one by one during devices initializations.
2742 		 *
2743 		 * So, we wait for all XGMI interlinked devices initialized.
2744 		 * This may bring some delays as those devices may come from
2745 		 * different hives. But that should be OK.
2746 		 */
2747 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2748 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2749 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2750 				if (gpu_instance->adev->flags & AMD_IS_APU)
2751 					continue;
2752 
2753 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2754 						AMDGPU_XGMI_PSTATE_MIN);
2755 				if (r) {
2756 					dev_err(adev->dev,
2757 						"pstate setting failed (%d).\n",
2758 						r);
2759 					break;
2760 				}
2761 			}
2762 		}
2763 
2764 		mutex_unlock(&mgpu_info.mutex);
2765 	}
2766 
2767 	return 0;
2768 }
2769 
2770 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
2771 {
2772 	struct amdgpu_device *adev = ip_block->adev;
2773 	int r;
2774 
2775 	if (!ip_block->version->funcs->hw_fini) {
2776 		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
2777 			ip_block->version->funcs->name);
2778 	} else {
2779 		r = ip_block->version->funcs->hw_fini(ip_block);
2780 		/* XXX handle errors */
2781 		if (r) {
2782 			dev_dbg(adev->dev,
2783 				"hw_fini of IP block <%s> failed %d\n",
2784 				ip_block->version->funcs->name, r);
2785 		}
2786 	}
2787 
2788 	ip_block->status.hw = false;
2789 }
2790 
2791 /**
2792  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2793  *
2794  * @adev: amdgpu_device pointer
2795  *
2796  * For ASICs need to disable SMC first
2797  */
2798 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2799 {
2800 	int i;
2801 
2802 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
2803 		return;
2804 
2805 	for (i = 0; i < adev->num_ip_blocks; i++) {
2806 		if (!adev->ip_blocks[i].status.hw)
2807 			continue;
2808 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2809 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2810 			break;
2811 		}
2812 	}
2813 }
2814 
2815 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2816 {
2817 	int i, r;
2818 
2819 	for (i = 0; i < adev->num_ip_blocks; i++) {
2820 		if (!adev->ip_blocks[i].version)
2821 			continue;
2822 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2823 			continue;
2824 
2825 		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
2826 		if (r) {
2827 			dev_dbg(adev->dev,
2828 				"early_fini of IP block <%s> failed %d\n",
2829 				adev->ip_blocks[i].version->funcs->name, r);
2830 		}
2831 	}
2832 
2833 	amdgpu_amdkfd_suspend(adev, true);
2834 	amdgpu_amdkfd_teardown_processes(adev);
2835 	amdgpu_userq_suspend(adev);
2836 
2837 	/* Workaround for ASICs need to disable SMC first */
2838 	amdgpu_device_smu_fini_early(adev);
2839 
2840 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2841 		if (!adev->ip_blocks[i].status.hw)
2842 			continue;
2843 
2844 		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2845 	}
2846 
2847 	if (amdgpu_sriov_vf(adev)) {
2848 		if (amdgpu_virt_release_full_gpu(adev, false))
2849 			dev_err(adev->dev,
2850 				"failed to release exclusive mode on fini\n");
2851 	}
2852 
2853 	/*
2854 	 * Driver reload on the APU can fail due to firmware validation because
2855 	 * the PSP is always running, as it is shared across the whole SoC.
2856 	 * This same issue does not occur on dGPU because it has a mechanism
2857 	 * that checks whether the PSP is running. A solution for those issues
2858 	 * in the APU is to trigger a GPU reset, but this should be done during
2859 	 * the unload phase to avoid adding boot latency and screen flicker.
2860 	 * GFX V11 has GC block as default off IP. Every time AMDGPU driver sends
2861 	 * a request to PMFW to unload MP1, PMFW will put GC in reset and power down
2862 	 * the voltage. Hence, skipping reset for APUs with GFX V11 or later.
2863 	 */
2864 	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu &&
2865 		amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) {
2866 		r = amdgpu_asic_reset(adev);
2867 		if (r)
2868 			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
2869 	}
2870 
2871 	return 0;
2872 }
2873 
2874 /**
2875  * amdgpu_device_ip_fini - run fini for hardware IPs
2876  *
2877  * @adev: amdgpu_device pointer
2878  *
2879  * Main teardown pass for hardware IPs.  The list of all the hardware
2880  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2881  * are run.  hw_fini tears down the hardware associated with each IP
2882  * and sw_fini tears down any software state associated with each IP.
2883  * Returns 0 on success, negative error code on failure.
2884  */
2885 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2886 {
2887 	int i, r;
2888 
2889 	amdgpu_cper_fini(adev);
2890 
2891 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2892 		amdgpu_virt_release_ras_err_handler_data(adev);
2893 
2894 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2895 		amdgpu_xgmi_remove_device(adev);
2896 
2897 	amdgpu_amdkfd_device_fini_sw(adev);
2898 
2899 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2900 		if (!adev->ip_blocks[i].status.sw)
2901 			continue;
2902 
2903 		if (!adev->ip_blocks[i].version)
2904 			continue;
2905 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2906 			amdgpu_ucode_free_bo(adev);
2907 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2908 			amdgpu_device_wb_fini(adev);
2909 			amdgpu_device_mem_scratch_fini(adev);
2910 			amdgpu_ib_pool_fini(adev);
2911 			amdgpu_seq64_fini(adev);
2912 			amdgpu_doorbell_fini(adev);
2913 		}
2914 		if (adev->ip_blocks[i].version->funcs->sw_fini) {
2915 			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
2916 			/* XXX handle errors */
2917 			if (r) {
2918 				dev_dbg(adev->dev,
2919 					"sw_fini of IP block <%s> failed %d\n",
2920 					adev->ip_blocks[i].version->funcs->name,
2921 					r);
2922 			}
2923 		}
2924 		adev->ip_blocks[i].status.sw = false;
2925 		adev->ip_blocks[i].status.valid = false;
2926 	}
2927 
2928 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2929 		if (!adev->ip_blocks[i].status.late_initialized)
2930 			continue;
2931 		if (!adev->ip_blocks[i].version)
2932 			continue;
2933 		if (adev->ip_blocks[i].version->funcs->late_fini)
2934 			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
2935 		adev->ip_blocks[i].status.late_initialized = false;
2936 	}
2937 
2938 	amdgpu_ras_fini(adev);
2939 	amdgpu_uid_fini(adev);
2940 
2941 	return 0;
2942 }
2943 
2944 /**
2945  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2946  *
2947  * @work: work_struct.
2948  */
2949 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2950 {
2951 	struct amdgpu_device *adev =
2952 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2953 	int r;
2954 
2955 	r = amdgpu_ib_ring_tests(adev);
2956 	if (r)
2957 		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
2958 }
2959 
2960 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2961 {
2962 	struct amdgpu_device *adev =
2963 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2964 
2965 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2966 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2967 
2968 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
2969 		adev->gfx.gfx_off_state = true;
2970 }
2971 
2972 /**
2973  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2974  *
2975  * @adev: amdgpu_device pointer
2976  *
2977  * Main suspend function for hardware IPs.  The list of all the hardware
2978  * IPs that make up the asic is walked, clockgating is disabled and the
2979  * suspend callbacks are run.  suspend puts the hardware and software state
2980  * in each IP into a state suitable for suspend.
2981  * Returns 0 on success, negative error code on failure.
2982  */
2983 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2984 {
2985 	int i, r, rec;
2986 
2987 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2988 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2989 
2990 	/*
2991 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
2992 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2993 	 * scenario. Add the missing df cstate disablement here.
2994 	 */
2995 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2996 		dev_warn(adev->dev, "Failed to disallow df cstate");
2997 
2998 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2999 		if (!adev->ip_blocks[i].status.valid)
3000 			continue;
3001 
3002 		/* displays are handled separately */
3003 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3004 			continue;
3005 
3006 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3007 		if (r)
3008 			goto unwind;
3009 	}
3010 
3011 	return 0;
3012 unwind:
3013 	rec = amdgpu_device_ip_resume_phase3(adev);
3014 	if (rec)
3015 		dev_err(adev->dev,
3016 			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
3017 			rec);
3018 
3019 	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
3020 
3021 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3022 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3023 
3024 	return r;
3025 }
3026 
3027 /**
3028  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3029  *
3030  * @adev: amdgpu_device pointer
3031  *
3032  * Main suspend function for hardware IPs.  The list of all the hardware
3033  * IPs that make up the asic is walked, clockgating is disabled and the
3034  * suspend callbacks are run.  suspend puts the hardware and software state
3035  * in each IP into a state suitable for suspend.
3036  * Returns 0 on success, negative error code on failure.
3037  */
3038 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3039 {
3040 	int i, r, rec;
3041 
3042 	if (adev->in_s0ix)
3043 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3044 
3045 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3046 		if (!adev->ip_blocks[i].status.valid || !adev->ip_blocks[i].status.hw)
3047 			continue;
3048 		/* displays are handled in phase1 */
3049 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3050 			continue;
3051 		/* PSP lost connection when err_event_athub occurs */
3052 		if (amdgpu_ras_intr_triggered() &&
3053 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3054 			adev->ip_blocks[i].status.hw = false;
3055 			continue;
3056 		}
3057 
3058 		/* skip unnecessary suspend if we do not initialize them yet */
3059 		if (!amdgpu_ip_member_of_hwini(
3060 			    adev, adev->ip_blocks[i].version->type))
3061 			continue;
3062 
3063 		/* Since we skip suspend for S0i3, we need to cancel the delayed
3064 		 * idle work here as the suspend callback never gets called.
3065 		 */
3066 		if (adev->in_s0ix &&
3067 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3068 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3069 			cancel_delayed_work_sync(&adev->gfx.idle_work);
3070 		/* skip suspend of gfx/mes and psp for S0ix
3071 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3072 		 * like at runtime. PSP is also part of the always on hardware
3073 		 * so no need to suspend it.
3074 		 */
3075 		if (adev->in_s0ix &&
3076 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3077 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3078 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3079 			continue;
3080 
3081 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3082 		if (adev->in_s0ix &&
3083 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3084 		     IP_VERSION(5, 0, 0)) &&
3085 		    (adev->ip_blocks[i].version->type ==
3086 		     AMD_IP_BLOCK_TYPE_SDMA))
3087 			continue;
3088 
3089 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3090 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3091 		 * from this location and RLC Autoload automatically also gets loaded
3092 		 * from here based on PMFW -> PSP message during re-init sequence.
3093 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3094 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3095 		 */
3096 		if (amdgpu_in_reset(adev) &&
3097 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3098 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3099 			continue;
3100 
3101 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3102 		if (r)
3103 			goto unwind;
3104 
3105 		/* handle putting the SMC in the appropriate state */
3106 		if (!amdgpu_sriov_vf(adev)) {
3107 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3108 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3109 				if (r) {
3110 					dev_err(adev->dev,
3111 						"SMC failed to set mp1 state %d, %d\n",
3112 						adev->mp1_state, r);
3113 					goto unwind;
3114 				}
3115 			}
3116 		}
3117 	}
3118 
3119 	return 0;
3120 unwind:
3121 	/* suspend phase 2 = resume phase 1 + resume phase 2 */
3122 	rec = amdgpu_device_ip_resume_phase1(adev);
3123 	if (rec) {
3124 		dev_err(adev->dev,
3125 			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3126 			rec);
3127 		return r;
3128 	}
3129 
3130 	rec = amdgpu_device_fw_loading(adev);
3131 	if (rec) {
3132 		dev_err(adev->dev,
3133 			"amdgpu_device_fw_loading failed during unwind: %d\n",
3134 			rec);
3135 		return r;
3136 	}
3137 
3138 	rec = amdgpu_device_ip_resume_phase2(adev);
3139 	if (rec) {
3140 		dev_err(adev->dev,
3141 			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3142 			rec);
3143 		return r;
3144 	}
3145 
3146 	return r;
3147 }
3148 
3149 /**
3150  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3151  *
3152  * @adev: amdgpu_device pointer
3153  *
3154  * Main suspend function for hardware IPs.  The list of all the hardware
3155  * IPs that make up the asic is walked, clockgating is disabled and the
3156  * suspend callbacks are run.  suspend puts the hardware and software state
3157  * in each IP into a state suitable for suspend.
3158  * Returns 0 on success, negative error code on failure.
3159  */
3160 static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3161 {
3162 	int r;
3163 
3164 	if (amdgpu_sriov_vf(adev)) {
3165 		amdgpu_virt_fini_data_exchange(adev);
3166 		amdgpu_virt_request_full_gpu(adev, false);
3167 	}
3168 
3169 	amdgpu_ttm_disable_buffer_funcs(adev);
3170 
3171 	r = amdgpu_device_ip_suspend_phase1(adev);
3172 	if (r)
3173 		return r;
3174 	r = amdgpu_device_ip_suspend_phase2(adev);
3175 
3176 	if (amdgpu_sriov_vf(adev))
3177 		amdgpu_virt_release_full_gpu(adev, false);
3178 
3179 	return r;
3180 }
3181 
3182 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3183 {
3184 	int i, r;
3185 
3186 	static enum amd_ip_block_type ip_order[] = {
3187 		AMD_IP_BLOCK_TYPE_COMMON,
3188 		AMD_IP_BLOCK_TYPE_GMC,
3189 		AMD_IP_BLOCK_TYPE_PSP,
3190 		AMD_IP_BLOCK_TYPE_IH,
3191 	};
3192 
3193 	for (i = 0; i < adev->num_ip_blocks; i++) {
3194 		int j;
3195 		struct amdgpu_ip_block *block;
3196 
3197 		block = &adev->ip_blocks[i];
3198 		block->status.hw = false;
3199 
3200 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3201 
3202 			if (block->version->type != ip_order[j] ||
3203 				!block->status.valid)
3204 				continue;
3205 
3206 			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3207 			if (r) {
3208 				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3209 					 block->version->funcs->name);
3210 				return r;
3211 			}
3212 			block->status.hw = true;
3213 		}
3214 	}
3215 
3216 	return 0;
3217 }
3218 
3219 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3220 {
3221 	struct amdgpu_ip_block *block;
3222 	int i, r = 0;
3223 
3224 	static enum amd_ip_block_type ip_order[] = {
3225 		AMD_IP_BLOCK_TYPE_SMC,
3226 		AMD_IP_BLOCK_TYPE_DCE,
3227 		AMD_IP_BLOCK_TYPE_GFX,
3228 		AMD_IP_BLOCK_TYPE_SDMA,
3229 		AMD_IP_BLOCK_TYPE_MES,
3230 		AMD_IP_BLOCK_TYPE_UVD,
3231 		AMD_IP_BLOCK_TYPE_VCE,
3232 		AMD_IP_BLOCK_TYPE_VCN,
3233 		AMD_IP_BLOCK_TYPE_JPEG
3234 	};
3235 
3236 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3237 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
3238 
3239 		if (!block)
3240 			continue;
3241 
3242 		if (block->status.valid && !block->status.hw) {
3243 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3244 				r = amdgpu_ip_block_resume(block);
3245 			} else {
3246 				r = block->version->funcs->hw_init(block);
3247 			}
3248 
3249 			if (r) {
3250 				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3251 					 block->version->funcs->name);
3252 				break;
3253 			}
3254 			block->status.hw = true;
3255 		}
3256 	}
3257 
3258 	return r;
3259 }
3260 
3261 /**
3262  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3263  *
3264  * @adev: amdgpu_device pointer
3265  *
3266  * First resume function for hardware IPs.  The list of all the hardware
3267  * IPs that make up the asic is walked and the resume callbacks are run for
3268  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3269  * after a suspend and updates the software state as necessary.  This
3270  * function is also used for restoring the GPU after a GPU reset.
3271  * Returns 0 on success, negative error code on failure.
3272  */
3273 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3274 {
3275 	int i, r;
3276 
3277 	for (i = 0; i < adev->num_ip_blocks; i++) {
3278 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3279 			continue;
3280 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3281 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3282 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3283 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3284 
3285 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3286 			if (r)
3287 				return r;
3288 		}
3289 	}
3290 
3291 	return 0;
3292 }
3293 
3294 /**
3295  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3296  *
3297  * @adev: amdgpu_device pointer
3298  *
3299  * Second resume function for hardware IPs.  The list of all the hardware
3300  * IPs that make up the asic is walked and the resume callbacks are run for
3301  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3302  * functional state after a suspend and updates the software state as
3303  * necessary.  This function is also used for restoring the GPU after a GPU
3304  * reset.
3305  * Returns 0 on success, negative error code on failure.
3306  */
3307 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3308 {
3309 	int i, r;
3310 
3311 	for (i = 0; i < adev->num_ip_blocks; i++) {
3312 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3313 			continue;
3314 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3315 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3316 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3317 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3318 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3319 			continue;
3320 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3321 		if (r)
3322 			return r;
3323 	}
3324 
3325 	return 0;
3326 }
3327 
3328 /**
3329  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3330  *
3331  * @adev: amdgpu_device pointer
3332  *
3333  * Third resume function for hardware IPs.  The list of all the hardware
3334  * IPs that make up the asic is walked and the resume callbacks are run for
3335  * all DCE.  resume puts the hardware into a functional state after a suspend
3336  * and updates the software state as necessary.  This function is also used
3337  * for restoring the GPU after a GPU reset.
3338  *
3339  * Returns 0 on success, negative error code on failure.
3340  */
3341 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3342 {
3343 	int i, r;
3344 
3345 	for (i = 0; i < adev->num_ip_blocks; i++) {
3346 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3347 			continue;
3348 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3349 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3350 			if (r)
3351 				return r;
3352 		}
3353 	}
3354 
3355 	return 0;
3356 }
3357 
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  Runs, in order: resume phase 1,
 * firmware loading, resume phase 2, re-enabling of the TTM buffer
 * functions, fence driver hardware init and resume phase 3.  A failure of
 * phase 1 or firmware loading returns immediately; a failure of phase 2
 * returns after the buffer functions have been re-enabled.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int ret;

	ret = amdgpu_device_ip_resume_phase1(adev);
	if (!ret)
		ret = amdgpu_device_fw_loading(adev);
	if (ret)
		return ret;

	ret = amdgpu_device_ip_resume_phase2(adev);

	/* buffer funcs are re-enabled whatever the phase 2 result was */
	amdgpu_ttm_enable_buffer_funcs(adev);

	if (ret)
		return ret;

	amdgpu_fence_driver_hw_init(adev);

	return amdgpu_device_ip_resume_phase3(adev);
}
3395 
3396 /**
3397  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3398  *
3399  * @adev: amdgpu_device pointer
3400  *
3401  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3402  */
3403 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3404 {
3405 	if (amdgpu_sriov_vf(adev)) {
3406 		if (adev->is_atom_fw) {
3407 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3408 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3409 		} else {
3410 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3411 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3412 		}
3413 
3414 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3415 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3416 	}
3417 }
3418 
/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @pdev : pci device context, only used for the log message
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * The answer depends on the asic type, on the amdgpu_dc setting and on the
 * kernel configuration (CONFIG_DRM_AMDGPU_SI, CONFIG_DRM_AMD_DC,
 * CONFIG_DRM_AMD_DC_SI).
 * Returns true if DC has support, false if not.
 */
bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
				       enum amd_asic_type asic_type)
{
	switch (asic_type) {
	/* CHIP_HAINAN only gets its own case when SI support is built in */
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
		/* these additionally require CONFIG_DRM_AMD_DC_SI */
		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
	default:
		/* everything else: supported unless amdgpu_dc is 0 */
		return amdgpu_dc != 0;
#else
	default:
		/*
		 * Built without CONFIG_DRM_AMD_DC: never supported.  Log once
		 * if amdgpu_dc is positive.
		 */
		if (amdgpu_dc > 0)
			dev_info_once(
				&pdev->dev,
				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
		return false;
#endif
	}
}
3456 
3457 /**
3458  * amdgpu_device_has_dc_support - check if dc is supported
3459  *
3460  * @adev: amdgpu_device pointer
3461  *
3462  * Returns true for supported, false for not supported
3463  */
3464 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3465 {
3466 	if (adev->enable_virtual_display ||
3467 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3468 		return false;
3469 
3470 	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
3471 }
3472 
3473 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3474 {
3475 	struct amdgpu_device *adev =
3476 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3477 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3478 
3479 	/* It's a bug to not have a hive within this function */
3480 	if (WARN_ON(!hive))
3481 		return;
3482 
3483 	/*
3484 	 * Use task barrier to synchronize all xgmi reset works across the
3485 	 * hive. task_barrier_enter and task_barrier_exit will block
3486 	 * until all the threads running the xgmi reset works reach
3487 	 * those points. task_barrier_full will do both blocks.
3488 	 */
3489 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3490 
3491 		task_barrier_enter(&hive->tb);
3492 		adev->asic_reset_res = amdgpu_device_baco_enter(adev);
3493 
3494 		if (adev->asic_reset_res)
3495 			goto fail;
3496 
3497 		task_barrier_exit(&hive->tb);
3498 		adev->asic_reset_res = amdgpu_device_baco_exit(adev);
3499 
3500 		if (adev->asic_reset_res)
3501 			goto fail;
3502 
3503 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3504 	} else {
3505 
3506 		task_barrier_full(&hive->tb);
3507 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3508 	}
3509 
3510 fail:
3511 	if (adev->asic_reset_res)
3512 		dev_warn(adev->dev,
3513 			 "ASIC reset failed with error, %d for drm dev, %s",
3514 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3515 	amdgpu_put_xgmi_hive(hive);
3516 }
3517 
3518 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3519 {
3520 	char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
3521 	char *input = buf;
3522 	char *timeout_setting = NULL;
3523 	int index = 0;
3524 	long timeout;
3525 	int ret = 0;
3526 
3527 	/* By default timeout for all queues is 2 sec */
3528 	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3529 		adev->video_timeout = msecs_to_jiffies(2000);
3530 
3531 	if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
3532 		return 0;
3533 
3534 	/*
3535 	 * strsep() destructively modifies its input by replacing delimiters
3536 	 * with '\0'. Use a stack copy so the global module parameter buffer
3537 	 * remains intact for multi-GPU systems where this function is called
3538 	 * once per device.
3539 	 */
3540 	strscpy(buf, amdgpu_lockup_timeout, sizeof(buf));
3541 
3542 	while ((timeout_setting = strsep(&input, ",")) &&
3543 	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3544 		ret = kstrtol(timeout_setting, 0, &timeout);
3545 		if (ret)
3546 			return ret;
3547 
3548 		if (timeout == 0) {
3549 			index++;
3550 			continue;
3551 		} else if (timeout < 0) {
3552 			timeout = MAX_SCHEDULE_TIMEOUT;
3553 			dev_warn(adev->dev, "lockup timeout disabled");
3554 			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3555 		} else {
3556 			timeout = msecs_to_jiffies(timeout);
3557 		}
3558 
3559 		switch (index++) {
3560 		case 0:
3561 			adev->gfx_timeout = timeout;
3562 			break;
3563 		case 1:
3564 			adev->compute_timeout = timeout;
3565 			break;
3566 		case 2:
3567 			adev->sdma_timeout = timeout;
3568 			break;
3569 		case 3:
3570 			adev->video_timeout = timeout;
3571 			break;
3572 		default:
3573 			break;
3574 		}
3575 	}
3576 
3577 	/* When only one value specified apply it to all queues. */
3578 	if (index == 1)
3579 		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3580 			adev->video_timeout = timeout;
3581 
3582 	return ret;
3583 }
3584 
3585 /**
3586  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3587  *
3588  * @adev: amdgpu_device pointer
3589  *
3590  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3591  */
3592 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3593 {
3594 	struct iommu_domain *domain;
3595 
3596 	domain = iommu_get_domain_for_dev(adev->dev);
3597 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3598 		adev->ram_is_direct_mapped = true;
3599 }
3600 
3601 #if defined(CONFIG_HSA_AMD_P2P)
3602 /**
3603  * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
3604  *
3605  * @adev: amdgpu_device pointer
3606  *
3607  * return if IOMMU remapping bar address
3608  */
3609 static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
3610 {
3611 	struct iommu_domain *domain;
3612 
3613 	domain = iommu_get_domain_for_dev(adev->dev);
3614 	if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
3615 		domain->type ==	IOMMU_DOMAIN_DMA_FQ))
3616 		return true;
3617 
3618 	return false;
3619 }
3620 #endif
3621 
3622 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3623 {
3624 	if (amdgpu_mcbp == 1)
3625 		adev->gfx.mcbp = true;
3626 	else if (amdgpu_mcbp == 0)
3627 		adev->gfx.mcbp = false;
3628 
3629 	if (amdgpu_sriov_vf(adev))
3630 		adev->gfx.mcbp = true;
3631 
3632 	if (adev->gfx.mcbp)
3633 		dev_info(adev->dev, "MCBP is enabled\n");
3634 }
3635 
3636 static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
3637 {
3638 	int r;
3639 
3640 	r = amdgpu_atombios_sysfs_init(adev);
3641 	if (r)
3642 		drm_err(&adev->ddev,
3643 			"registering atombios sysfs failed (%d).\n", r);
3644 
3645 	r = amdgpu_pm_sysfs_init(adev);
3646 	if (r)
3647 		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
3648 
3649 	r = amdgpu_ucode_sysfs_init(adev);
3650 	if (r) {
3651 		adev->ucode_sysfs_en = false;
3652 		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
3653 	} else
3654 		adev->ucode_sysfs_en = true;
3655 
3656 	r = amdgpu_device_attr_sysfs_init(adev);
3657 	if (r)
3658 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3659 
3660 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
3661 	if (r)
3662 		dev_err(adev->dev,
3663 			"Could not create amdgpu board attributes\n");
3664 
3665 	amdgpu_fru_sysfs_init(adev);
3666 	amdgpu_reg_state_sysfs_init(adev);
3667 	amdgpu_xcp_sysfs_init(adev);
3668 	amdgpu_uma_sysfs_init(adev);
3669 	amdgpu_ptl_sysfs_init(adev);
3670 
3671 	return r;
3672 }
3673 
/*
 * Tear down the sysfs interfaces of the device.  The pm and firmware
 * (ucode) interfaces are only removed when their respective "initialized"
 * flags are set; the remaining fini helpers are called unconditionally.
 */
static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
{
	/* only remove what was actually registered */
	if (adev->pm.sysfs_initialized)
		amdgpu_pm_sysfs_fini(adev);
	/* ucode_sysfs_en is set by amdgpu_device_sys_interface_init() */
	if (adev->ucode_sysfs_en)
		amdgpu_ucode_sysfs_fini(adev);
	amdgpu_device_attr_sysfs_fini(adev);
	amdgpu_fru_sysfs_fini(adev);

	amdgpu_reg_state_sysfs_fini(adev);
	amdgpu_xcp_sysfs_fini(adev);
	amdgpu_uma_sysfs_fini(adev);
	amdgpu_ptl_sysfs_fini(adev);
}
3688 
3689 /**
3690  * amdgpu_device_init - initialize the driver
3691  *
3692  * @adev: amdgpu_device pointer
3693  * @flags: driver flags
3694  *
3695  * Initializes the driver info and hw (all asics).
3696  * Returns 0 for success or an error on failure.
3697  * Called at driver startup.
3698  */
3699 int amdgpu_device_init(struct amdgpu_device *adev,
3700 		       uint32_t flags)
3701 {
3702 	struct pci_dev *pdev = adev->pdev;
3703 	int r, i;
3704 	bool px = false;
3705 	u32 max_MBps;
3706 	int tmp;
3707 
3708 	adev->shutdown = false;
3709 	adev->flags = flags;
3710 
3711 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3712 		adev->asic_type = amdgpu_force_asic_type;
3713 	else
3714 		adev->asic_type = flags & AMD_ASIC_MASK;
3715 
3716 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3717 	if (amdgpu_emu_mode == 1)
3718 		adev->usec_timeout *= 10;
3719 	adev->gmc.gart_size = 512 * 1024 * 1024;
3720 	adev->accel_working = false;
3721 	adev->num_rings = 0;
3722 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3723 	adev->mman.buffer_funcs = NULL;
3724 	adev->mman.num_buffer_funcs_scheds = 0;
3725 	adev->vm_manager.vm_pte_funcs = NULL;
3726 	adev->vm_manager.vm_pte_num_scheds = 0;
3727 	adev->gmc.gmc_funcs = NULL;
3728 	adev->harvest_ip_mask = 0x0;
3729 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3730 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3731 
3732 	amdgpu_reg_access_init(adev);
3733 
3734 	dev_info(
3735 		adev->dev,
3736 		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3737 		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3738 		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3739 
3740 	/* mutex initialization are all done here so we
3741 	 * can recall function without having locking issues
3742 	 */
3743 	mutex_init(&adev->firmware.mutex);
3744 	mutex_init(&adev->pm.mutex);
3745 	mutex_init(&adev->gfx.gpu_clock_mutex);
3746 	mutex_init(&adev->srbm_mutex);
3747 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3748 	mutex_init(&adev->gfx.gfx_off_mutex);
3749 	mutex_init(&adev->gfx.partition_mutex);
3750 	mutex_init(&adev->grbm_idx_mutex);
3751 	mutex_init(&adev->mn_lock);
3752 	mutex_init(&adev->virt.vf_errors.lock);
3753 	hash_init(adev->mn_hash);
3754 	mutex_init(&adev->psp.mutex);
3755 	mutex_init(&adev->psp.ptl.mutex);
3756 	mutex_init(&adev->notifier_lock);
3757 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3758 	mutex_init(&adev->benchmark_mutex);
3759 	mutex_init(&adev->gfx.reset_sem_mutex);
3760 
3761 	/* Associate locks with lockdep classes for ordering validation */
3762 	amdgpu_lockdep_set_class(adev);
3763 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
3764 	mutex_init(&adev->enforce_isolation_mutex);
3765 	for (i = 0; i < MAX_XCP; ++i) {
3766 		adev->isolation[i].spearhead = dma_fence_get_stub();
3767 		amdgpu_sync_create(&adev->isolation[i].active);
3768 		amdgpu_sync_create(&adev->isolation[i].prev);
3769 	}
3770 	mutex_init(&adev->gfx.userq_sch_mutex);
3771 	mutex_init(&adev->gfx.workload_profile_mutex);
3772 	mutex_init(&adev->vcn.workload_profile_mutex);
3773 
3774 	spin_lock_init(&adev->irq.lock);
3775 
3776 	amdgpu_device_init_apu_flags(adev);
3777 
3778 	r = amdgpu_device_check_arguments(adev);
3779 	if (r)
3780 		return r;
3781 
3782 	spin_lock_init(&adev->mmio_idx_lock);
3783 	spin_lock_init(&adev->mm_stats.lock);
3784 	spin_lock_init(&adev->virt.rlcg_reg_lock);
3785 	spin_lock_init(&adev->wb.lock);
3786 
3787 	INIT_LIST_HEAD(&adev->reset_list);
3788 
3789 	INIT_LIST_HEAD(&adev->ras_list);
3790 
3791 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3792 
3793 	xa_init_flags(&adev->userq_doorbell_xa, XA_FLAGS_LOCK_IRQ);
3794 
3795 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3796 			  amdgpu_device_delayed_init_work_handler);
3797 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3798 			  amdgpu_device_delay_enable_gfx_off);
3799 	/*
3800 	 * Initialize the enforce_isolation work structures for each XCP
3801 	 * partition.  This work handler is responsible for enforcing shader
3802 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
3803 	 * each GFX and compute ring.  If there are any fences, it schedules
3804 	 * the `enforce_isolation_work` to be run after a delay.  If there are
3805 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
3806 	 * runqueue.
3807 	 */
3808 	for (i = 0; i < MAX_XCP; i++) {
3809 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
3810 				  amdgpu_gfx_enforce_isolation_handler);
3811 		adev->gfx.enforce_isolation[i].adev = adev;
3812 		adev->gfx.enforce_isolation[i].xcp_id = i;
3813 	}
3814 
3815 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3816 
3817 	amdgpu_coredump_init(adev);
3818 
3819 	adev->gfx.gfx_off_req_count = 1;
3820 	adev->gfx.gfx_off_residency = 0;
3821 	adev->gfx.gfx_off_entrycount = 0;
3822 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3823 
3824 	atomic_set(&adev->throttling_logging_enabled, 1);
3825 	/*
3826 	 * If throttling continues, logging will be performed every minute
3827 	 * to avoid log flooding. "-1" is subtracted since the thermal
3828 	 * throttling interrupt comes every second. Thus, the total logging
3829 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3830 	 * for throttling interrupt) = 60 seconds.
3831 	 */
3832 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3833 
3834 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3835 
3836 	/* Registers mapping */
3837 	/* TODO: block userspace mapping of io register */
3838 	if (adev->asic_type >= CHIP_BONAIRE) {
3839 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3840 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3841 	} else {
3842 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3843 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3844 	}
3845 
3846 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3847 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3848 
3849 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3850 	if (!adev->rmmio)
3851 		return -ENOMEM;
3852 
3853 	dev_info(adev->dev, "register mmio base: 0x%08X\n",
3854 		 (uint32_t)adev->rmmio_base);
3855 	dev_info(adev->dev, "register mmio size: %u\n",
3856 		 (unsigned int)adev->rmmio_size);
3857 
3858 	/*
3859 	 * Reset domain needs to be present early, before XGMI hive discovered
3860 	 * (if any) and initialized to use reset sem and in_gpu reset flag
3861 	 * early on during init and before calling to RREG32.
3862 	 */
3863 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3864 	if (!adev->reset_domain)
3865 		return -ENOMEM;
3866 
3867 	/* detect hw virtualization here */
3868 	amdgpu_virt_init(adev);
3869 
3870 	amdgpu_device_get_pcie_info(adev);
3871 
3872 	r = amdgpu_device_get_job_timeout_settings(adev);
3873 	if (r) {
3874 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3875 		return r;
3876 	}
3877 
3878 	amdgpu_device_set_mcbp(adev);
3879 
3880 	/*
3881 	 * By default, use default mode where all blocks are expected to be
3882 	 * initialized. At present a 'swinit' of blocks is required to be
3883 	 * completed before the need for a different level is detected.
3884 	 */
3885 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
3886 
3887 	amdgpu_device_check_iommu_direct_map(adev);
3888 
3889 	/* early init functions */
3890 	r = amdgpu_device_ip_early_init(adev);
3891 	if (r)
3892 		return r;
3893 
3894 	/*
3895 	 * No need to remove conflicting FBs for non-display class devices.
3896 	 * This prevents the sysfb from being freed accidently.
3897 	 */
3898 	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
3899 	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
3900 		/* Get rid of things like offb */
3901 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
3902 		if (r)
3903 			return r;
3904 	}
3905 
3906 	/* Enable TMZ based on IP_VERSION */
3907 	amdgpu_gmc_tmz_set(adev);
3908 
3909 	if (amdgpu_sriov_vf(adev) &&
3910 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
3911 		/* VF MMIO access (except mailbox range) from CPU
3912 		 * will be blocked during sriov runtime
3913 		 */
3914 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
3915 
3916 	amdgpu_gmc_noretry_set(adev);
3917 	/* Need to get xgmi info early to decide the reset behavior*/
3918 	if (adev->gmc.xgmi.supported) {
3919 		if (adev->gfxhub.funcs &&
3920 		    adev->gfxhub.funcs->get_xgmi_info) {
3921 			r = adev->gfxhub.funcs->get_xgmi_info(adev);
3922 			if (r)
3923 				return r;
3924 		}
3925 	}
3926 
3927 	if (adev->gmc.xgmi.connected_to_cpu) {
3928 		if (adev->mmhub.funcs &&
3929 		    adev->mmhub.funcs->get_xgmi_info) {
3930 			r = adev->mmhub.funcs->get_xgmi_info(adev);
3931 			if (r)
3932 				return r;
3933 		}
3934 	}
3935 
3936 	/* enable PCIE atomic ops */
3937 	if (amdgpu_sriov_vf(adev)) {
3938 		if (adev->virt.fw_reserve.p_pf2vf)
3939 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3940 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3941 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3942 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
3943 	 * internal path natively support atomics, set have_atomics_support to true.
3944 	 */
3945 	} else if ((adev->flags & AMD_IS_APU &&
3946 		   amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) ||
3947 		   (adev->gmc.xgmi.connected_to_cpu &&
3948 		   amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))) {
3949 		adev->have_atomics_support = true;
3950 	} else {
3951 		adev->have_atomics_support =
3952 			!pci_enable_atomic_ops_to_root(adev->pdev,
3953 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3954 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3955 	}
3956 
3957 	if (!adev->have_atomics_support)
3958 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3959 
3960 	/* doorbell bar mapping and doorbell index init*/
3961 	amdgpu_doorbell_init(adev);
3962 
3963 	if (amdgpu_emu_mode == 1) {
3964 		/* post the asic on emulation mode */
3965 		emu_soc_asic_init(adev);
3966 		goto fence_driver_init;
3967 	}
3968 
3969 	amdgpu_reset_init(adev);
3970 
3971 	/* detect if we are with an SRIOV vbios */
3972 	if (adev->bios)
3973 		amdgpu_device_detect_sriov_bios(adev);
3974 
3975 	/* check if we need to reset the asic
3976 	 *  E.g., driver was not cleanly unloaded previously, etc.
3977 	 */
3978 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3979 		if (adev->gmc.xgmi.num_physical_nodes) {
3980 			dev_info(adev->dev, "Pending hive reset.\n");
3981 			amdgpu_set_init_level(adev,
3982 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3983 		} else {
3984 				tmp = amdgpu_reset_method;
3985 				/* It should do a default reset when loading or reloading the driver,
3986 				 * regardless of the module parameter reset_method.
3987 				 */
3988 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3989 				r = amdgpu_asic_reset(adev);
3990 				amdgpu_reset_method = tmp;
3991 		}
3992 
3993 		if (r) {
3994 		  dev_err(adev->dev, "asic reset on init failed\n");
3995 		  goto failed;
3996 		}
3997 	}
3998 
3999 	/* Post card if necessary */
4000 	if (amdgpu_device_need_post(adev)) {
4001 		if (!adev->bios) {
4002 			dev_err(adev->dev, "no vBIOS found\n");
4003 			r = -EINVAL;
4004 			goto failed;
4005 		}
4006 		dev_info(adev->dev, "GPU posting now...\n");
4007 		r = amdgpu_device_asic_init(adev);
4008 		if (r) {
4009 			dev_err(adev->dev, "gpu post error!\n");
4010 			goto failed;
4011 		}
4012 	}
4013 
4014 	if (adev->bios) {
4015 		if (adev->is_atom_fw) {
4016 			/* Initialize clocks */
4017 			r = amdgpu_atomfirmware_get_clock_info(adev);
4018 			if (r) {
4019 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4020 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4021 				goto failed;
4022 			}
4023 		} else {
4024 			/* Initialize clocks */
4025 			r = amdgpu_atombios_get_clock_info(adev);
4026 			if (r) {
4027 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4028 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4029 				goto failed;
4030 			}
4031 			/* init i2c buses */
4032 			amdgpu_i2c_init(adev);
4033 		}
4034 	}
4035 
4036 fence_driver_init:
4037 	/* Fence driver */
4038 	r = amdgpu_fence_driver_sw_init(adev);
4039 	if (r) {
4040 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4041 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4042 		goto failed;
4043 	}
4044 
4045 	/* init the mode config */
4046 	drm_mode_config_init(adev_to_drm(adev));
4047 
4048 	r = amdgpu_device_ip_init(adev);
4049 	if (r) {
4050 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4051 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4052 		goto release_ras_con;
4053 	}
4054 
4055 	amdgpu_fence_driver_hw_init(adev);
4056 
4057 	dev_info(adev->dev,
4058 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4059 			adev->gfx.config.max_shader_engines,
4060 			adev->gfx.config.max_sh_per_se,
4061 			adev->gfx.config.max_cu_per_sh,
4062 			adev->gfx.cu_info.number);
4063 
4064 	adev->accel_working = true;
4065 
4066 	amdgpu_vm_check_compute_bug(adev);
4067 
4068 	/* Initialize the buffer migration limit. */
4069 	if (amdgpu_moverate >= 0)
4070 		max_MBps = amdgpu_moverate;
4071 	else
4072 		max_MBps = 8; /* Allow 8 MB/s. */
4073 	/* Get a log2 for easy divisions. */
4074 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4075 
4076 	/*
4077 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4078 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4079 	 * gpu instance is counted less.
4080 	 */
4081 	amdgpu_register_gpu_instance(adev);
4082 
4083 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4084 	 * explicit gating rather than handling it automatically.
4085 	 */
4086 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4087 		r = amdgpu_device_ip_late_init(adev);
4088 		if (r) {
4089 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4090 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4091 			goto release_ras_con;
4092 		}
4093 		/* must succeed. */
4094 		amdgpu_ras_resume(adev);
4095 		queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
4096 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4097 	}
4098 
4099 	if (amdgpu_sriov_vf(adev)) {
4100 		amdgpu_virt_release_full_gpu(adev, true);
4101 		flush_delayed_work(&adev->delayed_init_work);
4102 	}
4103 
4104 	/* Don't init kfd if whole hive need to be reset during init */
4105 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4106 		kgd2kfd_init_zone_device(adev);
4107 		kfd_update_svm_support_properties(adev);
4108 	}
4109 
4110 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4111 		amdgpu_xgmi_reset_on_init(adev);
4112 
4113 	/*
4114 	 * Place those sysfs registering after `late_init`. As some of those
4115 	 * operations performed in `late_init` might affect the sysfs
4116 	 * interfaces creating.
4117 	 */
4118 	r = amdgpu_device_sys_interface_init(adev);
4119 
4120 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4121 		r = amdgpu_pmu_init(adev);
4122 	if (r)
4123 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4124 
4125 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4126 	if (amdgpu_device_cache_pci_state(adev->pdev))
4127 		pci_restore_state(pdev);
4128 
4129 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4130 	/* this will fail for cards that aren't VGA class devices, just
4131 	 * ignore it
4132 	 */
4133 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4134 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4135 
4136 	px = amdgpu_device_supports_px(adev);
4137 
4138 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4139 				apple_gmux_detect(NULL, NULL)))
4140 		vga_switcheroo_register_client(adev->pdev,
4141 					       &amdgpu_switcheroo_ops, px);
4142 
4143 	if (px)
4144 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4145 
4146 	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4147 	r = register_pm_notifier(&adev->pm_nb);
4148 	if (r)
4149 		goto failed;
4150 
4151 	return 0;
4152 
4153 release_ras_con:
4154 	if (amdgpu_sriov_vf(adev))
4155 		amdgpu_virt_release_full_gpu(adev, true);
4156 
4157 	/* failed in exclusive mode due to timeout */
4158 	if (amdgpu_sriov_vf(adev) &&
4159 		!amdgpu_sriov_runtime(adev) &&
4160 		amdgpu_virt_mmio_blocked(adev) &&
4161 		!amdgpu_virt_wait_reset(adev)) {
4162 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4163 		/* Don't send request since VF is inactive. */
4164 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4165 		adev->virt.ops = NULL;
4166 		r = -EAGAIN;
4167 	}
4168 	amdgpu_release_ras_context(adev);
4169 
4170 failed:
4171 	amdgpu_vf_error_trans_all(adev);
4172 
4173 	return r;
4174 }
4175 
4176 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4177 {
4178 	/* Drops the CPU-side mappings of the device: user mappings, doorbells, registers and the VRAM aperture. */
4179 	/* Clear all CPU mappings pointing to this device */
4180 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4181 
4182 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4183 	amdgpu_doorbell_fini(adev);
4184 
4185 	iounmap(adev->rmmio);
4186 	adev->rmmio = NULL; /* do not keep a pointer to the unmapped registers */
4187 	if (adev->mman.aper_base_kaddr) /* only unmap the aperture if a mapping was recorded */
4188 		iounmap(adev->mman.aper_base_kaddr);
4189 	adev->mman.aper_base_kaddr = NULL;
4190 
4191 	/* Memory manager related: skipped when connected to the CPU over XGMI or on an app APU */
4192 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4193 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4194 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4195 	}
4196 }
4197 
4198 /**
4199  * amdgpu_device_fini_hw - tear down the hardware side of the driver
4200  *
4201  * @adev: amdgpu_device pointer
4202  *
4203  * Tear down the driver info (all asics). Called at driver shutdown.
4204  * MMIO is only unmapped here when the PCI device is already disconnected.
4205  */
4206 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4207 {
4208 	dev_info(adev->dev, "finishing device.\n");
4209 	flush_delayed_work(&adev->delayed_init_work); /* wait for the delayed init work queued during init/resume */
4210 
4211 	if (adev->mman.initialized)
4212 		drain_workqueue(adev->mman.bdev.wq);
4213 	adev->shutdown = true; /* set only after the pending work above has been flushed/drained */
4214 
4215 	unregister_pm_notifier(&adev->pm_nb);
4216 
4217 	/* make sure IB test finished before entering exclusive mode
4218 	 * to avoid preemption on IB test
4219 	 */
4220 	if (amdgpu_sriov_vf(adev)) {
4221 		amdgpu_virt_request_full_gpu(adev, false); /* return value is not checked here */
4222 		amdgpu_virt_fini_data_exchange(adev);
4223 	}
4224 
4225 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
4226 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
4227 
4228 	/* disable all interrupts */
4229 	amdgpu_irq_disable_all(adev);
4230 	if (adev->mode_info.mode_config_initialized) { /* shut displays down via the legacy or the atomic helper */
4231 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4232 			drm_helper_force_disable_all(adev_to_drm(adev));
4233 		else
4234 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4235 	}
4236 	amdgpu_fence_driver_hw_fini(adev);
4237 
4238 	amdgpu_device_sys_interface_fini(adev);
4239 
4240 	/* the ras feature must be disabled before hw fini */
4241 	amdgpu_ras_pre_fini(adev);
4242 
4243 	amdgpu_ttm_disable_buffer_funcs(adev);
4244 
4245 	/*
4246 	 * device went through surprise hotplug; we need to destroy topology
4247 	 * before ip_fini_early to prevent kfd locking refcount issues by calling
4248 	 * amdgpu_amdkfd_suspend()
4249 	 */
4250 	if (pci_dev_is_disconnected(adev->pdev))
4251 		amdgpu_amdkfd_device_fini_sw(adev);
4252 
4253 	amdgpu_coredump_fini(adev);
4254 	amdgpu_device_ip_fini_early(adev);
4255 
4256 	amdgpu_irq_fini_hw(adev);
4257 
4258 	if (adev->mman.initialized)
4259 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4260 
4261 	amdgpu_gart_dummy_page_fini(adev);
4262 
4263 	if (pci_dev_is_disconnected(adev->pdev)) /* surprise removal: drop all CPU mappings of the device now */
4264 		amdgpu_device_unmap_mmio(adev);
4265 
4266 }
4267 
4268 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4269 {
4270 	int i, idx;
4271 	bool px;
4272 	/* Software-side teardown: IP blocks, fences, firmware, BIOS data, VGA/switcheroo clients and cached state. */
4273 	amdgpu_device_ip_fini(adev);
4274 	amdgpu_fence_driver_sw_fini(adev);
4275 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4276 	adev->accel_working = false;
4277 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4278 	for (i = 0; i < MAX_XCP; ++i) { /* drop the per-partition isolation fences and sync objects */
4279 		dma_fence_put(adev->isolation[i].spearhead);
4280 		amdgpu_sync_free(&adev->isolation[i].active);
4281 		amdgpu_sync_free(&adev->isolation[i].prev);
4282 	}
4283 
4284 	amdgpu_reset_fini(adev);
4285 
4286 	/* free i2c buses */
4287 	amdgpu_i2c_fini(adev);
4288 
4289 	if (adev->bios) {
4290 		if (amdgpu_emu_mode != 1) /* atombios is not torn down in emulation mode */
4291 			amdgpu_atombios_fini(adev);
4292 		amdgpu_bios_release(adev);
4293 	}
4294 
4295 	kfree(adev->fru_info);
4296 	adev->fru_info = NULL;
4297 
4298 	kfree(adev->xcp_mgr);
4299 	adev->xcp_mgr = NULL;
4300 
4301 	px = amdgpu_device_supports_px(adev);
4302 	/* Same condition as the vga_switcheroo client registration in amdgpu_device_init() */
4303 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4304 				apple_gmux_detect(NULL, NULL)))
4305 		vga_switcheroo_unregister_client(adev->pdev);
4306 
4307 	if (px)
4308 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4309 
4310 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4311 		vga_client_unregister(adev->pdev);
4312 
4313 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4314 		/* NOTE(review): presumably skipped after an unplug because fini_hw already unmapped MMIO - confirm */
4315 		iounmap(adev->rmmio);
4316 		adev->rmmio = NULL;
4317 		drm_dev_exit(idx);
4318 	}
4319 
4320 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4321 		amdgpu_pmu_fini(adev);
4322 	if (adev->discovery.bin)
4323 		amdgpu_discovery_fini(adev);
4324 
4325 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4326 	adev->reset_domain = NULL;
4327 	/* Release the cached PCI config space copies */
4328 	kfree(adev->pci_state);
4329 	kfree(adev->pcie_reset_ctx.swds_pcistate);
4330 	kfree(adev->pcie_reset_ctx.swus_pcistate);
4331 }
4332 
4333 /**
4334  * amdgpu_device_evict_resources - move device resources out of VRAM
4335  * @adev: amdgpu device object
4336  *
4337  * Asks TTM to evict everything placed in TTM_PL_VRAM and, when a hibernation
4338  * (S4) is in progress, additionally prepares the TTM device for hibernation.
4339  * Nothing is done for APUs outside of S4 or while the system is powering off.
4340  * Returns 0 on success or when skipped, otherwise the error of the failing step.
4341  */
4342 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4343 {
4344 	bool is_apu = adev->flags & AMD_IS_APU;
4345 	int err;
4346 
4347 	/* APUs only need eviction for S4; S5 entered via the S4 callbacks needs none. */
4348 	if ((is_apu && !adev->in_s4) || system_state == SYSTEM_POWER_OFF)
4349 		return 0;
4350 
4351 	err = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4352 	if (err) {
4353 		dev_warn(adev->dev, "evicting device resources failed\n");
4354 		return err;
4355 	}
4356 
4357 	/* Outside of hibernation the eviction above is all there is to do. */
4358 	if (!adev->in_s4)
4359 		return 0;
4360 
4361 	err = ttm_device_prepare_hibernation(&adev->mman.bdev);
4362 	if (err)
4363 		dev_err(adev->dev, "prepare hibernation failed, %d\n", err);
4364 
4365 	return err;
4366 }
4367 
4368 /*
4369  * Suspend & resume.
4370  */
4371 /**
4372  * amdgpu_device_pm_notifier - track hibernation state from PM notifications
4373  * @nb: notifier block embedded in the amdgpu device (pm_nb)
4374  * @mode: PM event being signalled
4375  * @data: unused
4376  *
4377  * Sets adev->in_s4 when a hibernation is being prepared and clears it again
4378  * once the hibernation is over; every other event is ignored. The flag lets
4379  * the eviction done in the pm prepare callback be optimized.
4380  */
4381 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
4382 				     void *data)
4383 {
4384 	struct amdgpu_device *adev;
4385 
4386 	adev = container_of(nb, struct amdgpu_device, pm_nb);
4387 
4388 	/* Only the two hibernation events matter; in_s4 is left alone otherwise. */
4389 	if (mode == PM_HIBERNATION_PREPARE)
4390 		adev->in_s4 = true;
4391 	else if (mode == PM_POST_HIBERNATION)
4392 		adev->in_s4 = false;
4393 
4394 	/* Always report NOTIFY_DONE, whatever the event was. */
4395 	return NOTIFY_DONE;
4396 }
4397 
4398 /**
4399  * amdgpu_device_prepare - prepare for device suspend
4400  *
4401  * @dev: drm dev pointer
4402  *
4403  * Evicts most buffers up front, flushes the pending GFXOFF delayed work and
4404  * runs the prepare_suspend callback of every valid IP block that has one.
4405  * Returns 0 on success (or when switched off) and the first error otherwise.
4406  */
4407 int amdgpu_device_prepare(struct drm_device *dev)
4408 {
4409 	struct amdgpu_device *adev = drm_to_adev(dev);
4410 	int idx, ret;
4411 
4412 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4413 		return 0;
4414 
4415 	/* Get the bulk of the BOs out of the way before the suspend sequence starts. */
4416 	ret = amdgpu_device_evict_resources(adev);
4417 	if (ret)
4418 		return ret;
4419 
4420 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4421 
4422 	/* Stop at the first IP block whose prepare_suspend callback fails. */
4423 	for (idx = 0; idx < adev->num_ip_blocks; idx++) {
4424 		if (adev->ip_blocks[idx].status.valid &&
4425 		    adev->ip_blocks[idx].version->funcs->prepare_suspend) {
4426 			ret = adev->ip_blocks[idx].version->funcs->prepare_suspend(&adev->ip_blocks[idx]);
4427 			if (ret)
4428 				return ret;
4429 		}
4430 	}
4431 
4432 	return 0;
4433 }
4434 
4435 /**
4436  * amdgpu_device_complete - complete power state transition
4437  *
4438  * @dev: drm dev pointer
4439  *
4440  * Counterpart of amdgpu_device_prepare: invokes the complete callback of each
4441  * valid IP block that has one. Called on all resume transitions, failed or not.
4442  */
4443 void amdgpu_device_complete(struct drm_device *dev)
4444 {
4445 	struct amdgpu_device *adev = drm_to_adev(dev);
4446 	int idx = 0;
4447 
4448 	while (idx < adev->num_ip_blocks) {
4449 		/* Skip blocks that are invalid or have nothing to complete. */
4450 		if (adev->ip_blocks[idx].status.valid &&
4451 		    adev->ip_blocks[idx].version->funcs->complete)
4452 			adev->ip_blocks[idx].version->funcs->complete(&adev->ip_blocks[idx]);
4453 		idx++;
4454 	}
4455 }
4456 
4457 /**
4458  * amdgpu_device_suspend - initiate device suspend
4459  *
4460  * @dev: drm dev pointer
4461  * @notify_clients: notify in-kernel DRM clients
4462  *
4463  * Puts the hw in the suspend state (all asics); on failure the steps already taken are unwound.
4464  * Returns 0 for success or an error on failure.
4465  * Called at driver suspend.
4466  */
4467 int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
4468 {
4469 	struct amdgpu_device *adev = drm_to_adev(dev);
4470 	int r, rec; /* r: error reported to the caller; rec: result of a recovery step while unwinding */
4471 
4472 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4473 		return 0;
4474 
4475 	adev->in_suspend = true;
4476 	/* NOTE(review): the "return r" in the SR-IOV branch below leaves in_suspend set - confirm this is intended */
4477 	if (amdgpu_sriov_vf(adev)) {
4478 		if (!adev->in_runpm)
4479 			amdgpu_amdkfd_suspend_process(adev);
4480 		amdgpu_virt_fini_data_exchange(adev);
4481 		r = amdgpu_virt_request_full_gpu(adev, false);
4482 		if (r)
4483 			return r;
4484 	}
4485 
4486 	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
4487 	if (r)
4488 		goto unwind_sriov;
4489 
4490 	if (notify_clients)
4491 		drm_client_dev_suspend(adev_to_drm(adev));
4492 
4493 	cancel_delayed_work_sync(&adev->delayed_init_work);
4494 
4495 	amdgpu_ras_suspend(adev);
4496 
4497 	r = amdgpu_device_ip_suspend_phase1(adev);
4498 	if (r)
4499 		goto unwind_smartshift;
4500 
4501 	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4502 	r = amdgpu_userq_suspend(adev);
4503 	if (r)
4504 		goto unwind_ip_phase1;
4505 
4506 	r = amdgpu_device_evict_resources(adev);
4507 	if (r)
4508 		goto unwind_userq;
4509 
4510 	amdgpu_ttm_disable_buffer_funcs(adev);
4511 
4512 	amdgpu_fence_driver_hw_fini(adev);
4513 
4514 	r = amdgpu_device_ip_suspend_phase2(adev);
4515 	if (r)
4516 		goto unwind_evict;
4517 
4518 	if (amdgpu_sriov_vf(adev))
4519 		amdgpu_virt_release_full_gpu(adev, false);
4520 
4521 	return 0;
4522 	/* Error unwinding: the labels fall through; if a recovery step fails, the original error r is returned at once. */
4523 unwind_evict:
4524 	amdgpu_ttm_enable_buffer_funcs(adev);
4525 	amdgpu_fence_driver_hw_init(adev);
4526 
4527 unwind_userq:
4528 	rec = amdgpu_userq_resume(adev);
4529 	if (rec) {
4530 		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
4531 		return r;
4532 	}
4533 	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4534 	if (rec) {
4535 		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
4536 		return r;
4537 	}
4538 
4539 unwind_ip_phase1:
4540 	/* suspend phase 1 = resume phase 3 */
4541 	rec = amdgpu_device_ip_resume_phase3(adev);
4542 	if (rec) {
4543 		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
4544 		return r;
4545 	}
4546 
4547 unwind_smartshift:
4548 	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
4549 	if (rec) {
4550 		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
4551 		return r;
4552 	}
4553 
4554 	if (notify_clients)
4555 		drm_client_dev_resume(adev_to_drm(adev));
4556 
4557 	amdgpu_ras_resume(adev);
4558 
4559 unwind_sriov:
4560 	if (amdgpu_sriov_vf(adev)) {
4561 		rec = amdgpu_virt_request_full_gpu(adev, true);
4562 		if (rec) {
4563 			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
4564 			return r;
4565 		}
4566 	}
4567 	/* The suspend/sleep state flags are only cleared when the whole unwind completed */
4568 	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
4569 
4570 	return r;
4571 }
4572 
4573 static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
4574 {
4575 	int r;
4576 	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id; /* kept for the log message below */
4577 	/* Re-reads the XGMI info and recomputes vm_manager.vram_base_offset; returns 0 or the get_xgmi_info error. */
4578 	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
4579 	 * may not work. The access could be blocked by nBIF protection as VF isn't in
4580 	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
4581 	 * so that QEMU reprograms MSIX table.
4582 	 */
4583 	amdgpu_restore_msix(adev);
4584 
4585 	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4586 	if (r)
4587 		return r;
4588 
4589 	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
4590 		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
4591 	/* Base offset = framebuffer offset plus this node's segment within the XGMI hive */
4592 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
4593 	adev->vm_manager.vram_base_offset +=
4594 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
4595 
4596 	return 0;
4597 }
4598 
4599 /**
4600  * amdgpu_device_resume - initiate device resume
4601  *
4602  * @dev: drm dev pointer
4603  * @notify_clients: notify in-kernel DRM clients
4604  *
4605  * Bring the hw back to operating state (all asics).
4606  * Returns 0 for success or an error on failure.
4607  * Called at driver resume.
4608  */
4609 int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
4610 {
4611 	struct amdgpu_device *adev = drm_to_adev(dev);
4612 	int r = 0;
4613 
4614 	if (amdgpu_sriov_vf(adev)) {
4615 		r = amdgpu_virt_request_full_gpu(adev, true);
4616 		if (r)
4617 			return r;
4618 	}
4619 
4620 	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
4621 		r = amdgpu_virt_resume(adev);
4622 		if (r)
4623 			goto exit;
4624 	}
4625 	/* NOTE(review): this early return bypasses the "exit" label where a VF releases full GPU access - confirm */
4626 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4627 		return 0;
4628 
4629 	if (adev->in_s0ix)
4630 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4631 
4632 	/* post card */
4633 	if (amdgpu_device_need_post(adev)) {
4634 		r = amdgpu_device_asic_init(adev);
4635 		if (r) /* only logged: r is overwritten by amdgpu_device_ip_resume() right below */
4636 			dev_err(adev->dev, "amdgpu asic init failed\n");
4637 	}
4638 
4639 	r = amdgpu_device_ip_resume(adev);
4640 
4641 	if (r) {
4642 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4643 		goto exit;
4644 	}
4645 
4646 	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4647 	if (r)
4648 		goto exit;
4649 
4650 	r = amdgpu_userq_resume(adev);
4651 	if (r)
4652 		goto exit;
4653 
4654 	r = amdgpu_device_ip_late_init(adev);
4655 	if (r)
4656 		goto exit;
4657 
4658 	queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
4659 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4660 exit:
4661 	if (amdgpu_sriov_vf(adev)) { /* reached on success and on failure: a VF always releases full GPU access */
4662 		amdgpu_virt_init_data_exchange(adev);
4663 		amdgpu_virt_release_full_gpu(adev, true);
4664 
4665 		if (!r && !adev->in_runpm)
4666 			r = amdgpu_amdkfd_resume_process(adev);
4667 	}
4668 
4669 	if (r)
4670 		return r;
4671 
4672 	/* Make sure IB tests flushed */
4673 	flush_delayed_work(&adev->delayed_init_work);
4674 
4675 	if (notify_clients)
4676 		drm_client_dev_resume(adev_to_drm(adev));
4677 
4678 	amdgpu_ras_resume(adev);
4679 
4680 	if (adev->mode_info.num_crtc) {
4681 		/*
4682 		 * Most of the connector probing functions try to acquire runtime pm
4683 		 * refs to ensure that the GPU is powered on when connector polling is
4684 		 * performed. Since we're calling this from a runtime PM callback,
4685 		 * trying to acquire rpm refs will cause us to deadlock.
4686 		 *
4687 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4688 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4689 		 */
4690 #ifdef CONFIG_PM
4691 		dev->dev->power.disable_depth++;
4692 #endif
4693 		if (!adev->dc_enabled)
4694 			drm_helper_hpd_irq_event(dev);
4695 		else
4696 			drm_kms_helper_hotplug_event(dev);
4697 #ifdef CONFIG_PM
4698 		dev->dev->power.disable_depth--;
4699 #endif
4700 	}
4701 
4702 	amdgpu_vram_mgr_clear_reset_blocks(adev);
4703 	adev->in_suspend = false;
4704 	/* A failing smart shift update is only warned about; resume still succeeds */
4705 	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
4706 		dev_warn(adev->dev, "smart shift update failed\n");
4707 
4708 	return 0;
4709 }
4710 
4711 /**
4712  * amdgpu_device_ip_check_soft_reset - find out which IP blocks are hung
4713  *
4714  * @adev: amdgpu_device pointer
4715  *
4716  * Runs the check_soft_reset callback of every valid IP block that has one,
4717  * stores the result in the block's hang status and logs each hung block.
4718  * SR-IOV VFs and asics that need a full reset are reported as hung right away.
4719  * Returns true if any of the IPs are still in a hung state, false if not.
4720  */
4721 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4722 {
4723 	bool any_hung = false;
4724 	int idx;
4725 
4726 	/* No per-block check is made for SR-IOV VFs or when a full reset is needed. */
4727 	if (amdgpu_sriov_vf(adev) || amdgpu_asic_need_full_reset(adev))
4728 		return true;
4729 
4730 	for (idx = 0; idx < adev->num_ip_blocks; idx++) {
4731 		if (!adev->ip_blocks[idx].status.valid)
4732 			continue;
4733 		/* Blocks without the callback keep whatever hang state they had. */
4734 		if (adev->ip_blocks[idx].version->funcs->check_soft_reset)
4735 			adev->ip_blocks[idx].status.hang =
4736 				adev->ip_blocks[idx].version->funcs->check_soft_reset(&adev->ip_blocks[idx]);
4737 
4738 		if (!adev->ip_blocks[idx].status.hang)
4739 			continue;
4740 
4741 		dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[idx].version->funcs->name);
4742 		any_hung = true;
4743 	}
4744 	return any_hung;
4745 }
4746 
4747 /**
4748  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4749  *
4750  * @adev: amdgpu_device pointer
4751  *
4752  * Runs the pre_soft_reset callback of every valid IP block that is marked as
4753  * hung and implements the callback, so that the block can make whatever
4754  * hardware or software state changes it needs before being soft reset.
4755  * The walk stops at the first callback that fails.
4756  * Returns 0 on success, negative error code on failure.
4757  */
4758 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4759 {
4760 	int idx, err;
4761 
4762 	for (idx = 0; idx < adev->num_ip_blocks; idx++) {
4763 		/* Only valid, hung blocks that provide the hook are prepared. */
4764 		if (!adev->ip_blocks[idx].status.valid ||
4765 		    !adev->ip_blocks[idx].status.hang ||
4766 		    !adev->ip_blocks[idx].version->funcs->pre_soft_reset)
4767 			continue;
4768 
4769 		err = adev->ip_blocks[idx].version->funcs->pre_soft_reset(&adev->ip_blocks[idx]);
4770 		if (err)
4771 			return err;
4772 	}
4773 	return 0;
4774 }
4775 
4776 /**
4777  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4778  *
4779  * @adev: amdgpu_device pointer
4780  *
4781  * A full reset is required when the asic itself asks for one or when a valid
4782  * GMC, SMC, ACP, DCE or PSP block is hung, as those cannot be soft reset.
4783  * Returns true if a full asic reset is required, false if not.
4784  */
4785 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4786 {
4787 	int idx;
4788 
4789 	if (amdgpu_asic_need_full_reset(adev))
4790 		return true;
4791 	for (idx = 0; idx < adev->num_ip_blocks; idx++) {
4792 		if (!adev->ip_blocks[idx].status.valid || !adev->ip_blocks[idx].status.hang)
4793 			continue;
4794 		switch (adev->ip_blocks[idx].version->type) { /* hung block types that force a full reset */
4795 		case AMD_IP_BLOCK_TYPE_GMC:
4796 		case AMD_IP_BLOCK_TYPE_SMC:
4797 		case AMD_IP_BLOCK_TYPE_ACP:
4798 		case AMD_IP_BLOCK_TYPE_DCE:
4799 		case AMD_IP_BLOCK_TYPE_PSP:
4800 			dev_info(adev->dev, "Some block need full reset!\n");
4801 			return true;
4802 		default:
4803 			break;
4804 		}
4805 	}
4806 	return false;
4807 }
4808 
4809 /**
4810  * amdgpu_device_ip_soft_reset - do a soft reset
4811  *
4812  * @adev: amdgpu_device pointer
4813  *
4814  * Runs the soft_reset callback of every valid IP block that is marked as hung
4815  * and implements the callback; the callback performs whatever hardware or
4816  * software state changes are needed to soft reset that IP.
4817  * No further block is reset once a callback has failed.
4818  * Returns 0 on success, negative error code on failure.
4819  */
4820 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4821 {
4822 	int n, ret = 0;
4823 
4824 	/* Walk the blocks in order; the loop ends on the first failing reset. */
4825 	for (n = 0; !ret && n < adev->num_ip_blocks; n++) {
4826 		if (!adev->ip_blocks[n].status.valid)
4827 			continue;
4828 		if (!adev->ip_blocks[n].status.hang)
4829 			continue;
4830 		if (!adev->ip_blocks[n].version->funcs->soft_reset)
4831 			continue;
4832 		ret = adev->ip_blocks[n].version->funcs->soft_reset(&adev->ip_blocks[n]);
4833 	}
4834 
4835 	return ret;
4836 }
4837 
4838 /**
4839  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4840  *
4841  * @adev: amdgpu_device pointer
4842  *
4843  * Runs the post_soft_reset callback of every valid IP block that is marked as
4844  * hung and implements the callback, so that the block can make whatever
4845  * hardware or software state changes are needed after it was soft reset.
4846  * The walk stops at the first callback that fails.
4847  * Returns 0 on success, negative error code on failure.
4848  */
4849 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4850 {
4851 	int idx, err;
4852 
4853 	for (idx = 0; idx < adev->num_ip_blocks; idx++) {
4854 		/* Nothing to clean up unless the block is valid, hung and has the hook. */
4855 		if (!(adev->ip_blocks[idx].status.valid &&
4856 		      adev->ip_blocks[idx].status.hang &&
4857 		      adev->ip_blocks[idx].version->funcs->post_soft_reset))
4858 			continue;
4859 		err = adev->ip_blocks[idx].version->funcs->post_soft_reset(&adev->ip_blocks[idx]);
4860 		if (err)
4861 			return err;
4862 	}
4863 	return 0;
4864 }
4865 
4866 /**
4867  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4868  *
4869  * @adev: amdgpu_device pointer
4870  * @reset_context: amdgpu reset context pointer
4871  *
4872  * Do a VF FLR and reinitialize the asic.
4873  * Returns 0 on success, otherwise the error of the step that failed.
4874  */
4875 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4876 				     struct amdgpu_reset_context *reset_context)
4877 {
4878 	int r;
4879 	struct amdgpu_hive_info *hive = NULL;
4880 	/* Host-initiated FLR: wait for the host reset, then request full GPU access; otherwise reset from the VF */
4881 	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
4882 		if (!amdgpu_ras_get_fed_status(adev))
4883 			amdgpu_virt_ready_to_reset(adev);
4884 		amdgpu_virt_wait_reset(adev);
4885 		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
4886 		r = amdgpu_virt_request_full_gpu(adev, true);
4887 	} else {
4888 		r = amdgpu_virt_reset_gpu(adev);
4889 	}
4890 	if (r)
4891 		return r;
4892 
4893 	amdgpu_ras_clear_err_state(adev);
4894 	amdgpu_irq_gpu_reset_resume_helper(adev);
4895 
4896 	/* some sw clean up VF needs to do before recover */
4897 	amdgpu_virt_post_reset(adev);
4898 
4899 	/* Resume IP prior to SMC */
4900 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4901 	if (r)
4902 		return r;
4903 
4904 	amdgpu_virt_init_data_exchange(adev);
4905 
4906 	r = amdgpu_device_fw_loading(adev);
4907 	if (r)
4908 		return r;
4909 
4910 	/* now we are okay to resume SMC/CP/SDMA */
4911 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4912 	if (r)
4913 		return r;
4914 
4915 	hive = amdgpu_get_xgmi_hive(adev);
4916 	/* Update PSP FW topology after reset */
4917 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4918 		r = amdgpu_xgmi_update_topology(hive, adev);
4919 	if (hive) /* drop the hive reference before looking at the topology update result */
4920 		amdgpu_put_xgmi_hive(hive);
4921 	if (r)
4922 		return r;
4923 
4924 	r = amdgpu_ib_ring_tests(adev);
4925 	if (r)
4926 		return r;
4927 
4928 	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
4929 		amdgpu_inc_vram_lost(adev);
4930 
4931 	/* need to be called during full access so we can't do it later like
4932 	 * bare-metal does.
4933 	 */
4934 	amdgpu_amdkfd_post_reset(adev);
4935 	amdgpu_virt_release_full_gpu(adev, true);
4936 
4937 	/* Some asics (the GC IP versions listed below) support ras in SRIOV, so ras must be resumed during reset */
4938 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
4939 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
4940 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
4941 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
4942 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
4943 		amdgpu_ras_resume(adev);
4944 
4945 	amdgpu_virt_ras_telemetry_post_reset(adev);
4946 
4947 	return 0;
4948 }
4949 
4950 /**
4951  * amdgpu_device_has_job_running - tell whether any ring still has work pending
4952  *
4953  * @adev: amdgpu_device pointer
4954  *
4955  * Used when the guest driver receives an FLR notification from the host: while
4956  * jobs are still running the guest does not respond to the FLR and instead lets
4957  * the job time out and then requests the reset itself. Returns true as soon as
4958  * a ring with a ready scheduler reports emitted fences, false otherwise.
4959  */
4960 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4961 {
4962 	struct amdgpu_ring *ring;
4963 	int slot = 0;
4964 
4965 	while (slot < AMDGPU_MAX_RINGS) {
4966 		ring = adev->rings[slot++];
4967 
4968 		/* Rings whose scheduler is not ready are not inspected at all. */
4969 		if (amdgpu_ring_sched_ready(ring) && amdgpu_fence_count_emitted(ring))
4970 			return true;
4971 	}
4972 
4973 	return false;
4974 }
4975 
4976 /**
4977  * amdgpu_device_should_recover_gpu - decide whether GPU recovery is attempted
4978  *
4979  * @adev: amdgpu_device pointer
4980  *
4981  * Combines amdgpu_gpu_recovery, RAS poison mode support, SR-IOV status and the
4982  * asic type. Returns true to recover; logs and returns false when disabled.
4983  */
4984 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4985 {
4986 	bool recover = true;
4987 
4988 	if (amdgpu_gpu_recovery == 0) {
4989 		/* amdgpu_gpu_recovery == 0: never recover. */
4990 		recover = false;
4991 	} else if (!amdgpu_ras_is_poison_mode_supported(adev) ||
4992 		   amdgpu_sriov_vf(adev)) {
4993 		/* Skip soft reset check in fatal error mode; SR-IOV VFs recover too. */
4994 		recover = true;
4995 	} else if (amdgpu_gpu_recovery == -1) {
4996 		/* With -1 the asics listed here are excluded from recovery. */
4997 		switch (adev->asic_type) {
4998 #ifdef CONFIG_DRM_AMDGPU_SI
4999 		case CHIP_VERDE:
5000 		case CHIP_TAHITI:
5001 		case CHIP_PITCAIRN:
5002 		case CHIP_OLAND:
5003 		case CHIP_HAINAN:
5004 #endif
5005 #ifdef CONFIG_DRM_AMDGPU_CIK
5006 		case CHIP_KAVERI:
5007 		case CHIP_KABINI:
5008 		case CHIP_MULLINS:
5009 #endif
5010 		case CHIP_CARRIZO:
5011 		case CHIP_STONEY:
5012 		case CHIP_CYAN_SKILLFISH:
5013 			recover = false;
5014 			break;
5015 		default:
5016 			break;
5017 		}
5018 	}
5019 
5020 	/* Every "disabled" outcome is logged in one place. */
5021 	if (!recover)
5022 		dev_info(adev->dev, "GPU recovery disabled.\n");
5023 
5024 	return recover;
5025 }
5026 
5027 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5028 {
5029 	u32 i;
5030 	int ret = 0;
5031 
5032 	if (adev->bios)
5033 		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5034 
5035 	dev_info(adev->dev, "GPU mode1 reset\n");
5036 
5037 	/* Cache the state before bus master disable. The saved config space
5038 	 * values are used in other cases like restore after mode-2 reset.
5039 	 */
5040 	amdgpu_device_cache_pci_state(adev->pdev);
5041 
5042 	/* disable BM */
5043 	pci_clear_master(adev->pdev);
5044 
5045 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5046 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5047 		ret = amdgpu_dpm_mode1_reset(adev);
5048 	} else {
5049 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5050 		ret = psp_gpu_reset(adev);
5051 	}
5052 
5053 	if (ret)
5054 		goto mode1_reset_failed;
5055 
5056 	/* enable mmio access after mode 1 reset completed */
5057 	adev->no_hw_access = false;
5058 
5059 	/* ensure no_hw_access is updated before we access hw */
5060 	smp_mb();
5061 
5062 	amdgpu_device_load_pci_state(adev->pdev);
5063 	ret = amdgpu_psp_wait_for_bootloader(adev);
5064 	if (ret)
5065 		goto mode1_reset_failed;
5066 
5067 	/* wait for asic to come out of reset */
5068 	for (i = 0; i < adev->usec_timeout; i++) {
5069 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5070 
5071 		if (memsize != 0xffffffff)
5072 			break;
5073 		udelay(1);
5074 	}
5075 
5076 	if (i >= adev->usec_timeout) {
5077 		ret = -ETIMEDOUT;
5078 		goto mode1_reset_failed;
5079 	}
5080 
5081 	if (adev->bios)
5082 		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5083 
5084 	return 0;
5085 
5086 mode1_reset_failed:
5087 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5088 	return ret;
5089 }
5090 
5091 int amdgpu_device_link_reset(struct amdgpu_device *adev)
5092 {
5093 	int ret = 0;
5094 
5095 	dev_info(adev->dev, "GPU link reset\n");
5096 
5097 	if (!amdgpu_reset_in_dpc(adev))
5098 		ret = amdgpu_dpm_link_reset(adev);
5099 
5100 	if (ret)
5101 		goto link_reset_failed;
5102 
5103 	ret = amdgpu_psp_wait_for_bootloader(adev);
5104 	if (ret)
5105 		goto link_reset_failed;
5106 
5107 	return 0;
5108 
5109 link_reset_failed:
5110 	dev_err(adev->dev, "GPU link reset failed\n");
5111 	return ret;
5112 }
5113 
5114 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5115 				 struct amdgpu_reset_context *reset_context)
5116 {
5117 	struct amdgpu_job *job = NULL;
5118 	struct dma_fence *fence = NULL;
5119 	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5120 	bool need_full_reset =
5121 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5122 	int i, r;
5123 
5124 	if (reset_context->reset_req_dev == adev)
5125 		job = reset_context->job;
5126 
5127 	if (amdgpu_sriov_vf(adev))
5128 		amdgpu_virt_pre_reset(adev);
5129 
5130 	amdgpu_fence_driver_isr_toggle(adev, true);
5131 
5132 	if (job)
5133 		fence = &job->hw_fence->base;
5134 
5135 	/* block all schedulers and reset given job's ring */
5136 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5137 		struct amdgpu_ring *ring = adev->rings[i];
5138 
5139 		if (!amdgpu_ring_sched_ready(ring))
5140 			continue;
5141 
5142 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5143 		amdgpu_fence_driver_force_completion(ring, fence);
5144 	}
5145 
5146 	amdgpu_fence_driver_isr_toggle(adev, false);
5147 
5148 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5149 	/* If reset handler not implemented, continue; otherwise return */
5150 	if (r == -EOPNOTSUPP)
5151 		r = 0;
5152 	else
5153 		return r;
5154 
5155 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5156 	if (!amdgpu_sriov_vf(adev)) {
5157 
5158 		if (!need_full_reset)
5159 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5160 
5161 		if (!need_full_reset && amdgpu_gpu_recovery &&
5162 		    amdgpu_device_ip_check_soft_reset(adev)) {
5163 			amdgpu_device_ip_pre_soft_reset(adev);
5164 			r = amdgpu_device_ip_soft_reset(adev);
5165 			amdgpu_device_ip_post_soft_reset(adev);
5166 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5167 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5168 				need_full_reset = true;
5169 			}
5170 		}
5171 
5172 		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5173 			dev_info(tmp_adev->dev, "Dumping IP State\n");
5174 			/* Trigger ip dump before we reset the asic */
5175 			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5176 				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5177 					tmp_adev->ip_blocks[i].version->funcs
5178 						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5179 			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5180 		}
5181 
5182 		if (need_full_reset)
5183 			r = amdgpu_device_ip_suspend(adev);
5184 		if (need_full_reset)
5185 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5186 		else
5187 			clear_bit(AMDGPU_NEED_FULL_RESET,
5188 				  &reset_context->flags);
5189 	}
5190 
5191 	return r;
5192 }
5193 
5194 int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5195 {
5196 	struct list_head *device_list_handle;
5197 	bool full_reset, vram_lost = false;
5198 	struct amdgpu_device *tmp_adev;
5199 	int r, init_level;
5200 
5201 	device_list_handle = reset_context->reset_device_list;
5202 
5203 	if (!device_list_handle)
5204 		return -EINVAL;
5205 
5206 	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5207 
5208 	/**
5209 	 * If it's reset on init, it's default init level, otherwise keep level
5210 	 * as recovery level.
5211 	 */
5212 	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5213 			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5214 	else
5215 			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5216 
5217 	r = 0;
5218 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5219 		amdgpu_set_init_level(tmp_adev, init_level);
5220 		if (full_reset) {
5221 			/* post card */
5222 			amdgpu_reset_set_dpc_status(tmp_adev, false);
5223 			amdgpu_ras_clear_err_state(tmp_adev);
5224 			r = amdgpu_device_asic_init(tmp_adev);
5225 			if (r) {
5226 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5227 			} else {
5228 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5229 
5230 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5231 				if (r)
5232 					goto out;
5233 
5234 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5235 
5236 				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5237 					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5238 
5239 				if (vram_lost) {
5240 					dev_info(
5241 						tmp_adev->dev,
5242 						"VRAM is lost due to GPU reset!\n");
5243 					amdgpu_inc_vram_lost(tmp_adev);
5244 				}
5245 
5246 				r = amdgpu_device_fw_loading(tmp_adev);
5247 				if (r)
5248 					return r;
5249 
5250 				r = amdgpu_xcp_restore_partition_mode(
5251 					tmp_adev->xcp_mgr);
5252 				if (r)
5253 					goto out;
5254 
5255 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5256 				if (r)
5257 					goto out;
5258 
5259 				amdgpu_ttm_enable_buffer_funcs(tmp_adev);
5260 
5261 				r = amdgpu_device_ip_resume_phase3(tmp_adev);
5262 				if (r)
5263 					goto out;
5264 
5265 				if (vram_lost)
5266 					amdgpu_device_fill_reset_magic(tmp_adev);
5267 
5268 				/*
5269 				 * Add this ASIC as tracked as reset was already
5270 				 * complete successfully.
5271 				 */
5272 				amdgpu_register_gpu_instance(tmp_adev);
5273 
5274 				if (!reset_context->hive &&
5275 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5276 					amdgpu_xgmi_add_device(tmp_adev);
5277 
5278 				r = amdgpu_device_ip_late_init(tmp_adev);
5279 				if (r)
5280 					goto out;
5281 
5282 				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
5283 				if (r)
5284 					goto out;
5285 
5286 				drm_client_dev_resume(adev_to_drm(tmp_adev));
5287 
5288 				/*
5289 				 * The GPU enters bad state once faulty pages
5290 				 * by ECC has reached the threshold, and ras
5291 				 * recovery is scheduled next. So add one check
5292 				 * here to break recovery if it indeed exceeds
5293 				 * bad page threshold, and remind user to
5294 				 * retire this GPU or setting one bigger
5295 				 * bad_page_threshold value to fix this once
5296 				 * probing driver again.
5297 				 */
5298 				if (!amdgpu_ras_is_rma(tmp_adev)) {
5299 					/* must succeed. */
5300 					amdgpu_ras_resume(tmp_adev);
5301 				} else {
5302 					r = -EINVAL;
5303 					goto out;
5304 				}
5305 
5306 				/* Update PSP FW topology after reset */
5307 				if (reset_context->hive &&
5308 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5309 					r = amdgpu_xgmi_update_topology(
5310 						reset_context->hive, tmp_adev);
5311 			}
5312 		}
5313 
5314 out:
5315 		if (!r) {
5316 			/* IP init is complete now, set level as default */
5317 			amdgpu_set_init_level(tmp_adev,
5318 					      AMDGPU_INIT_LEVEL_DEFAULT);
5319 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5320 			r = amdgpu_ib_ring_tests(tmp_adev);
5321 			if (r) {
5322 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5323 				r = -EAGAIN;
5324 				goto end;
5325 			}
5326 		}
5327 
5328 		if (r)
5329 			tmp_adev->asic_reset_res = r;
5330 	}
5331 
5332 end:
5333 	return r;
5334 }
5335 
5336 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5337 			 struct amdgpu_reset_context *reset_context)
5338 {
5339 	struct amdgpu_device *tmp_adev = NULL;
5340 	bool need_full_reset, skip_hw_reset;
5341 	int r = 0;
5342 
5343 	/* Try reset handler method first */
5344 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5345 				    reset_list);
5346 
5347 	reset_context->reset_device_list = device_list_handle;
5348 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5349 	/* If reset handler not implemented, continue; otherwise return */
5350 	if (r == -EOPNOTSUPP)
5351 		r = 0;
5352 	else
5353 		return r;
5354 
5355 	/* Reset handler not implemented, use the default method */
5356 	need_full_reset =
5357 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5358 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5359 
5360 	/*
5361 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5362 	 * to allow proper links negotiation in FW (within 1 sec)
5363 	 */
5364 	if (!skip_hw_reset && need_full_reset) {
5365 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5366 			/* For XGMI run all resets in parallel to speed up the process */
5367 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5368 				if (!queue_work(system_dfl_wq,
5369 						&tmp_adev->xgmi_reset_work))
5370 					r = -EALREADY;
5371 			} else
5372 				r = amdgpu_asic_reset(tmp_adev);
5373 
5374 			if (r) {
5375 				dev_err(tmp_adev->dev,
5376 					"ASIC reset failed with error, %d for drm dev, %s",
5377 					r, adev_to_drm(tmp_adev)->unique);
5378 				goto out;
5379 			}
5380 		}
5381 
5382 		/* For XGMI wait for all resets to complete before proceed */
5383 		if (!r) {
5384 			list_for_each_entry(tmp_adev, device_list_handle,
5385 					    reset_list) {
5386 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5387 					flush_work(&tmp_adev->xgmi_reset_work);
5388 					r = tmp_adev->asic_reset_res;
5389 					if (r)
5390 						break;
5391 				}
5392 			}
5393 		}
5394 	}
5395 
5396 	if (!r && amdgpu_ras_intr_triggered()) {
5397 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5398 			amdgpu_ras_reset_error_count(tmp_adev,
5399 						     AMDGPU_RAS_BLOCK__MMHUB);
5400 		}
5401 
5402 		amdgpu_ras_intr_cleared();
5403 	}
5404 
5405 	r = amdgpu_device_reinit_after_reset(reset_context);
5406 	if (r == -EAGAIN)
5407 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5408 	else
5409 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5410 
5411 out:
5412 	return r;
5413 }
5414 
5415 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5416 {
5417 
5418 	switch (amdgpu_asic_reset_method(adev)) {
5419 	case AMD_RESET_METHOD_MODE1:
5420 	case AMD_RESET_METHOD_LINK:
5421 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5422 		break;
5423 	case AMD_RESET_METHOD_MODE2:
5424 		adev->mp1_state = PP_MP1_STATE_RESET;
5425 		break;
5426 	default:
5427 		adev->mp1_state = PP_MP1_STATE_NONE;
5428 		break;
5429 	}
5430 }
5431 
5432 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5433 {
5434 	amdgpu_vf_error_trans_all(adev);
5435 	adev->mp1_state = PP_MP1_STATE_NONE;
5436 }
5437 
5438 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5439 {
5440 	struct pci_dev *p = NULL;
5441 
5442 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5443 			adev->pdev->bus->number, 1);
5444 	if (p) {
5445 		pm_runtime_enable(&(p->dev));
5446 		pm_runtime_resume(&(p->dev));
5447 	}
5448 
5449 	pci_dev_put(p);
5450 }
5451 
5452 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5453 {
5454 	enum amd_reset_method reset_method;
5455 	struct pci_dev *p = NULL;
5456 	u64 expires;
5457 
5458 	/*
5459 	 * For now, only BACO and mode1 reset are confirmed
5460 	 * to suffer the audio issue without proper suspended.
5461 	 */
5462 	reset_method = amdgpu_asic_reset_method(adev);
5463 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5464 	     (reset_method != AMD_RESET_METHOD_MODE1))
5465 		return -EINVAL;
5466 
5467 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5468 			adev->pdev->bus->number, 1);
5469 	if (!p)
5470 		return -ENODEV;
5471 
5472 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5473 	if (!expires)
5474 		/*
5475 		 * If we cannot get the audio device autosuspend delay,
5476 		 * a fixed 4S interval will be used. Considering 3S is
5477 		 * the audio controller default autosuspend delay setting.
5478 		 * 4S used here is guaranteed to cover that.
5479 		 */
5480 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5481 
5482 	while (!pm_runtime_status_suspended(&(p->dev))) {
5483 		if (!pm_runtime_suspend(&(p->dev)))
5484 			break;
5485 
5486 		if (expires < ktime_get_mono_fast_ns()) {
5487 			dev_warn(adev->dev, "failed to suspend display audio\n");
5488 			pci_dev_put(p);
5489 			/* TODO: abort the succeeding gpu reset? */
5490 			return -ETIMEDOUT;
5491 		}
5492 	}
5493 
5494 	pm_runtime_disable(&(p->dev));
5495 
5496 	pci_dev_put(p);
5497 	return 0;
5498 }
5499 
5500 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5501 {
5502 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5503 
5504 #if defined(CONFIG_DEBUG_FS)
5505 	if (!amdgpu_sriov_vf(adev))
5506 		cancel_work(&adev->reset_work);
5507 #endif
5508 	amdgpu_userq_mgr_cancel_reset_work(adev);
5509 
5510 	if (adev->kfd.dev)
5511 		cancel_work(&adev->kfd.reset_work);
5512 
5513 	if (amdgpu_sriov_vf(adev))
5514 		cancel_work(&adev->virt.flr_work);
5515 
5516 	if (con && adev->ras_enabled)
5517 		cancel_work(&con->recovery_work);
5518 
5519 }
5520 
5521 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5522 {
5523 	struct amdgpu_device *tmp_adev;
5524 	int ret = 0;
5525 
5526 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5527 		ret |= amdgpu_device_bus_status_check(tmp_adev);
5528 	}
5529 
5530 	return ret;
5531 }
5532 
5533 static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
5534 					  struct list_head *device_list,
5535 					  struct amdgpu_hive_info *hive)
5536 {
5537 	struct amdgpu_device *tmp_adev = NULL;
5538 
5539 	/*
5540 	 * Build list of devices to reset.
5541 	 * In case we are in XGMI hive mode, resort the device list
5542 	 * to put adev in the 1st position.
5543 	 */
5544 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5545 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5546 			list_add_tail(&tmp_adev->reset_list, device_list);
5547 			if (adev->shutdown)
5548 				tmp_adev->shutdown = true;
5549 		}
5550 		if (!list_is_first(&adev->reset_list, device_list))
5551 			list_rotate_to_front(&adev->reset_list, device_list);
5552 	} else {
5553 		list_add_tail(&adev->reset_list, device_list);
5554 	}
5555 }
5556 
5557 static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
5558 						  struct list_head *device_list)
5559 {
5560 	struct amdgpu_device *tmp_adev = NULL;
5561 
5562 	if (list_empty(device_list))
5563 		return;
5564 	tmp_adev =
5565 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5566 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5567 }
5568 
5569 static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
5570 						  struct list_head *device_list)
5571 {
5572 	struct amdgpu_device *tmp_adev = NULL;
5573 
5574 	if (list_empty(device_list))
5575 		return;
5576 	tmp_adev =
5577 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5578 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5579 }
5580 
5581 static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
5582 					  struct amdgpu_job *job,
5583 					  struct amdgpu_reset_context *reset_context,
5584 					  struct list_head *device_list,
5585 					  struct amdgpu_hive_info *hive,
5586 					  bool need_emergency_restart)
5587 {
5588 	struct amdgpu_device *tmp_adev = NULL;
5589 	int i;
5590 
5591 	/* block all schedulers and reset given job's ring */
5592 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5593 		amdgpu_device_set_mp1_state(tmp_adev);
5594 
5595 		/*
5596 		 * Try to put the audio codec into suspend state
5597 		 * before gpu reset started.
5598 		 *
5599 		 * Due to the power domain of the graphics device
5600 		 * is shared with AZ power domain. Without this,
5601 		 * we may change the audio hardware from behind
5602 		 * the audio driver's back. That will trigger
5603 		 * some audio codec errors.
5604 		 */
5605 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5606 			tmp_adev->pcie_reset_ctx.audio_suspended = true;
5607 
5608 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5609 
5610 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5611 
5612 		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5613 
5614 		/*
5615 		 * Mark these ASICs to be reset as untracked first
5616 		 * And add them back after reset completed
5617 		 */
5618 		amdgpu_unregister_gpu_instance(tmp_adev);
5619 
5620 		drm_client_dev_suspend(adev_to_drm(tmp_adev));
5621 
5622 		/* disable ras on ALL IPs */
5623 		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
5624 		    amdgpu_device_ip_need_full_reset(tmp_adev))
5625 			amdgpu_ras_suspend(tmp_adev);
5626 
5627 		amdgpu_userq_pre_reset(tmp_adev);
5628 
5629 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5630 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5631 
5632 			if (!amdgpu_ring_sched_ready(ring))
5633 				continue;
5634 
5635 			drm_sched_wqueue_stop(&ring->sched);
5636 
5637 			if (need_emergency_restart)
5638 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5639 		}
5640 		atomic_inc(&tmp_adev->gpu_reset_counter);
5641 	}
5642 }
5643 
5644 static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
5645 			      struct list_head *device_list,
5646 			      struct amdgpu_reset_context *reset_context)
5647 {
5648 	struct amdgpu_device *tmp_adev = NULL;
5649 	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5650 	int r = 0;
5651 
5652 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5653 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5654 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5655 		/*TODO Should we stop ?*/
5656 		if (r) {
5657 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5658 				  r, adev_to_drm(tmp_adev)->unique);
5659 			tmp_adev->asic_reset_res = r;
5660 		}
5661 	}
5662 
5663 	/* Actual ASIC resets if needed.*/
5664 	/* Host driver will handle XGMI hive reset for SRIOV */
5665 	if (amdgpu_sriov_vf(adev)) {
5666 
5667 		/* Bail out of reset early */
5668 		if (amdgpu_ras_is_rma(adev))
5669 			return -ENODEV;
5670 
5671 		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5672 			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5673 			amdgpu_ras_set_fed(adev, true);
5674 			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5675 		}
5676 
5677 		r = amdgpu_device_reset_sriov(adev, reset_context);
5678 		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5679 			amdgpu_virt_release_full_gpu(adev, true);
5680 			goto retry;
5681 		}
5682 		if (r)
5683 			adev->asic_reset_res = r;
5684 	} else {
5685 		r = amdgpu_do_asic_reset(device_list, reset_context);
5686 		if (r && r == -EAGAIN)
5687 			goto retry;
5688 	}
5689 
5690 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5691 		/*
5692 		 * Drop any pending non scheduler resets queued before reset is done.
5693 		 * Any reset scheduled after this point would be valid. Scheduler resets
5694 		 * were already dropped during drm_sched_stop and no new ones can come
5695 		 * in before drm_sched_start.
5696 		 */
5697 		amdgpu_device_stop_pending_resets(tmp_adev);
5698 	}
5699 
5700 	return r;
5701 }
5702 
5703 static int amdgpu_device_sched_resume(struct list_head *device_list,
5704 			      struct amdgpu_reset_context *reset_context,
5705 			      bool   job_signaled)
5706 {
5707 	struct amdgpu_device *tmp_adev = NULL;
5708 	int i, r = 0;
5709 
5710 	/* Post ASIC reset for all devs .*/
5711 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5712 
5713 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5714 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5715 
5716 			if (!amdgpu_ring_sched_ready(ring))
5717 				continue;
5718 
5719 			drm_sched_wqueue_start(&ring->sched);
5720 		}
5721 
5722 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5723 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5724 
5725 		if (tmp_adev->asic_reset_res) {
5726 			/* bad news, how to tell it to userspace ?
5727 			 * for ras error, we should report GPU bad status instead of
5728 			 * reset failure
5729 			 */
5730 			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5731 			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5732 				dev_info(
5733 					tmp_adev->dev,
5734 					"GPU reset(%d) failed with error %d\n",
5735 					atomic_read(
5736 						&tmp_adev->gpu_reset_counter),
5737 					tmp_adev->asic_reset_res);
5738 			amdgpu_vf_error_put(tmp_adev,
5739 					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
5740 					    tmp_adev->asic_reset_res);
5741 			if (!r)
5742 				r = tmp_adev->asic_reset_res;
5743 			tmp_adev->asic_reset_res = 0;
5744 		} else {
5745 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
5746 				 atomic_read(&tmp_adev->gpu_reset_counter));
5747 			if (amdgpu_acpi_smart_shift_update(tmp_adev,
5748 							   AMDGPU_SS_DEV_D0))
5749 				dev_warn(tmp_adev->dev,
5750 					 "smart shift update failed\n");
5751 		}
5752 	}
5753 
5754 	return r;
5755 }
5756 
5757 static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
5758 			      struct list_head *device_list,
5759 			      bool   need_emergency_restart)
5760 {
5761 	struct amdgpu_device *tmp_adev = NULL;
5762 
5763 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5764 		/* unlock kfd: SRIOV would do it separately */
5765 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5766 			amdgpu_amdkfd_post_reset(tmp_adev);
5767 
5768 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5769 		 * need to bring up kfd here if it's not be initialized before
5770 		 */
5771 		if (!adev->kfd.init_complete)
5772 			amdgpu_amdkfd_device_init(adev);
5773 
5774 		if (tmp_adev->pcie_reset_ctx.audio_suspended)
5775 			amdgpu_device_resume_display_audio(tmp_adev);
5776 
5777 		amdgpu_device_unset_mp1_state(tmp_adev);
5778 
5779 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5780 
5781 	}
5782 }
5783 
5784 
5785 /**
5786  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5787  *
5788  * @adev: amdgpu_device pointer
5789  * @job: which job trigger hang
5790  * @reset_context: amdgpu reset context pointer
5791  *
5792  * Attempt to reset the GPU if it has hung (all asics).
5793  * Attempt to do soft-reset or full-reset and reinitialize Asic
5794  * Returns 0 for success or an error on failure.
5795  */
5796 
5797 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5798 			      struct amdgpu_job *job,
5799 			      struct amdgpu_reset_context *reset_context)
5800 {
5801 	struct list_head device_list;
5802 	bool job_signaled = false;
5803 	struct amdgpu_hive_info *hive = NULL;
5804 	int r = 0;
5805 	bool need_emergency_restart = false;
5806 	/* save the pasid here as the job may be freed before the end of the reset */
5807 	int pasid = job ? job->pasid : -EINVAL;
5808 
5809 	/*
5810 	 * If it reaches here because of hang/timeout and a RAS error is
5811 	 * detected at the same time, let RAS recovery take care of it.
5812 	 */
5813 	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
5814 	    !amdgpu_sriov_vf(adev) &&
5815 	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
5816 		dev_dbg(adev->dev,
5817 			"Gpu recovery from source: %d yielding to RAS error recovery handling",
5818 			reset_context->src);
5819 		return 0;
5820 	}
5821 
5822 	/*
5823 	 * Special case: RAS triggered and full reset isn't supported
5824 	 */
5825 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5826 
5827 	/*
5828 	 * Flush RAM to disk so that after reboot
5829 	 * the user can read log and see why the system rebooted.
5830 	 */
5831 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5832 		amdgpu_ras_get_context(adev)->reboot) {
5833 		dev_warn(adev->dev, "Emergency reboot.");
5834 
5835 		ksys_sync_helper();
5836 		emergency_restart();
5837 	}
5838 
5839 	dev_info(adev->dev, "GPU %s begin!. Source:  %d\n",
5840 		 need_emergency_restart ? "jobs stop" : "reset",
5841 		 reset_context->src);
5842 
5843 	if (!amdgpu_sriov_vf(adev))
5844 		hive = amdgpu_get_xgmi_hive(adev);
5845 	if (hive)
5846 		mutex_lock(&hive->hive_lock);
5847 
5848 	reset_context->job = job;
5849 	reset_context->hive = hive;
5850 	INIT_LIST_HEAD(&device_list);
5851 
5852 	amdgpu_device_recovery_prepare(adev, &device_list, hive);
5853 
5854 	if (!amdgpu_sriov_vf(adev)) {
5855 		r = amdgpu_device_health_check(&device_list);
5856 		if (r)
5857 			goto end_reset;
5858 	}
5859 
5860 	/* Cannot be called after locking reset domain */
5861 	amdgpu_ras_pre_reset(adev, &device_list);
5862 
5863 	/* We need to lock reset domain only once both for XGMI and single device */
5864 	amdgpu_device_recovery_get_reset_lock(adev, &device_list);
5865 
5866 	/* unmap all the mappings of doorbell and framebuffer to prevent user space from
5867 	 * accessing them
5868 	 */
5869 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
5870 	amdgpu_amdkfd_clear_kfd_mapping(adev);
5871 
5872 	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
5873 				      hive, need_emergency_restart);
5874 	if (need_emergency_restart)
5875 		goto skip_sched_resume;
5876 	/*
5877 	 * Must check guilty signal here since after this point all old
5878 	 * HW fences are force signaled.
5879 	 *
5880 	 * job->base holds a reference to parent fence
5881 	 */
5882 	if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) {
5883 		job_signaled = true;
5884 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5885 		goto skip_hw_reset;
5886 	}
5887 
5888 	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
5889 	if (r)
5890 		goto reset_unlock;
5891 skip_hw_reset:
5892 	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
5893 	if (r)
5894 		goto reset_unlock;
5895 skip_sched_resume:
5896 	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
5897 reset_unlock:
5898 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
5899 	amdgpu_ras_post_reset(adev, &device_list);
5900 end_reset:
5901 	if (hive) {
5902 		mutex_unlock(&hive->hive_lock);
5903 		amdgpu_put_xgmi_hive(hive);
5904 	}
5905 
5906 	if (r)
5907 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5908 
5909 	atomic_set(&adev->reset_domain->reset_res, r);
5910 
5911 	if (!r) {
5912 		struct amdgpu_task_info *ti = NULL;
5913 
5914 		/*
5915 		 * The job may already be freed at this point via the sched tdr workqueue so
5916 		 * use the cached pasid.
5917 		 */
5918 		if (pasid >= 0)
5919 			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
5920 
5921 		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
5922 				     ti ? &ti->task : NULL);
5923 
5924 		amdgpu_vm_put_task_info(ti);
5925 	}
5926 
5927 	return r;
5928 }
5929 
5930 /**
5931  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5932  *
5933  * @adev: amdgpu_device pointer
5934  * @speed: pointer to the speed of the link
5935  * @width: pointer to the width of the link
5936  *
5937  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5938  * first physical partner to an AMD dGPU.
5939  * This will exclude any virtual switches and links.
5940  */
5941 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5942 					    enum pci_bus_speed *speed,
5943 					    enum pcie_link_width *width)
5944 {
5945 	if (!speed || !width)
5946 		return;
5947 
5948 	*speed = PCI_SPEED_UNKNOWN;
5949 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5950 
5951 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5952 		struct pci_dev *parent = amdgpu_device_find_parent(adev);
5953 
5954 		if (parent) {
5955 			*speed = pcie_get_speed_cap(parent);
5956 			*width = pcie_get_width_cap(parent);
5957 		}
5958 	} else {
5959 		/* use the current speeds rather than max if switching is not supported */
5960 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5961 	}
5962 }
5963 
5964 /**
5965  * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
5966  *
5967  * @adev: amdgpu_device pointer
5968  * @speed: pointer to the speed of the link
5969  * @width: pointer to the width of the link
5970  *
5971  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5972  * AMD dGPU which may be a virtual upstream bridge.
5973  */
5974 static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
5975 					enum pci_bus_speed *speed,
5976 					enum pcie_link_width *width)
5977 {
5978 	struct pci_dev *parent = adev->pdev;
5979 
5980 	if (!speed || !width)
5981 		return;
5982 
5983 	/* use the device itself */
5984 	*speed = pcie_get_speed_cap(adev->pdev);
5985 	*width = pcie_get_width_cap(adev->pdev);
5986 
5987 	/* use the link outside the device */
5988 	parent = amdgpu_device_find_parent(adev);
5989 	if (parent) {
5990 		*speed = pcie_get_speed_cap(parent);
5991 		*width = pcie_get_width_cap(parent);
5992 	}
5993 }
5994 
5995 /**
5996  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5997  *
5998  * @adev: amdgpu_device pointer
5999  *
6000  * Fetches and stores in the driver the PCIE capabilities (gen speed
6001  * and lanes) of the slot the device is in. Handles APUs and
6002  * virtualized environments where PCIE config space may not be available.
6003  */
6004 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6005 {
6006 	enum pci_bus_speed speed_cap, platform_speed_cap;
6007 	enum pcie_link_width platform_link_width, link_width;
6008 
6009 	if (amdgpu_pcie_gen_cap)
6010 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6011 
6012 	if (amdgpu_pcie_lane_cap)
6013 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6014 
6015 	/* covers APUs as well */
6016 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6017 		if (adev->pm.pcie_gen_mask == 0)
6018 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6019 		if (adev->pm.pcie_mlw_mask == 0)
6020 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6021 		return;
6022 	}
6023 
6024 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6025 		return;
6026 
6027 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6028 					&platform_link_width);
6029 	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
6030 
6031 	if (adev->pm.pcie_gen_mask == 0) {
6032 		/* asic caps */
6033 		if (speed_cap == PCI_SPEED_UNKNOWN) {
6034 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6035 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6036 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6037 		} else {
6038 			if (speed_cap == PCIE_SPEED_32_0GT)
6039 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6040 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6041 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6042 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6043 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6044 			else if (speed_cap == PCIE_SPEED_16_0GT)
6045 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6046 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6047 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6048 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6049 			else if (speed_cap == PCIE_SPEED_8_0GT)
6050 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6051 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6052 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6053 			else if (speed_cap == PCIE_SPEED_5_0GT)
6054 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6055 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6056 			else
6057 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6058 		}
6059 		/* platform caps */
6060 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6061 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6062 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6063 		} else {
6064 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6065 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6066 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6067 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6068 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6069 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6070 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6071 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6072 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6073 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6074 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6075 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6076 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6077 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6078 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6079 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6080 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6081 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6082 			else
6083 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6084 
6085 		}
6086 	}
6087 	if (adev->pm.pcie_mlw_mask == 0) {
6088 		/* asic caps */
6089 		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6090 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6091 		} else {
6092 			switch (link_width) {
6093 			case PCIE_LNK_X32:
6094 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6095 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6096 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6097 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6098 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6099 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6100 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6101 				break;
6102 			case PCIE_LNK_X16:
6103 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6104 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6105 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6106 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6107 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6108 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6109 				break;
6110 			case PCIE_LNK_X12:
6111 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6112 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6113 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6114 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6115 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6116 				break;
6117 			case PCIE_LNK_X8:
6118 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6119 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6120 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6121 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6122 				break;
6123 			case PCIE_LNK_X4:
6124 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6125 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6126 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6127 				break;
6128 			case PCIE_LNK_X2:
6129 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6130 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6131 				break;
6132 			case PCIE_LNK_X1:
6133 				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6134 				break;
6135 			default:
6136 				break;
6137 			}
6138 		}
6139 		/* platform caps */
6140 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6141 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6142 		} else {
6143 			switch (platform_link_width) {
6144 			case PCIE_LNK_X32:
6145 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6146 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6147 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6148 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6149 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6150 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6151 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6152 				break;
6153 			case PCIE_LNK_X16:
6154 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6155 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6156 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6157 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6158 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6159 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6160 				break;
6161 			case PCIE_LNK_X12:
6162 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6163 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6164 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6165 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6166 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6167 				break;
6168 			case PCIE_LNK_X8:
6169 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6170 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6171 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6172 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6173 				break;
6174 			case PCIE_LNK_X4:
6175 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6176 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6177 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6178 				break;
6179 			case PCIE_LNK_X2:
6180 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6181 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6182 				break;
6183 			case PCIE_LNK_X1:
6184 				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6185 				break;
6186 			default:
6187 				break;
6188 			}
6189 		}
6190 	}
6191 }
6192 
/**
 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
 *
 * @adev: amdgpu_device pointer
 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
 *
 * Return: true if @peer_adev can access (DMA) @adev through the PCIe BAR,
 * i.e. pcie_p2p is set, @adev is "large BAR" (all of its VRAM is visible),
 * the p2pdma distance check passes and the aperture of @adev is addressable
 * by @peer_adev. Always false when CONFIG_HSA_AMD_P2P is not enabled.
 */
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
				      struct amdgpu_device *peer_adev)
{
#ifdef CONFIG_HSA_AMD_P2P
	bool chipset_ok, large_bar, addressable;

	/* the p2pdma distance is only queried when not connected to the CPU */
	chipset_ok = !adev->gmc.xgmi.connected_to_cpu &&
		     pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) >= 0;
	if (!chipset_ok)
		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
			pci_name(peer_adev->pdev));

	large_bar = adev->gmc.visible_vram_size &&
		    adev->gmc.real_vram_size == adev->gmc.visible_vram_size;

	addressable = amdgpu_device_check_iommu_remap(peer_adev);
	if (!addressable) {
		/* bits the peer cannot drive; 32-bit addressing if no mask is set */
		uint64_t unreachable_bits = ~((1ULL << 32) - 1);
		resource_size_t aper_last =
			adev->gmc.aper_base + adev->gmc.aper_size - 1;

		if (peer_adev->dev->dma_mask)
			unreachable_bits = ~*peer_adev->dev->dma_mask;

		/* both ends of the aperture must fit below the peer's DMA mask */
		addressable = !(adev->gmc.aper_base & unreachable_bits) &&
			      !(aper_last & unreachable_bits);
	}

	return pcie_p2p && large_bar && chipset_ok && addressable;
#else
	return false;
#endif
}
6232 
6233 int amdgpu_device_baco_enter(struct amdgpu_device *adev)
6234 {
6235 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6236 
6237 	if (!amdgpu_device_supports_baco(adev))
6238 		return -ENOTSUPP;
6239 
6240 	if (ras && adev->ras_enabled &&
6241 	    adev->nbio.funcs->enable_doorbell_interrupt)
6242 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6243 
6244 	return amdgpu_dpm_baco_enter(adev);
6245 }
6246 
6247 int amdgpu_device_baco_exit(struct amdgpu_device *adev)
6248 {
6249 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6250 	int ret = 0;
6251 
6252 	if (!amdgpu_device_supports_baco(adev))
6253 		return -ENOTSUPP;
6254 
6255 	ret = amdgpu_dpm_baco_exit(adev);
6256 	if (ret)
6257 		return ret;
6258 
6259 	if (ras && adev->ras_enabled &&
6260 	    adev->nbio.funcs->enable_doorbell_interrupt)
6261 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6262 
6263 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6264 	    adev->nbio.funcs->clear_doorbell_interrupt)
6265 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6266 
6267 	return 0;
6268 }
6269 
/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Description: Called when a PCI error is detected. The channel state is
 * recorded in adev->pci_channel_state. For pci_channel_io_frozen the device
 * (or every device of its XGMI hive) is prepared for a slot reset: the reset
 * lock is taken and activities are halted.
 *
 * Return: PCI_ERS_RESULT_CAN_RECOVER for pci_channel_io_normal,
 * PCI_ERS_RESULT_NEED_RESET for pci_channel_io_frozen (and for any state not
 * listed in the switch), PCI_ERS_RESULT_DISCONNECT for
 * pci_channel_io_perm_failure or when the device is part of a hive and link
 * reset is not supported.
 */
pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	/* hive is declared with __free(xgmi_put_hive) scope-based cleanup */
	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
		amdgpu_get_xgmi_hive(adev);
	struct amdgpu_reset_context reset_context;
	struct list_head device_list;

	dev_info(adev->dev, "PCI error: detected callback!!\n");

	/* remembered so amdgpu_pci_resume() can tell which path was taken */
	adev->pci_channel_state = state;

	switch (state) {
	case pci_channel_io_normal:
		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		/* Fatal error, prepare for slot reset */
		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
		if (hive) {
			/* Hive devices should be able to support FW based
			 * link reset on other devices, if not return.
			 */
			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
				dev_warn(adev->dev,
					 "No support for XGMI hive yet...\n");
				return PCI_ERS_RESULT_DISCONNECT;
			}
			/* Set dpc status only if device is part of hive
			 * Non-hive devices should be able to recover after
			 * link reset.
			 */
			amdgpu_reset_set_dpc_status(adev, true);

			/* held across the prepare/halt calls, dropped before returning */
			mutex_lock(&hive->hive_lock);
		} else {
			/* non-hive: dpc status depends on the bus status check */
			if (amdgpu_device_bus_status_check(adev))
				amdgpu_reset_set_dpc_status(adev, true);
		}
		memset(&reset_context, 0, sizeof(reset_context));
		INIT_LIST_HEAD(&device_list);

		/*
		 * The reset lock taken here is not released in this function;
		 * amdgpu_pci_slot_reset() (on failure) and amdgpu_pci_resume()
		 * call amdgpu_device_recovery_put_reset_lock().
		 */
		amdgpu_device_recovery_prepare(adev, &device_list, hive);
		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
					      hive, false);
		if (hive)
			mutex_unlock(&hive->hive_lock);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */
		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
		return PCI_ERS_RESULT_DISCONNECT;
	}

	/* reached only for a state value not handled by the switch above */
	return PCI_ERS_RESULT_NEED_RESET;
}
6337 
6338 /**
6339  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6340  * @pdev: pointer to PCI device
6341  */
6342 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6343 {
6344 	struct drm_device *dev = pci_get_drvdata(pdev);
6345 	struct amdgpu_device *adev = drm_to_adev(dev);
6346 
6347 	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6348 
6349 	/* TODO - dump whatever for debugging purposes */
6350 
6351 	/* This called only if amdgpu_pci_error_detected returns
6352 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6353 	 * works, no need to reset slot.
6354 	 */
6355 
6356 	return PCI_ERS_RESULT_RECOVERED;
6357 }
6358 
/**
 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the pci error recovery
 * code after the PCI slot has been reset, just before we
 * should resume normal operations.
 *
 * Waits for the device to answer config reads again, restores the saved
 * switch and device PCI state, confirms the ASIC responds and then runs
 * amdgpu_device_asic_reset() on the device (or on all devices of its hive).
 *
 * Return: PCI_ERS_RESULT_RECOVERED on success, PCI_ERS_RESULT_DISCONNECT if
 * any step failed.
 */
pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_reset_context reset_context;
	struct amdgpu_device *tmp_adev;
	struct amdgpu_hive_info *hive;
	struct list_head device_list;
	struct pci_dev *link_dev;
	int r = 0, i, timeout;
	u32 memsize;
	u16 status;

	dev_info(adev->dev, "PCI error: slot reset callback!!\n");

	memset(&reset_context, 0, sizeof(reset_context));
	INIT_LIST_HEAD(&device_list);
	/* build the list of devices to reset: the whole hive, or just adev */
	hive = amdgpu_get_xgmi_hive(adev);
	if (hive) {
		/* hive_lock stays held until the end of this function */
		mutex_lock(&hive->hive_lock);
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
			list_add_tail(&tmp_adev->reset_list, &device_list);
	} else {
		list_add_tail(&adev->reset_list, &device_list);
	}

	/* poll the cached upstream switch port if there is one, else the GPU */
	if (adev->pcie_reset_ctx.swus)
		link_dev = adev->pcie_reset_ctx.swus;
	else
		link_dev = adev->pdev;
	/* wait for asic to come out of reset, timeout = 10s */
	timeout = 10000;
	do {
		/* ~10 ms per iteration, matched by the timeout decrement below */
		usleep_range(10000, 10500);
		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
		timeout -= 10;
	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
		 (status != PCI_VENDOR_ID_AMD));

	/* the vendor ID never read back as ATI/AMD: give up */
	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
		r = -ETIME;
		goto out;
	}

	/* switch state is restored before the device's own config space */
	amdgpu_device_load_switch_state(adev);
	/* Restore PCI confspace */
	amdgpu_device_load_pci_state(pdev);

	/* confirm  ASIC came out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		memsize = amdgpu_asic_get_config_memsize(adev);

		/* all ones is treated as "device not responding yet" */
		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}
	if (memsize == 0xffffffff) {
		r = -ETIME;
		goto out;
	}

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);

	/* in_link_reset is cleared again in amdgpu_pci_resume() */
	if (hive) {
		reset_context.hive = hive;
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
			tmp_adev->pcie_reset_ctx.in_link_reset = true;
	} else {
		adev->pcie_reset_ctx.in_link_reset = true;
		/* AMDGPU_SKIP_HW_RESET is only set for the non-hive case */
		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
	}

	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
out:
	if (!r) {
		/* re-cache the PCI state; restore it only if caching succeeded */
		if (amdgpu_device_cache_pci_state(adev->pdev))
			pci_restore_state(adev->pdev);
		dev_info(adev->dev, "PCIe error recovery succeeded\n");
	} else {
		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
		if (hive) {
			list_for_each_entry(tmp_adev, &device_list, reset_list)
				amdgpu_device_unset_mp1_state(tmp_adev);
		}
		/*
		 * Only the failure path drops the reset lock here; on success
		 * amdgpu_pci_resume() releases it.
		 */
		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
	}

	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
6464 
6465 /**
6466  * amdgpu_pci_resume() - resume normal ops after PCI reset
6467  * @pdev: pointer to PCI device
6468  *
6469  * Called when the error recovery driver tells us that its
6470  * OK to resume normal operation.
6471  */
6472 void amdgpu_pci_resume(struct pci_dev *pdev)
6473 {
6474 	struct drm_device *dev = pci_get_drvdata(pdev);
6475 	struct amdgpu_device *adev = drm_to_adev(dev);
6476 	struct list_head device_list;
6477 	struct amdgpu_hive_info *hive = NULL;
6478 	struct amdgpu_device *tmp_adev = NULL;
6479 
6480 	dev_info(adev->dev, "PCI error: resume callback!!\n");
6481 
6482 	/* Only continue execution for the case of pci_channel_io_frozen */
6483 	if (adev->pci_channel_state != pci_channel_io_frozen)
6484 		return;
6485 
6486 	INIT_LIST_HEAD(&device_list);
6487 
6488 	hive = amdgpu_get_xgmi_hive(adev);
6489 	if (hive) {
6490 		mutex_lock(&hive->hive_lock);
6491 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6492 			tmp_adev->pcie_reset_ctx.in_link_reset = false;
6493 			list_add_tail(&tmp_adev->reset_list, &device_list);
6494 		}
6495 	} else {
6496 		adev->pcie_reset_ctx.in_link_reset = false;
6497 		list_add_tail(&adev->reset_list, &device_list);
6498 	}
6499 	amdgpu_device_sched_resume(&device_list, NULL, NULL);
6500 	amdgpu_device_gpu_resume(adev, &device_list, false);
6501 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6502 
6503 	if (hive) {
6504 		mutex_unlock(&hive->hive_lock);
6505 		amdgpu_put_xgmi_hive(hive);
6506 	}
6507 }
6508 
6509 static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
6510 {
6511 	struct pci_dev *swus, *swds;
6512 	int r;
6513 
6514 	swds = pci_upstream_bridge(adev->pdev);
6515 	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
6516 	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
6517 		return;
6518 	swus = pci_upstream_bridge(swds);
6519 	if (!swus ||
6520 	    (swus->vendor != PCI_VENDOR_ID_ATI &&
6521 	     swus->vendor != PCI_VENDOR_ID_AMD) ||
6522 	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
6523 		return;
6524 
6525 	/* If already saved, return */
6526 	if (adev->pcie_reset_ctx.swus)
6527 		return;
6528 	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
6529 	r = pci_save_state(swds);
6530 	if (r)
6531 		return;
6532 	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
6533 
6534 	r = pci_save_state(swus);
6535 	if (r)
6536 		return;
6537 	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
6538 
6539 	adev->pcie_reset_ctx.swus = swus;
6540 }
6541 
6542 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
6543 {
6544 	struct pci_dev *pdev;
6545 	int r;
6546 
6547 	if (!adev->pcie_reset_ctx.swds_pcistate ||
6548 	    !adev->pcie_reset_ctx.swus_pcistate)
6549 		return;
6550 
6551 	pdev = adev->pcie_reset_ctx.swus;
6552 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
6553 	if (!r) {
6554 		pci_restore_state(pdev);
6555 	} else {
6556 		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
6557 		return;
6558 	}
6559 
6560 	pdev = pci_upstream_bridge(adev->pdev);
6561 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
6562 	if (!r)
6563 		pci_restore_state(pdev);
6564 	else
6565 		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
6566 }
6567 
6568 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6569 {
6570 	struct drm_device *dev = pci_get_drvdata(pdev);
6571 	struct amdgpu_device *adev = drm_to_adev(dev);
6572 	int r;
6573 
6574 	if (amdgpu_sriov_vf(adev))
6575 		return false;
6576 
6577 	r = pci_save_state(pdev);
6578 	if (!r) {
6579 		kfree(adev->pci_state);
6580 
6581 		adev->pci_state = pci_store_saved_state(pdev);
6582 
6583 		if (!adev->pci_state) {
6584 			dev_err(adev->dev, "Failed to store PCI saved state");
6585 			return false;
6586 		}
6587 	} else {
6588 		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
6589 		return false;
6590 	}
6591 
6592 	amdgpu_device_cache_switch_state(adev);
6593 
6594 	return true;
6595 }
6596 
6597 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6598 {
6599 	struct drm_device *dev = pci_get_drvdata(pdev);
6600 	struct amdgpu_device *adev = drm_to_adev(dev);
6601 	int r;
6602 
6603 	if (!adev->pci_state)
6604 		return false;
6605 
6606 	r = pci_load_saved_state(pdev, adev->pci_state);
6607 
6608 	if (!r) {
6609 		pci_restore_state(pdev);
6610 	} else {
6611 		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
6612 		return false;
6613 	}
6614 
6615 	return true;
6616 }
6617 
6618 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6619 		struct amdgpu_ring *ring)
6620 {
6621 #ifdef CONFIG_X86_64
6622 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6623 		return;
6624 #endif
6625 	if (adev->gmc.xgmi.connected_to_cpu)
6626 		return;
6627 
6628 	if (ring && ring->funcs->emit_hdp_flush) {
6629 		amdgpu_ring_emit_hdp_flush(ring);
6630 		return;
6631 	}
6632 
6633 	if (!ring && amdgpu_sriov_runtime(adev)) {
6634 		if (!amdgpu_kiq_hdp_flush(adev))
6635 			return;
6636 	}
6637 
6638 	amdgpu_hdp_flush(adev, ring);
6639 }
6640 
6641 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6642 		struct amdgpu_ring *ring)
6643 {
6644 #ifdef CONFIG_X86_64
6645 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6646 		return;
6647 #endif
6648 	if (adev->gmc.xgmi.connected_to_cpu)
6649 		return;
6650 
6651 	amdgpu_hdp_invalidate(adev, ring);
6652 }
6653 
6654 int amdgpu_in_reset(struct amdgpu_device *adev)
6655 {
6656 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6657 }
6658 
/**
 * amdgpu_device_halt() - bring hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Bring hardware to some kind of halt state so that no one can touch it
 * any more. It will help to maintain error context when an error occurred.
 * Compared to a simple hang, the system will keep stable at least for SSH
 * access. Then it should be trivial to inspect the hardware state and
 * see what's going on. Implemented as follows:
 *
 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
 *    clears all CPU mappings to device, disallows remappings through page faults
 * 2. amdgpu_irq_disable_all() disables all interrupts
 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 * 4. set adev->no_hw_access to avoid potential crashes after step 5
 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
 *    flush any in flight DMA operations
 *
 * In addition, amdgpu_xcp_dev_unplug() is called ahead of step 1, and power
 * gating and clock gating are set to the UNGATE state between steps 1 and 2.
 */
void amdgpu_device_halt(struct amdgpu_device *adev)
{
	struct pci_dev *pdev = adev->pdev;
	struct drm_device *ddev = adev_to_drm(adev);

	/* step 1 */
	amdgpu_xcp_dev_unplug(adev);
	drm_dev_unplug(ddev);

	/* ungate power and clock gating before interrupts are disabled */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/* step 2 */
	amdgpu_irq_disable_all(adev);

	/* step 3 */
	amdgpu_fence_driver_hw_fini(adev);

	/* step 4: must be set before the MMIO mappings go away */
	adev->no_hw_access = true;

	/* step 5 */
	amdgpu_device_unmap_mmio(adev);

	/* step 6 */
	pci_disable_device(pdev);
	pci_wait_for_pending_transaction(pdev);
}
6701 
6702 /**
6703  * amdgpu_device_get_gang - return a reference to the current gang
6704  * @adev: amdgpu_device pointer
6705  *
6706  * Returns: A new reference to the current gang leader.
6707  */
6708 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6709 {
6710 	struct dma_fence *fence;
6711 
6712 	rcu_read_lock();
6713 	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6714 	rcu_read_unlock();
6715 	return fence;
6716 }
6717 
/**
 * amdgpu_device_switch_gang - switch to a new gang
 * @adev: amdgpu_device pointer
 * @gang: the gang to switch to
 *
 * Try to switch to a new gang. The switch only happens once the current gang
 * leader fence has signaled, or trivially when @gang already is the current
 * gang.
 *
 * Returns: NULL if we switched to the new gang or a reference to the current
 * gang leader.
 */
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
					    struct dma_fence *gang)
{
	struct dma_fence *old = NULL;

	/* reference that adev->gang_submit will own if the exchange succeeds */
	dma_fence_get(gang);
	do {
		/* drop the reference from the previous iteration (NULL at first) */
		dma_fence_put(old);
		old = amdgpu_device_get_gang(adev);
		if (old == gang)
			break;

		/* current gang still running: undo our reference and report it */
		if (!dma_fence_is_signaled(old)) {
			dma_fence_put(gang);
			return old;
		}

		/* retry if adev->gang_submit changed since it was read */
	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
			 old, gang) != old);

	/*
	 * Drop it once for the exchanged reference in adev and once for the
	 * thread local reference acquired in amdgpu_device_get_gang().
	 *
	 * When the loop was left because old == gang, the same two puts drop
	 * the reference taken at the top of this function and the one from
	 * amdgpu_device_get_gang().
	 */
	dma_fence_put(old);
	dma_fence_put(old);
	return NULL;
}
6755 
/**
 * amdgpu_device_enforce_isolation - enforce HW isolation
 * @adev: the amdgpu device pointer
 * @ring: the HW ring the job is supposed to run on
 * @job: the job which is about to be pushed to the HW ring
 *
 * Makes sure that only one client at a time can use the GFX block.
 * The isolation state is tracked per ring->xcp_id and is protected by
 * adev->enforce_isolation_mutex.
 *
 * Returns: The dependency to wait on before the job can be pushed to the HW.
 * The function is called multiple times until NULL is returned.
 */
struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
						  struct amdgpu_ring *ring,
						  struct amdgpu_job *job)
{
	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
	struct drm_sched_fence *f = job->base.s_fence;
	struct dma_fence *dep;
	void *owner;
	int r;

	/*
	 * For now enforce isolation only for the GFX block since we only need
	 * the cleaner shader on those rings. Compute rings are covered by the
	 * check below as well.
	 */
	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return NULL;

	/*
	 * All submissions where enforce isolation is false are handled as if
	 * they come from a single client. Use ~0l as the owner to distinguish
	 * it from kernel submissions where the owner is NULL.
	 */
	owner = job->enforce_isolation ? f->owner : (void *)~0l;

	mutex_lock(&adev->enforce_isolation_mutex);

	/*
	 * The "spearhead" submission is the first one which changes the
	 * ownership to its client. We always need to wait for it to be
	 * pushed to the HW before proceeding with anything.
	 */
	if (&f->scheduled != isolation->spearhead &&
	    !dma_fence_is_signaled(isolation->spearhead)) {
		dep = isolation->spearhead;
		goto out_grab_ref;
	}

	if (isolation->owner != owner) {

		/*
		 * Wait for any gang to be assembled before switching to a
		 * different owner or otherwise we could deadlock the
		 * submissions.
		 */
		if (!job->gang_submit) {
			dep = amdgpu_device_get_gang(adev);
			/* dep already carries a reference, so skip out_grab_ref */
			if (!dma_fence_is_signaled(dep))
				goto out_return_dep;
			dma_fence_put(dep);
		}

		/* this job becomes the new spearhead for its owner */
		dma_fence_put(isolation->spearhead);
		isolation->spearhead = dma_fence_get(&f->scheduled);
		/* fences of the previous owner move from active to prev */
		amdgpu_sync_move(&isolation->active, &isolation->prev);
		trace_amdgpu_isolation(isolation->owner, owner);
		isolation->owner = owner;
	}

	/*
	 * Specifying the ring here helps to pipeline submissions even when
	 * isolation is enabled. If that is not desired for testing NULL can be
	 * used instead of the ring to enforce a CPU round trip while switching
	 * between clients.
	 */
	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
	/* track this job's finished fence; a failure is only logged */
	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
	if (r)
		dev_warn(adev->dev, "OOM tracking isolation\n");

out_grab_ref:
	/* take the reference that is handed to the caller */
	dma_fence_get(dep);
out_return_dep:
	mutex_unlock(&adev->enforce_isolation_mutex);
	return dep;
}
6842 
6843 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6844 {
6845 	switch (adev->asic_type) {
6846 #ifdef CONFIG_DRM_AMDGPU_SI
6847 	case CHIP_HAINAN:
6848 #endif
6849 	case CHIP_TOPAZ:
6850 		/* chips with no display hardware */
6851 		return false;
6852 #ifdef CONFIG_DRM_AMDGPU_SI
6853 	case CHIP_TAHITI:
6854 	case CHIP_PITCAIRN:
6855 	case CHIP_VERDE:
6856 	case CHIP_OLAND:
6857 #endif
6858 #ifdef CONFIG_DRM_AMDGPU_CIK
6859 	case CHIP_BONAIRE:
6860 	case CHIP_HAWAII:
6861 	case CHIP_KAVERI:
6862 	case CHIP_KABINI:
6863 	case CHIP_MULLINS:
6864 #endif
6865 	case CHIP_TONGA:
6866 	case CHIP_FIJI:
6867 	case CHIP_POLARIS10:
6868 	case CHIP_POLARIS11:
6869 	case CHIP_POLARIS12:
6870 	case CHIP_VEGAM:
6871 	case CHIP_CARRIZO:
6872 	case CHIP_STONEY:
6873 		/* chips with display hardware */
6874 		return true;
6875 	default:
6876 		/* IP discovery */
6877 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6878 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6879 			return false;
6880 		return true;
6881 	}
6882 }
6883 
6884 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
6885 {
6886 	ssize_t size = 0;
6887 
6888 	if (!ring || !ring->adev)
6889 		return size;
6890 
6891 	if (amdgpu_device_should_recover_gpu(ring->adev))
6892 		size |= AMDGPU_RESET_TYPE_FULL;
6893 
6894 	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
6895 	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
6896 		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
6897 
6898 	return size;
6899 }
6900 
6901 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
6902 {
6903 	ssize_t size = 0;
6904 
6905 	if (supported_reset == 0) {
6906 		size += sysfs_emit_at(buf, size, "unsupported");
6907 		size += sysfs_emit_at(buf, size, "\n");
6908 		return size;
6909 
6910 	}
6911 
6912 	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
6913 		size += sysfs_emit_at(buf, size, "soft ");
6914 
6915 	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
6916 		size += sysfs_emit_at(buf, size, "queue ");
6917 
6918 	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
6919 		size += sysfs_emit_at(buf, size, "pipe ");
6920 
6921 	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
6922 		size += sysfs_emit_at(buf, size, "full ");
6923 
6924 	size += sysfs_emit_at(buf, size, "\n");
6925 	return size;
6926 }
6927 
6928 void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
6929 			   enum amdgpu_uid_type type, uint8_t inst,
6930 			   uint64_t uid)
6931 {
6932 	if (!uid_info)
6933 		return;
6934 
6935 	if (type >= AMDGPU_UID_TYPE_MAX) {
6936 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6937 			     type);
6938 		return;
6939 	}
6940 
6941 	if (inst >= AMDGPU_UID_INST_MAX) {
6942 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6943 			     inst);
6944 		return;
6945 	}
6946 
6947 	if (uid_info->uid[type][inst] != 0) {
6948 		dev_warn_once(
6949 			uid_info->adev->dev,
6950 			"Overwriting existing UID %llu for type %d instance %d\n",
6951 			uid_info->uid[type][inst], type, inst);
6952 	}
6953 
6954 	uid_info->uid[type][inst] = uid;
6955 }
6956 
6957 u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
6958 			  enum amdgpu_uid_type type, uint8_t inst)
6959 {
6960 	if (!uid_info)
6961 		return 0;
6962 
6963 	if (type >= AMDGPU_UID_TYPE_MAX) {
6964 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6965 			     type);
6966 		return 0;
6967 	}
6968 
6969 	if (inst >= AMDGPU_UID_INST_MAX) {
6970 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6971 			     inst);
6972 		return 0;
6973 	}
6974 
6975 	return uid_info->uid[type][inst];
6976 }
6977