xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision b5fa84e805a61d3c1a741035ac793674833d3ca0)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/aperture.h>
30 #include <linux/power_supply.h>
31 #include <linux/kthread.h>
32 #include <linux/module.h>
33 #include <linux/console.h>
34 #include <linux/slab.h>
35 #include <linux/iommu.h>
36 #include <linux/pci.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39 #include <linux/nospec.h>
40 
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_client_event.h>
43 #include <drm/drm_crtc_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/device.h>
47 #include <linux/vgaarb.h>
48 #include <linux/vga_switcheroo.h>
49 #include <linux/efi.h>
50 #include "amdgpu.h"
51 #include "amdgpu_trace.h"
52 #include "amdgpu_i2c.h"
53 #include "atom.h"
54 #include "amdgpu_atombios.h"
55 #include "amdgpu_atomfirmware.h"
56 #include "amd_pcie.h"
57 #ifdef CONFIG_DRM_AMDGPU_SI
58 #include "si.h"
59 #endif
60 #ifdef CONFIG_DRM_AMDGPU_CIK
61 #include "cik.h"
62 #endif
63 #include "vi.h"
64 #include "soc15.h"
65 #include "nv.h"
66 #include "bif/bif_4_1_d.h"
67 #include <linux/firmware.h>
68 #include "amdgpu_vf_error.h"
69 
70 #include "amdgpu_amdkfd.h"
71 #include "amdgpu_pm.h"
72 
73 #include "amdgpu_xgmi.h"
74 #include "amdgpu_ras.h"
75 #include "amdgpu_ras_mgr.h"
76 #include "amdgpu_pmu.h"
77 #include "amdgpu_fru_eeprom.h"
78 #include "amdgpu_reset.h"
79 #include "amdgpu_virt.h"
80 #include "amdgpu_dev_coredump.h"
81 
82 #include <linux/suspend.h>
83 #include <drm/task_barrier.h>
84 #include <linux/pm_runtime.h>
85 
86 #include <drm/drm_drv.h>
87 
88 #if IS_ENABLED(CONFIG_X86)
89 #include <asm/intel-family.h>
90 #include <asm/cpu_device_id.h>
91 #endif
92 
93 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
97 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
98 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
99 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
100 MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
101 
102 #define AMDGPU_RESUME_MS		2000
103 #define AMDGPU_MAX_RETRY_LIMIT		2
104 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
105 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
106 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
107 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
108 
109 #define AMDGPU_VBIOS_SKIP (1U << 0)
110 #define AMDGPU_VBIOS_OPTIONAL (1U << 1)
111 
112 static const struct drm_driver amdgpu_kms_driver;
113 
/*
 * Printable ASIC names.
 * NOTE(review): presumably indexed by the ASIC type enum, in which case the
 * order here must match that enum, with "LAST" as the final entry. The enum
 * is not visible in this file, so confirm before reordering or inserting.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
154 
/* Mask with bits 0 .. AMD_IP_BLOCK_TYPE_NUM - 1 set, i.e. one bit per IP block type. */
#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM  - 1, 0)
/*
 * Default init level where all blocks are expected to be initialized. This is
 * the level of initialization expected by default and also after a full reset
 * of the device.
 */
struct amdgpu_init_level amdgpu_init_default = {
	.level = AMDGPU_INIT_LEVEL_DEFAULT,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};
165 
/*
 * Init level selected by amdgpu_set_init_level() for
 * AMDGPU_INIT_LEVEL_RESET_RECOVERY. Like the default level, its hardware-init
 * mask covers every IP block type.
 */
struct amdgpu_init_level amdgpu_init_recovery = {
	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};
170 
/*
 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
 * is used for cases like reset on initialization where the entire hive needs to
 * be reset before first use.
 *
 * The mask restricts hardware init to the GMC, SMC, COMMON, IH and PSP block
 * types.
 */
struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
	.hwini_ip_block_mask =
		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
		BIT(AMD_IP_BLOCK_TYPE_PSP)
};
183 
184 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
185 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
186 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
187 
188 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
189 
190 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
191 					     enum amd_ip_block_type block)
192 {
193 	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
194 }
195 
196 void amdgpu_set_init_level(struct amdgpu_device *adev,
197 			   enum amdgpu_init_lvl_id lvl)
198 {
199 	switch (lvl) {
200 	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
201 		adev->init_lvl = &amdgpu_init_minimal_xgmi;
202 		break;
203 	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
204 		adev->init_lvl = &amdgpu_init_recovery;
205 		break;
206 	case AMDGPU_INIT_LEVEL_DEFAULT:
207 		fallthrough;
208 	default:
209 		adev->init_lvl = &amdgpu_init_default;
210 		break;
211 	}
212 }
213 
214 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
215 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
216 				     void *data);
217 
218 /**
219  * DOC: pcie_replay_count
220  *
221  * The amdgpu driver provides a sysfs API for reporting the total number
222  * of PCIe replays (NAKs).
223  * The file pcie_replay_count is used for this and returns the total
224  * number of replays as a sum of the NAKs generated and NAKs received.
225  */
226 
227 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
228 		struct device_attribute *attr, char *buf)
229 {
230 	struct drm_device *ddev = dev_get_drvdata(dev);
231 	struct amdgpu_device *adev = drm_to_adev(ddev);
232 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
233 
234 	return sysfs_emit(buf, "%llu\n", cnt);
235 }
236 
237 static DEVICE_ATTR(pcie_replay_count, 0444,
238 		amdgpu_device_get_pcie_replay_count, NULL);
239 
240 static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
241 {
242 	int ret = 0;
243 
244 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
245 		ret = sysfs_create_file(&adev->dev->kobj,
246 					&dev_attr_pcie_replay_count.attr);
247 
248 	return ret;
249 }
250 
251 static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
252 {
253 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
254 		sysfs_remove_file(&adev->dev->kobj,
255 				  &dev_attr_pcie_replay_count.attr);
256 }
257 
258 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
259 					  const struct bin_attribute *attr, char *buf,
260 					  loff_t ppos, size_t count)
261 {
262 	struct device *dev = kobj_to_dev(kobj);
263 	struct drm_device *ddev = dev_get_drvdata(dev);
264 	struct amdgpu_device *adev = drm_to_adev(ddev);
265 	ssize_t bytes_read;
266 
267 	switch (ppos) {
268 	case AMDGPU_SYS_REG_STATE_XGMI:
269 		bytes_read = amdgpu_asic_get_reg_state(
270 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
271 		break;
272 	case AMDGPU_SYS_REG_STATE_WAFL:
273 		bytes_read = amdgpu_asic_get_reg_state(
274 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
275 		break;
276 	case AMDGPU_SYS_REG_STATE_PCIE:
277 		bytes_read = amdgpu_asic_get_reg_state(
278 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
279 		break;
280 	case AMDGPU_SYS_REG_STATE_USR:
281 		bytes_read = amdgpu_asic_get_reg_state(
282 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
283 		break;
284 	case AMDGPU_SYS_REG_STATE_USR_1:
285 		bytes_read = amdgpu_asic_get_reg_state(
286 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
287 		break;
288 	default:
289 		return -EINVAL;
290 	}
291 
292 	return bytes_read;
293 }
294 
295 static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
296 		      AMDGPU_SYS_REG_STATE_END);
297 
298 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
299 {
300 	int ret;
301 
302 	if (!amdgpu_asic_get_reg_state_supported(adev))
303 		return 0;
304 
305 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
306 
307 	return ret;
308 }
309 
310 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
311 {
312 	if (!amdgpu_asic_get_reg_state_supported(adev))
313 		return;
314 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
315 }
316 
317 /**
318  * DOC: board_info
319  *
320  * The amdgpu driver provides a sysfs API for giving board related information.
321  * It provides the form factor information in the format
322  *
323  *   type : form factor
324  *
325  * Possible form factor values
326  *
327  * - "cem"		- PCIE CEM card
328  * - "oam"		- Open Compute Accelerator Module
329  * - "unknown"	- Not known
330  *
331  */
332 
333 static ssize_t amdgpu_device_get_board_info(struct device *dev,
334 					    struct device_attribute *attr,
335 					    char *buf)
336 {
337 	struct drm_device *ddev = dev_get_drvdata(dev);
338 	struct amdgpu_device *adev = drm_to_adev(ddev);
339 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
340 	const char *pkg;
341 
342 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
343 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
344 
345 	switch (pkg_type) {
346 	case AMDGPU_PKG_TYPE_CEM:
347 		pkg = "cem";
348 		break;
349 	case AMDGPU_PKG_TYPE_OAM:
350 		pkg = "oam";
351 		break;
352 	default:
353 		pkg = "unknown";
354 		break;
355 	}
356 
357 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
358 }
359 
360 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
361 
362 static struct attribute *amdgpu_board_attrs[] = {
363 	&dev_attr_board_info.attr,
364 	NULL,
365 };
366 
367 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
368 					     struct attribute *attr, int n)
369 {
370 	struct device *dev = kobj_to_dev(kobj);
371 	struct drm_device *ddev = dev_get_drvdata(dev);
372 	struct amdgpu_device *adev = drm_to_adev(ddev);
373 
374 	if (adev->flags & AMD_IS_APU)
375 		return 0;
376 
377 	return attr->mode;
378 }
379 
380 static const struct attribute_group amdgpu_board_attrs_group = {
381 	.attrs = amdgpu_board_attrs,
382 	.is_visible = amdgpu_board_attrs_is_visible
383 };
384 
385 /**
386  * DOC: uma/carveout_options
387  *
388  * This is a read-only file that lists all available UMA allocation
389  * options and their corresponding indices. Example output::
390  *
391  *     $ cat uma/carveout_options
392  *     0: Minimum (512 MB)
393  *     1:  (1 GB)
394  *     2:  (2 GB)
395  *     3:  (4 GB)
396  *     4:  (6 GB)
397  *     5:  (8 GB)
398  *     6:  (12 GB)
399  *     7: Medium (16 GB)
400  *     8:  (24 GB)
401  *     9: High (32 GB)
402  */
403 static ssize_t carveout_options_show(struct device *dev,
404 				     struct device_attribute *attr,
405 				     char *buf)
406 {
407 	struct drm_device *ddev = dev_get_drvdata(dev);
408 	struct amdgpu_device *adev = drm_to_adev(ddev);
409 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
410 	uint32_t memory_carved;
411 	ssize_t size = 0;
412 
413 	if (!uma_info || !uma_info->num_entries)
414 		return -ENODEV;
415 
416 	for (int i = 0; i < uma_info->num_entries; i++) {
417 		memory_carved = uma_info->entries[i].memory_carved_mb;
418 		if (memory_carved >= SZ_1G/SZ_1M) {
419 			size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n",
420 					      i,
421 					      uma_info->entries[i].name,
422 					      memory_carved >> 10);
423 		} else {
424 			size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n",
425 					      i,
426 					      uma_info->entries[i].name,
427 					      memory_carved);
428 		}
429 	}
430 
431 	return size;
432 }
433 static DEVICE_ATTR_RO(carveout_options);
434 
435 /**
436  * DOC: uma/carveout
437  *
438  * This file is both readable and writable. When read, it shows the
439  * index of the current setting. Writing a valid index to this file
440  * allows users to change the UMA carveout size to the selected option
441  * on the next boot.
442  *
443  * The available options and their corresponding indices can be read
444  * from the uma/carveout_options file.
445  */
446 static ssize_t carveout_show(struct device *dev,
447 			     struct device_attribute *attr,
448 			     char *buf)
449 {
450 	struct drm_device *ddev = dev_get_drvdata(dev);
451 	struct amdgpu_device *adev = drm_to_adev(ddev);
452 
453 	return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index);
454 }
455 
456 static ssize_t carveout_store(struct device *dev,
457 			      struct device_attribute *attr,
458 			      const char *buf, size_t count)
459 {
460 	struct drm_device *ddev = dev_get_drvdata(dev);
461 	struct amdgpu_device *adev = drm_to_adev(ddev);
462 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
463 	struct amdgpu_uma_carveout_option *opt;
464 	unsigned long val;
465 	uint8_t flags;
466 	int r;
467 
468 	r = kstrtoul(buf, 10, &val);
469 	if (r)
470 		return r;
471 
472 	if (val >= uma_info->num_entries)
473 		return -EINVAL;
474 
475 	val = array_index_nospec(val, uma_info->num_entries);
476 	opt = &uma_info->entries[val];
477 
478 	if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) &&
479 	    !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) {
480 		drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val);
481 		return -EINVAL;
482 	}
483 
484 	flags = opt->flags;
485 	flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1);
486 
487 	guard(mutex)(&uma_info->update_lock);
488 
489 	r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags);
490 	if (r)
491 		return r;
492 
493 	uma_info->uma_option_index = val;
494 
495 	return count;
496 }
497 static DEVICE_ATTR_RW(carveout);
498 
/* Attributes exposed in the "uma" sysfs group: carveout and carveout_options. */
static struct attribute *amdgpu_uma_attrs[] = {
	&dev_attr_carveout.attr,
	&dev_attr_carveout_options.attr,
	NULL
};

/* Named group, so the files appear under a "uma" subdirectory of the device. */
const struct attribute_group amdgpu_uma_attr_group = {
	.name = "uma",
	.attrs = amdgpu_uma_attrs
};
509 
510 static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev)
511 {
512 	int rc;
513 
514 	if (!(adev->flags & AMD_IS_APU))
515 		return;
516 
517 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
518 		return;
519 
520 	rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info);
521 	if (rc) {
522 		drm_dbg(adev_to_drm(adev),
523 			"Failed to parse UMA carveout info from VBIOS: %d\n", rc);
524 		goto out_info;
525 	}
526 
527 	mutex_init(&adev->uma_info.update_lock);
528 
529 	rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group);
530 	if (rc) {
531 		drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc);
532 		goto out_attr;
533 	}
534 
535 	return;
536 
537 out_attr:
538 	mutex_destroy(&adev->uma_info.update_lock);
539 out_info:
540 	return;
541 }
542 
543 static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev)
544 {
545 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
546 
547 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
548 		return;
549 
550 	mutex_destroy(&uma_info->update_lock);
551 	uma_info->num_entries = 0;
552 }
553 
554 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
555 
556 /**
557  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
558  *
559  * @adev: amdgpu device pointer
560  *
561  * Returns true if the device is a dGPU with ATPX power control,
562  * otherwise return false.
563  */
564 bool amdgpu_device_supports_px(struct amdgpu_device *adev)
565 {
566 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
567 		return true;
568 	return false;
569 }
570 
571 /**
572  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
573  *
574  * @adev: amdgpu device pointer
575  *
576  * Returns true if the device is a dGPU with ACPI power control,
577  * otherwise return false.
578  */
579 bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
580 {
581 	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
582 		return false;
583 
584 	if (adev->has_pr3 ||
585 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
586 		return true;
587 	return false;
588 }
589 
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @adev: amdgpu device pointer
 *
 * Thin wrapper that forwards to amdgpu_asic_supports_baco().
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported)
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
	return amdgpu_asic_supports_baco(adev);
}
604 
/**
 * amdgpu_device_detect_runtime_pm_mode - choose the runtime PM mode
 *
 * @adev: amdgpu device pointer
 *
 * Sets adev->pm.rpm_mode based on the amdgpu_runtime_pm setting and on what
 * the device reports as supported. The mode starts as AMDGPU_RUNPM_NONE; if
 * it is still NONE at the end, "Runtime PM not available" is logged.
 */
void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
{
	int bamaco_support;

	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
	/* tested below against the BACO_SUPPORT / MACO_SUPPORT bits */
	bamaco_support = amdgpu_device_supports_baco(adev);

	switch (amdgpu_runtime_pm) {
	case 2:
		/* BAMACO requested: fall back to BACO if only BACO is reported */
		if (bamaco_support & MACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
		} else if (bamaco_support == BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
		}
		break;
	case 1:
		/* BACO requested */
		if (bamaco_support & BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
		}
		break;
	case -1:
	case -2:
		/* automatic selection: PX first, then BOCO, then BACO/BAMACO */
		if (amdgpu_device_supports_px(adev)) {
			/* enable PX as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
			dev_info(adev->dev, "Using ATPX for runtime pm\n");
		} else if (amdgpu_device_supports_boco(adev)) {
			/* enable boco as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
			dev_info(adev->dev, "Using BOCO for runtime pm\n");
		} else {
			if (!bamaco_support)
				goto no_runtime_pm;

			switch (adev->asic_type) {
			case CHIP_VEGA20:
			case CHIP_ARCTURUS:
				/* BACO is not supported on vega20 and arcturus */
				break;
			case CHIP_VEGA10:
				/* enable BACO as runpm mode if noretry=0 */
				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			default:
				/* enable BACO as runpm mode on CI+ */
				if (!amdgpu_passthrough(adev))
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			}

			/* upgrade the BACO choice to BAMACO when MACO is reported too */
			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
				if (bamaco_support & MACO_SUPPORT) {
					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
				} else {
					dev_info(adev->dev, "Using BACO for runtime pm\n");
				}
			}
		}
		break;
	case 0:
		/* mode stays NONE, so the message after the switch is logged as well */
		dev_info(adev->dev, "runtime pm is manually disabled\n");
		break;
	default:
		break;
	}

no_runtime_pm:
	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
		dev_info(adev->dev, "Runtime PM not available\n");
}
680 /**
681  * amdgpu_device_supports_smart_shift - Is the device dGPU with
682  * smart shift support
683  *
684  * @adev: amdgpu device pointer
685  *
686  * Returns true if the device is a dGPU with Smart Shift support,
687  * otherwise returns false.
688  */
689 bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
690 {
691 	return (amdgpu_device_supports_boco(adev) &&
692 		amdgpu_acpi_is_power_shift_control_supported());
693 }
694 
695 /*
696  * VRAM access helper functions
697  */
698 
/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram, must be 4-byte aligned
 * @buf: virtual address of the buffer in system memory, must hold at least
 *       @size bytes
 * @size: read/write size in bytes, must be a multiple of 4
 * @write: true - write to vram, otherwise - read from vram
 *
 * Transfers one dword at a time through the MM_INDEX/MM_INDEX_HI/MM_DATA
 * registers while holding adev->mmio_idx_lock. Returns without transferring
 * anything if drm_dev_enter() fails. Misaligned @pos or @size triggers
 * BUG_ON().
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	/* hi starts at ~0 so the first iteration always programs MM_INDEX_HI */
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		/* address bits 31 and up go to MM_INDEX_HI */
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		/* only rewrite MM_INDEX_HI when the upper address bits change */
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}
740 
/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory, must hold at least
 *       @size bytes
 * @size: read/write size
 * @write: true - write to vram, otherwise - read from vram
 *
 * Only the part of [@pos, @pos + @size) that lies below
 * adev->gmc.visible_vram_size is transferred.
 *
 * The return value means how many bytes have been transferred. It is 0 when
 * the kernel is not built with CONFIG_64BIT, when the aperture is not mapped
 * (adev->mman.aper_base_kaddr is NULL), or when @pos is at or beyond the
 * visible VRAM size.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	/* clamp the end of the transfer to the CPU-visible part of VRAM */
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}
791 
792 /**
793  * amdgpu_device_vram_access - read/write a buffer in vram
794  *
795  * @adev: amdgpu_device pointer
796  * @pos: offset of the buffer in vram
797  * @buf: virtual address of the buffer in system memory
798  * @size: read/write size, sizeof(@buf) must > @size
799  * @write: true - write to vram, otherwise - read from vram
800  */
801 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
802 			       void *buf, size_t size, bool write)
803 {
804 	size_t count;
805 
806 	/* try to using vram apreature to access vram first */
807 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
808 	size -= count;
809 	if (size) {
810 		/* using MM to access rest vram */
811 		pos += count;
812 		buf += count;
813 		amdgpu_device_mm_access(adev, pos, buf, size, write);
814 	}
815 }
816 
817 /*
818  * register access helper functions.
819  */
820 
/**
 * amdgpu_device_skip_hw_access - check if hw access should be skipped
 *
 * @adev: amdgpu_device pointer
 *
 * Check if hw access should be skipped because of hotplug or device error.
 *
 * Returns true when adev->no_hw_access is set, false otherwise. With
 * CONFIG_LOCKDEP enabled, and only in task context, the false path also
 * asserts that no GPU reset is running in parallel.
 */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}
848 
/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Return device rev_id as reported by the NBIO get_rev_id callback. Neither
 * adev->nbio.funcs nor the callback is checked for NULL here.
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
	return adev->nbio.funcs->get_rev_id(adev);
}
860 
861 static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
862 {
863 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
864 		return AMDGPU_VBIOS_SKIP;
865 
866 	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
867 		return AMDGPU_VBIOS_OPTIONAL;
868 
869 	return 0;
870 }
871 
872 /**
873  * amdgpu_device_asic_init - Wrapper for atom asic_init
874  *
875  * @adev: amdgpu_device pointer
876  *
877  * Does any asic specific work and then calls atom asic init.
878  */
879 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
880 {
881 	uint32_t flags;
882 	bool optional;
883 	int ret;
884 
885 	amdgpu_asic_pre_asic_init(adev);
886 	flags = amdgpu_device_get_vbios_flags(adev);
887 	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
888 
889 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
890 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
891 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
892 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
893 		amdgpu_psp_wait_for_bootloader(adev);
894 		if (optional && !adev->bios)
895 			return 0;
896 
897 		ret = amdgpu_atomfirmware_asic_init(adev, true);
898 		return ret;
899 	} else {
900 		if (optional && !adev->bios)
901 			return 0;
902 
903 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
904 	}
905 
906 	return 0;
907 }
908 
909 /**
910  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
911  *
912  * @adev: amdgpu_device pointer
913  *
914  * Allocates a scratch page of VRAM for use by various things in the
915  * driver.
916  */
917 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
918 {
919 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
920 				       AMDGPU_GEM_DOMAIN_VRAM |
921 				       AMDGPU_GEM_DOMAIN_GTT,
922 				       &adev->mem_scratch.robj,
923 				       &adev->mem_scratch.gpu_addr,
924 				       (void **)&adev->mem_scratch.ptr);
925 }
926 
/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the scratch buffer object held in adev->mem_scratch.robj. NULL is
 * passed for the GPU address and CPU pointer arguments of
 * amdgpu_bo_free_kernel().
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}
938 
939 /**
940  * amdgpu_device_program_register_sequence - program an array of registers.
941  *
942  * @adev: amdgpu_device pointer
943  * @registers: pointer to the register array
944  * @array_size: size of the register array
945  *
946  * Programs an array or registers with and or masks.
947  * This is a helper for setting golden registers.
948  */
949 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
950 					     const u32 *registers,
951 					     const u32 array_size)
952 {
953 	u32 tmp, reg, and_mask, or_mask;
954 	int i;
955 
956 	if (array_size % 3)
957 		return;
958 
959 	for (i = 0; i < array_size; i += 3) {
960 		reg = registers[i + 0];
961 		and_mask = registers[i + 1];
962 		or_mask = registers[i + 2];
963 
964 		if (and_mask == 0xffffffff) {
965 			tmp = or_mask;
966 		} else {
967 			tmp = RREG32(reg);
968 			tmp &= ~and_mask;
969 			if (adev->family >= AMDGPU_FAMILY_AI)
970 				tmp |= (or_mask & and_mask);
971 			else
972 				tmp |= or_mask;
973 		}
974 		WREG32(reg, tmp);
975 	}
976 }
977 
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence, i.e. by writing
 * AMDGPU_ASIC_RESET_DATA to PCI config space offset 0x7c. The result of the
 * config write is not checked.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}
990 
/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 *
 * Returns the result of pci_reset_function().
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}
1002 
1003 /*
1004  * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
1007  */
1008 
1009 /**
1010  * amdgpu_device_wb_fini - Disable Writeback and free memory
1011  *
1012  * @adev: amdgpu_device pointer
1013  *
1014  * Disables Writeback and frees the Writeback memory (all asics).
1015  * Used at driver shutdown.
1016  */
1017 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1018 {
1019 	if (adev->wb.wb_obj) {
1020 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1021 				      &adev->wb.gpu_addr,
1022 				      (void **)&adev->wb.wb);
1023 		adev->wb.wb_obj = NULL;
1024 	}
1025 }
1026 
1027 /**
1028  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1029  *
1030  * @adev: amdgpu_device pointer
1031  *
1032  * Initializes writeback and allocates writeback memory (all asics).
1033  * Used at driver startup.
1034  * Returns 0 on success or an -error on failure.
1035  */
1036 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1037 {
1038 	int r;
1039 
1040 	if (adev->wb.wb_obj == NULL) {
1041 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1042 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1043 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1044 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1045 					    (void **)&adev->wb.wb);
1046 		if (r) {
1047 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1048 			return r;
1049 		}
1050 
1051 		adev->wb.num_wb = AMDGPU_MAX_WB;
1052 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1053 
1054 		/* clear wb memory */
1055 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 /**
1062  * amdgpu_device_wb_get - Allocate a wb entry
1063  *
1064  * @adev: amdgpu_device pointer
1065  * @wb: wb index
1066  *
1067  * Allocate a wb slot for use by the driver (all asics).
1068  * Returns 0 on success or -EINVAL on failure.
1069  */
1070 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1071 {
1072 	unsigned long flags, offset;
1073 
1074 	spin_lock_irqsave(&adev->wb.lock, flags);
1075 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1076 	if (offset < adev->wb.num_wb) {
1077 		__set_bit(offset, adev->wb.used);
1078 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1079 		*wb = offset << 3; /* convert to dw offset */
1080 		return 0;
1081 	} else {
1082 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1083 		return -EINVAL;
1084 	}
1085 }
1086 
1087 /**
1088  * amdgpu_device_wb_free - Free a wb entry
1089  *
1090  * @adev: amdgpu_device pointer
1091  * @wb: wb index
1092  *
1093  * Free a wb slot allocated for use by the driver (all asics)
1094  */
1095 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1096 {
1097 	unsigned long flags;
1098 
1099 	wb >>= 3;
1100 	spin_lock_irqsave(&adev->wb.lock, flags);
1101 	if (wb < adev->wb.num_wb)
1102 		__clear_bit(wb, adev->wb.used);
1103 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1104 }
1105 
1106 /**
1107  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1108  *
1109  * @adev: amdgpu_device pointer
1110  *
1111  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1112  * to fail, but if any of the BARs is not accessible after the size we abort
1113  * driver loading by returning -ENODEV.
1114  */
1115 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1116 {
1117 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1118 	struct pci_bus *root;
1119 	struct resource *res;
1120 	int max_size, r;
1121 	unsigned int i;
1122 	u16 cmd;
1123 
1124 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1125 		return 0;
1126 
1127 	/* Bypass for VF */
1128 	if (amdgpu_sriov_vf(adev))
1129 		return 0;
1130 
1131 	if (!amdgpu_rebar)
1132 		return 0;
1133 
1134 	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1135 	if ((amdgpu_runtime_pm != 0) &&
1136 	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1137 	    adev->pdev->device == 0x731f &&
1138 	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1139 		return 0;
1140 
1141 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1142 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1143 		dev_warn(
1144 			adev->dev,
1145 			"System can't access extended configuration space, please check!!\n");
1146 
1147 	/* skip if the bios has already enabled large BAR */
1148 	if (adev->gmc.real_vram_size &&
1149 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1150 		return 0;
1151 
1152 	/* Check if the root BUS has 64bit memory resources */
1153 	root = adev->pdev->bus;
1154 	while (root->parent)
1155 		root = root->parent;
1156 
1157 	pci_bus_for_each_resource(root, res, i) {
1158 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1159 		    res->start > 0x100000000ull)
1160 			break;
1161 	}
1162 
1163 	/* Trying to resize is pointless without a root hub window above 4GB */
1164 	if (!res)
1165 		return 0;
1166 
1167 	/* Limit the BAR size to what is available */
1168 	max_size = pci_rebar_get_max_size(adev->pdev, 0);
1169 	if (max_size < 0)
1170 		return 0;
1171 	rbar_size = min(max_size, rbar_size);
1172 
1173 	/* Disable memory decoding while we change the BAR addresses and size */
1174 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1175 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1176 			      cmd & ~PCI_COMMAND_MEMORY);
1177 
1178 	/* Tear down doorbell as resizing will release BARs */
1179 	amdgpu_doorbell_fini(adev);
1180 
1181 	r = pci_resize_resource(adev->pdev, 0, rbar_size,
1182 				(adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
1183 								  : 1 << 2);
1184 	if (r == -ENOSPC)
1185 		dev_info(adev->dev,
1186 			 "Not enough PCI address space for a large BAR.");
1187 	else if (r && r != -ENOTSUPP)
1188 		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1189 
1190 	/* When the doorbell or fb BAR isn't available we have no chance of
1191 	 * using the device.
1192 	 */
1193 	r = amdgpu_doorbell_init(adev);
1194 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1195 		return -ENODEV;
1196 
1197 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1198 
1199 	return 0;
1200 }
1201 
1202 /*
1203  * GPU helpers function.
1204  */
1205 /**
1206  * amdgpu_device_need_post - check if the hw need post or not
1207  *
1208  * @adev: amdgpu_device pointer
1209  *
1210  * Check if the asic has been initialized (all asics) at driver startup
1211  * or post is needed if  hw reset is performed.
1212  * Returns true if need or false if not.
1213  */
1214 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1215 {
1216 	uint32_t reg, flags;
1217 
1218 	if (amdgpu_sriov_vf(adev))
1219 		return false;
1220 
1221 	flags = amdgpu_device_get_vbios_flags(adev);
1222 	if (flags & AMDGPU_VBIOS_SKIP)
1223 		return false;
1224 	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1225 		return false;
1226 
1227 	if (amdgpu_passthrough(adev)) {
1228 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1229 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1230 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1231 		 * vpost executed for smc version below 22.15
1232 		 */
1233 		if (adev->asic_type == CHIP_FIJI) {
1234 			int err;
1235 			uint32_t fw_ver;
1236 
1237 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1238 			/* force vPost if error occurred */
1239 			if (err)
1240 				return true;
1241 
1242 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1243 			release_firmware(adev->pm.fw);
1244 			if (fw_ver < 0x00160e00)
1245 				return true;
1246 		}
1247 	}
1248 
1249 	/* Don't post if we need to reset whole hive on init */
1250 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1251 		return false;
1252 
1253 	if (adev->has_hw_reset) {
1254 		adev->has_hw_reset = false;
1255 		return true;
1256 	}
1257 
1258 	/* bios scratch used on CIK+ */
1259 	if (adev->asic_type >= CHIP_BONAIRE)
1260 		return amdgpu_atombios_scratch_need_asic_init(adev);
1261 
1262 	/* check MEM_SIZE for older asics */
1263 	reg = amdgpu_asic_get_config_memsize(adev);
1264 
1265 	if ((reg != 0) && (reg != 0xffffffff))
1266 		return false;
1267 
1268 	return true;
1269 }
1270 
1271 /*
1272  * Check whether seamless boot is supported.
1273  *
1274  * So far we only support seamless boot on DCE 3.0 or later.
1275  * If users report that it works on older ASICS as well, we may
1276  * loosen this.
1277  */
1278 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1279 {
1280 	switch (amdgpu_seamless) {
1281 	case -1:
1282 		break;
1283 	case 1:
1284 		return true;
1285 	case 0:
1286 		return false;
1287 	default:
1288 		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1289 			amdgpu_seamless);
1290 		return false;
1291 	}
1292 
1293 	if (!(adev->flags & AMD_IS_APU))
1294 		return false;
1295 
1296 	if (adev->mman.keep_stolen_vga_memory)
1297 		return false;
1298 
1299 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1300 }
1301 
1302 /*
1303  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1304  * don't support dynamic speed switching. Until we have confirmation from Intel
1305  * that a specific host supports it, it's safer that we keep it disabled for all.
1306  *
1307  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1308  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1309  */
1310 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1311 {
1312 #if IS_ENABLED(CONFIG_X86)
1313 	struct cpuinfo_x86 *c = &cpu_data(0);
1314 
1315 	/* eGPU change speeds based on USB4 fabric conditions */
1316 	if (dev_is_removable(adev->dev))
1317 		return true;
1318 
1319 	if (c->x86_vendor == X86_VENDOR_INTEL)
1320 		return false;
1321 #endif
1322 	return true;
1323 }
1324 
1325 static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1326 {
1327 	/* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
1328 	 * It's unclear if this is a platform-specific or GPU-specific issue.
1329 	 * Disable ASPM on SI for the time being.
1330 	 */
1331 	if (adev->family == AMDGPU_FAMILY_SI)
1332 		return true;
1333 
1334 #if IS_ENABLED(CONFIG_X86)
1335 	struct cpuinfo_x86 *c = &cpu_data(0);
1336 
1337 	if (c->x86_vendor == X86_VENDOR_INTEL) {
1338 		switch (c->x86_model) {
1339 		case VFM_MODEL(INTEL_ALDERLAKE):
1340 		case VFM_MODEL(INTEL_ALDERLAKE_L):
1341 		case VFM_MODEL(INTEL_RAPTORLAKE):
1342 		case VFM_MODEL(INTEL_RAPTORLAKE_P):
1343 		case VFM_MODEL(INTEL_RAPTORLAKE_S):
1344 		case VFM_MODEL(INTEL_TIGERLAKE):
1345 		case VFM_MODEL(INTEL_TIGERLAKE_L):
1346 			return true;
1347 		default:
1348 			return false;
1349 		}
1350 	} else {
1351 		return false;
1352 	}
1353 #else
1354 	return false;
1355 #endif
1356 }
1357 
1358 /**
1359  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1360  *
1361  * @adev: amdgpu_device pointer
1362  *
1363  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1364  * be set for this device.
1365  *
1366  * Returns true if it should be used or false if not.
1367  */
1368 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1369 {
1370 	switch (amdgpu_aspm) {
1371 	case -1:
1372 		break;
1373 	case 0:
1374 		return false;
1375 	case 1:
1376 		return true;
1377 	default:
1378 		return false;
1379 	}
1380 	if (adev->flags & AMD_IS_APU)
1381 		return false;
1382 	if (amdgpu_device_aspm_support_quirk(adev))
1383 		return false;
1384 	return pcie_aspm_enabled(adev->pdev);
1385 }
1386 
1387 /* if we get transitioned to only one device, take VGA back */
1388 /**
1389  * amdgpu_device_vga_set_decode - enable/disable vga decode
1390  *
1391  * @pdev: PCI device pointer
1392  * @state: enable/disable vga decode
1393  *
1394  * Enable/disable vga decode (all asics).
1395  * Returns VGA resource flags.
1396  */
1397 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1398 		bool state)
1399 {
1400 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1401 
1402 	amdgpu_asic_set_vga_state(adev, state);
1403 	if (state)
1404 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1405 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1406 	else
1407 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1408 }
1409 
1410 /**
1411  * amdgpu_device_check_block_size - validate the vm block size
1412  *
1413  * @adev: amdgpu_device pointer
1414  *
1415  * Validates the vm block size specified via module parameter.
1416  * The vm block size defines number of bits in page table versus page directory,
1417  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1418  * page table and the remaining bits are in the page directory.
1419  */
1420 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1421 {
1422 	/* defines number of bits in page table versus page directory,
1423 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1424 	 * page table and the remaining bits are in the page directory
1425 	 */
1426 	if (amdgpu_vm_block_size == -1)
1427 		return;
1428 
1429 	if (amdgpu_vm_block_size < 9) {
1430 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1431 			 amdgpu_vm_block_size);
1432 		amdgpu_vm_block_size = -1;
1433 	}
1434 }
1435 
1436 /**
1437  * amdgpu_device_check_vm_size - validate the vm size
1438  *
1439  * @adev: amdgpu_device pointer
1440  *
1441  * Validates the vm size in GB specified via module parameter.
1442  * The VM size is the size of the GPU virtual memory space in GB.
1443  */
1444 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1445 {
1446 	/* no need to check the default value */
1447 	if (amdgpu_vm_size == -1)
1448 		return;
1449 
1450 	if (amdgpu_vm_size < 1) {
1451 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1452 			 amdgpu_vm_size);
1453 		amdgpu_vm_size = -1;
1454 	}
1455 }
1456 
1457 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1458 {
1459 	struct sysinfo si;
1460 	bool is_os_64 = (sizeof(void *) == 8);
1461 	uint64_t total_memory;
1462 	uint64_t dram_size_seven_GB = 0x1B8000000;
1463 	uint64_t dram_size_three_GB = 0xB8000000;
1464 
1465 	if (amdgpu_smu_memory_pool_size == 0)
1466 		return;
1467 
1468 	if (!is_os_64) {
1469 		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
1470 		goto def_value;
1471 	}
1472 	si_meminfo(&si);
1473 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1474 
1475 	if ((amdgpu_smu_memory_pool_size == 1) ||
1476 		(amdgpu_smu_memory_pool_size == 2)) {
1477 		if (total_memory < dram_size_three_GB)
1478 			goto def_value1;
1479 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1480 		(amdgpu_smu_memory_pool_size == 8)) {
1481 		if (total_memory < dram_size_seven_GB)
1482 			goto def_value1;
1483 	} else {
1484 		dev_warn(adev->dev, "Smu memory pool size not supported\n");
1485 		goto def_value;
1486 	}
1487 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1488 
1489 	return;
1490 
1491 def_value1:
1492 	dev_warn(adev->dev, "No enough system memory\n");
1493 def_value:
1494 	adev->pm.smu_prv_buffer_size = 0;
1495 }
1496 
1497 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1498 {
1499 	if (!(adev->flags & AMD_IS_APU) ||
1500 	    adev->asic_type < CHIP_RAVEN)
1501 		return 0;
1502 
1503 	switch (adev->asic_type) {
1504 	case CHIP_RAVEN:
1505 		if (adev->pdev->device == 0x15dd)
1506 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1507 		if (adev->pdev->device == 0x15d8)
1508 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1509 		break;
1510 	case CHIP_RENOIR:
1511 		if ((adev->pdev->device == 0x1636) ||
1512 		    (adev->pdev->device == 0x164c))
1513 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1514 		else
1515 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1516 		break;
1517 	case CHIP_VANGOGH:
1518 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1519 		break;
1520 	case CHIP_YELLOW_CARP:
1521 		break;
1522 	case CHIP_CYAN_SKILLFISH:
1523 		if ((adev->pdev->device == 0x13FE) ||
1524 		    (adev->pdev->device == 0x143F))
1525 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1526 		break;
1527 	default:
1528 		break;
1529 	}
1530 
1531 	return 0;
1532 }
1533 
1534 /**
1535  * amdgpu_device_check_arguments - validate module params
1536  *
1537  * @adev: amdgpu_device pointer
1538  *
1539  * Validates certain module parameters and updates
1540  * the associated values used by the driver (all asics).
1541  */
1542 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1543 {
1544 	int i;
1545 
1546 	if (amdgpu_sched_jobs < 4) {
1547 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1548 			 amdgpu_sched_jobs);
1549 		amdgpu_sched_jobs = 4;
1550 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1551 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1552 			 amdgpu_sched_jobs);
1553 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1554 	}
1555 
1556 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1557 		/* gart size must be greater or equal to 32M */
1558 		dev_warn(adev->dev, "gart size (%d) too small\n",
1559 			 amdgpu_gart_size);
1560 		amdgpu_gart_size = -1;
1561 	}
1562 
1563 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1564 		/* gtt size must be greater or equal to 32M */
1565 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1566 				 amdgpu_gtt_size);
1567 		amdgpu_gtt_size = -1;
1568 	}
1569 
1570 	/* valid range is between 4 and 9 inclusive */
1571 	if (amdgpu_vm_fragment_size != -1 &&
1572 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1573 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1574 		amdgpu_vm_fragment_size = -1;
1575 	}
1576 
1577 	if (amdgpu_sched_hw_submission < 2) {
1578 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1579 			 amdgpu_sched_hw_submission);
1580 		amdgpu_sched_hw_submission = 2;
1581 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1582 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1583 			 amdgpu_sched_hw_submission);
1584 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1585 	}
1586 
1587 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1588 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1589 		amdgpu_reset_method = -1;
1590 	}
1591 
1592 	amdgpu_device_check_smu_prv_buffer_size(adev);
1593 
1594 	amdgpu_device_check_vm_size(adev);
1595 
1596 	amdgpu_device_check_block_size(adev);
1597 
1598 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1599 
1600 	for (i = 0; i < MAX_XCP; i++) {
1601 		switch (amdgpu_enforce_isolation) {
1602 		case -1:
1603 		case 0:
1604 		default:
1605 			/* disable */
1606 			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
1607 			break;
1608 		case 1:
1609 			/* enable */
1610 			adev->enforce_isolation[i] =
1611 				AMDGPU_ENFORCE_ISOLATION_ENABLE;
1612 			break;
1613 		case 2:
1614 			/* enable legacy mode */
1615 			adev->enforce_isolation[i] =
1616 				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
1617 			break;
1618 		case 3:
1619 			/* enable only process isolation without submitting cleaner shader */
1620 			adev->enforce_isolation[i] =
1621 				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
1622 			break;
1623 		}
1624 	}
1625 
1626 	return 0;
1627 }
1628 
1629 /**
1630  * amdgpu_switcheroo_set_state - set switcheroo state
1631  *
1632  * @pdev: pci dev pointer
1633  * @state: vga_switcheroo state
1634  *
1635  * Callback for the switcheroo driver.  Suspends or resumes
1636  * the asics before or after it is powered up using ACPI methods.
1637  */
1638 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1639 					enum vga_switcheroo_state state)
1640 {
1641 	struct drm_device *dev = pci_get_drvdata(pdev);
1642 	int r;
1643 
1644 	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
1645 	    state == VGA_SWITCHEROO_OFF)
1646 		return;
1647 
1648 	if (state == VGA_SWITCHEROO_ON) {
1649 		pr_info("switched on\n");
1650 		/* don't suspend or resume card normally */
1651 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1652 
1653 		pci_set_power_state(pdev, PCI_D0);
1654 		amdgpu_device_load_pci_state(pdev);
1655 		r = pci_enable_device(pdev);
1656 		if (r)
1657 			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
1658 				 r);
1659 		amdgpu_device_resume(dev, true);
1660 
1661 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1662 	} else {
1663 		dev_info(&pdev->dev, "switched off\n");
1664 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1665 		amdgpu_device_prepare(dev);
1666 		amdgpu_device_suspend(dev, true);
1667 		amdgpu_device_cache_pci_state(pdev);
1668 		/* Shut down the device */
1669 		pci_disable_device(pdev);
1670 		pci_set_power_state(pdev, PCI_D3cold);
1671 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1672 	}
1673 }
1674 
1675 /**
1676  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1677  *
1678  * @pdev: pci dev pointer
1679  *
1680  * Callback for the switcheroo driver.  Check of the switcheroo
1681  * state can be changed.
1682  * Returns true if the state can be changed, false if not.
1683  */
1684 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1685 {
1686 	struct drm_device *dev = pci_get_drvdata(pdev);
1687 
1688        /*
1689 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1690 	* locking inversion with the driver load path. And the access here is
1691 	* completely racy anyway. So don't bother with locking for now.
1692 	*/
1693 	return atomic_read(&dev->open_count) == 0;
1694 }
1695 
1696 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1697 	.set_gpu_state = amdgpu_switcheroo_set_state,
1698 	.reprobe = NULL,
1699 	.can_switch = amdgpu_switcheroo_can_switch,
1700 };
1701 
1702 /**
1703  * amdgpu_device_enable_virtual_display - enable virtual display feature
1704  *
1705  * @adev: amdgpu_device pointer
1706  *
1707  * Enabled the virtual display feature if the user has enabled it via
1708  * the module parameter virtual_display.  This feature provides a virtual
1709  * display hardware on headless boards or in virtualized environments.
1710  * This function parses and validates the configuration string specified by
1711  * the user and configures the virtual display configuration (number of
1712  * virtual connectors, crtcs, etc.) specified.
1713  */
1714 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1715 {
1716 	adev->enable_virtual_display = false;
1717 
1718 	if (amdgpu_virtual_display) {
1719 		const char *pci_address_name = pci_name(adev->pdev);
1720 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1721 
1722 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1723 		pciaddstr_tmp = pciaddstr;
1724 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1725 			pciaddname = strsep(&pciaddname_tmp, ",");
1726 			if (!strcmp("all", pciaddname)
1727 			    || !strcmp(pci_address_name, pciaddname)) {
1728 				long num_crtc;
1729 				int res = -1;
1730 
1731 				adev->enable_virtual_display = true;
1732 
1733 				if (pciaddname_tmp)
1734 					res = kstrtol(pciaddname_tmp, 10,
1735 						      &num_crtc);
1736 
1737 				if (!res) {
1738 					if (num_crtc < 1)
1739 						num_crtc = 1;
1740 					if (num_crtc > 6)
1741 						num_crtc = 6;
1742 					adev->mode_info.num_crtc = num_crtc;
1743 				} else {
1744 					adev->mode_info.num_crtc = 1;
1745 				}
1746 				break;
1747 			}
1748 		}
1749 
1750 		dev_info(
1751 			adev->dev,
1752 			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1753 			amdgpu_virtual_display, pci_address_name,
1754 			adev->enable_virtual_display, adev->mode_info.num_crtc);
1755 
1756 		kfree(pciaddstr);
1757 	}
1758 }
1759 
1760 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1761 {
1762 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1763 		adev->mode_info.num_crtc = 1;
1764 		adev->enable_virtual_display = true;
1765 		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
1766 			 adev->enable_virtual_display,
1767 			 adev->mode_info.num_crtc);
1768 	}
1769 }
1770 
1771 /**
1772  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1773  *
1774  * @adev: amdgpu_device pointer
1775  *
1776  * Parses the asic configuration parameters specified in the gpu info
1777  * firmware and makes them available to the driver for use in configuring
1778  * the asic.
1779  * Returns 0 on success, -EINVAL on failure.
1780  */
1781 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1782 {
1783 	const char *chip_name;
1784 	int err;
1785 	const struct gpu_info_firmware_header_v1_0 *hdr;
1786 
1787 	adev->firmware.gpu_info_fw = NULL;
1788 
1789 	switch (adev->asic_type) {
1790 	default:
1791 		return 0;
1792 	case CHIP_VEGA10:
1793 		chip_name = "vega10";
1794 		break;
1795 	case CHIP_VEGA12:
1796 		chip_name = "vega12";
1797 		break;
1798 	case CHIP_RAVEN:
1799 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1800 			chip_name = "raven2";
1801 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1802 			chip_name = "picasso";
1803 		else
1804 			chip_name = "raven";
1805 		break;
1806 	case CHIP_ARCTURUS:
1807 		chip_name = "arcturus";
1808 		break;
1809 	case CHIP_NAVI12:
1810 		if (adev->discovery.bin)
1811 			return 0;
1812 		chip_name = "navi12";
1813 		break;
1814 	case CHIP_CYAN_SKILLFISH:
1815 		if (adev->discovery.bin)
1816 			return 0;
1817 		chip_name = "cyan_skillfish";
1818 		break;
1819 	}
1820 
1821 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
1822 				   AMDGPU_UCODE_OPTIONAL,
1823 				   "amdgpu/%s_gpu_info.bin", chip_name);
1824 	if (err) {
1825 		dev_err(adev->dev,
1826 			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
1827 			chip_name);
1828 		goto out;
1829 	}
1830 
1831 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1832 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1833 
1834 	switch (hdr->version_major) {
1835 	case 1:
1836 	{
1837 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1838 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1839 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1840 
1841 		/*
1842 		 * Should be dropped when DAL no longer needs it.
1843 		 */
1844 		if (adev->asic_type == CHIP_NAVI12)
1845 			goto parse_soc_bounding_box;
1846 
1847 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1848 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1849 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1850 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1851 		adev->gfx.config.max_texture_channel_caches =
1852 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1853 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1854 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1855 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1856 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1857 		adev->gfx.config.double_offchip_lds_buf =
1858 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1859 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1860 		adev->gfx.cu_info.max_waves_per_simd =
1861 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1862 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1863 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1864 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1865 		if (hdr->version_minor >= 1) {
1866 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1867 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1868 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1869 			adev->gfx.config.num_sc_per_sh =
1870 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1871 			adev->gfx.config.num_packer_per_sc =
1872 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1873 		}
1874 
1875 parse_soc_bounding_box:
1876 		/*
1877 		 * soc bounding box info is not integrated in disocovery table,
1878 		 * we always need to parse it from gpu info firmware if needed.
1879 		 */
1880 		if (hdr->version_minor == 2) {
1881 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1882 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1883 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1885 		}
1886 		break;
1887 	}
1888 	default:
1889 		dev_err(adev->dev,
1890 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1891 		err = -EINVAL;
1892 		goto out;
1893 	}
1894 out:
1895 	return err;
1896 }
1897 
1898 static void amdgpu_uid_init(struct amdgpu_device *adev)
1899 {
1900 	/* Initialize the UID for the device */
1901 	adev->uid_info = kzalloc_obj(struct amdgpu_uid);
1902 	if (!adev->uid_info) {
1903 		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
1904 		return;
1905 	}
1906 	adev->uid_info->adev = adev;
1907 }
1908 
1909 static void amdgpu_uid_fini(struct amdgpu_device *adev)
1910 {
1911 	/* Free the UID memory */
1912 	kfree(adev->uid_info);
1913 	adev->uid_info = NULL;
1914 }
1915 
1916 /**
1917  * amdgpu_device_ip_early_init - run early init for hardware IPs
1918  *
1919  * @adev: amdgpu_device pointer
1920  *
 * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.  This
 * is the first stage in initializing the asic.
1924  * Returns 0 on success, negative error code on failure.
1925  */
1926 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1927 {
1928 	struct amdgpu_ip_block *ip_block;
1929 	struct pci_dev *parent;
1930 	bool total, skip_bios;
1931 	uint32_t bios_flags;
1932 	int i, r;
1933 
1934 	amdgpu_device_enable_virtual_display(adev);
1935 
1936 	if (amdgpu_sriov_vf(adev)) {
1937 		r = amdgpu_virt_request_full_gpu(adev, true);
1938 		if (r)
1939 			return r;
1940 
1941 		r = amdgpu_virt_init_critical_region(adev);
1942 		if (r)
1943 			return r;
1944 	}
1945 
1946 	switch (adev->asic_type) {
1947 #ifdef CONFIG_DRM_AMDGPU_SI
1948 	case CHIP_VERDE:
1949 	case CHIP_TAHITI:
1950 	case CHIP_PITCAIRN:
1951 	case CHIP_OLAND:
1952 	case CHIP_HAINAN:
1953 		adev->family = AMDGPU_FAMILY_SI;
1954 		r = si_set_ip_blocks(adev);
1955 		if (r)
1956 			return r;
1957 		break;
1958 #endif
1959 #ifdef CONFIG_DRM_AMDGPU_CIK
1960 	case CHIP_BONAIRE:
1961 	case CHIP_HAWAII:
1962 	case CHIP_KAVERI:
1963 	case CHIP_KABINI:
1964 	case CHIP_MULLINS:
1965 		if (adev->flags & AMD_IS_APU)
1966 			adev->family = AMDGPU_FAMILY_KV;
1967 		else
1968 			adev->family = AMDGPU_FAMILY_CI;
1969 
1970 		r = cik_set_ip_blocks(adev);
1971 		if (r)
1972 			return r;
1973 		break;
1974 #endif
1975 	case CHIP_TOPAZ:
1976 	case CHIP_TONGA:
1977 	case CHIP_FIJI:
1978 	case CHIP_POLARIS10:
1979 	case CHIP_POLARIS11:
1980 	case CHIP_POLARIS12:
1981 	case CHIP_VEGAM:
1982 	case CHIP_CARRIZO:
1983 	case CHIP_STONEY:
1984 		if (adev->flags & AMD_IS_APU)
1985 			adev->family = AMDGPU_FAMILY_CZ;
1986 		else
1987 			adev->family = AMDGPU_FAMILY_VI;
1988 
1989 		r = vi_set_ip_blocks(adev);
1990 		if (r)
1991 			return r;
1992 		break;
1993 	default:
1994 		r = amdgpu_discovery_set_ip_blocks(adev);
1995 		if (r) {
1996 			adev->num_ip_blocks = 0;
1997 			return r;
1998 		}
1999 		break;
2000 	}
2001 
2002 	/* Check for IP version 9.4.3 with A0 hardware */
2003 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2004 	    !amdgpu_device_get_rev_id(adev)) {
2005 		dev_err(adev->dev, "Unsupported A0 hardware\n");
2006 		return -ENODEV;	/* device unsupported - no device error */
2007 	}
2008 
2009 	if (amdgpu_has_atpx() &&
2010 	    (amdgpu_is_atpx_hybrid() ||
2011 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2012 	    ((adev->flags & AMD_IS_APU) == 0) &&
2013 	    !dev_is_removable(&adev->pdev->dev))
2014 		adev->flags |= AMD_IS_PX;
2015 
2016 	if (!(adev->flags & AMD_IS_APU)) {
2017 		parent = pcie_find_root_port(adev->pdev);
2018 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2019 	}
2020 
2021 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2022 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2023 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2024 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2025 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2026 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2027 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2028 
2029 	adev->virt.is_xgmi_node_migrate_enabled = false;
2030 	if (amdgpu_sriov_vf(adev)) {
2031 		adev->virt.is_xgmi_node_migrate_enabled =
2032 			amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2033 	}
2034 
2035 	total = true;
2036 	for (i = 0; i < adev->num_ip_blocks; i++) {
2037 		ip_block = &adev->ip_blocks[i];
2038 
2039 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2040 			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2041 				 adev->ip_blocks[i].version->funcs->name);
2042 			adev->ip_blocks[i].status.valid = false;
2043 		} else if (ip_block->version->funcs->early_init) {
2044 			r = ip_block->version->funcs->early_init(ip_block);
2045 			if (r == -ENOENT) {
2046 				adev->ip_blocks[i].status.valid = false;
2047 			} else if (r) {
2048 				dev_err(adev->dev,
2049 					"early_init of IP block <%s> failed %d\n",
2050 					adev->ip_blocks[i].version->funcs->name,
2051 					r);
2052 				total = false;
2053 			} else {
2054 				adev->ip_blocks[i].status.valid = true;
2055 			}
2056 		} else {
2057 			adev->ip_blocks[i].status.valid = true;
2058 		}
2059 		/* get the vbios after the asic_funcs are set up */
2060 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2061 			r = amdgpu_device_parse_gpu_info_fw(adev);
2062 			if (r)
2063 				return r;
2064 
2065 			bios_flags = amdgpu_device_get_vbios_flags(adev);
2066 			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2067 			/* Read BIOS */
2068 			if (!skip_bios) {
2069 				bool optional =
2070 					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2071 				if (!amdgpu_get_bios(adev) && !optional)
2072 					return -EINVAL;
2073 
2074 				if (optional && !adev->bios)
2075 					dev_info(
2076 						adev->dev,
2077 						"VBIOS image optional, proceeding without VBIOS image");
2078 
2079 				if (adev->bios) {
2080 					r = amdgpu_atombios_init(adev);
2081 					if (r) {
2082 						dev_err(adev->dev,
2083 							"amdgpu_atombios_init failed\n");
2084 						amdgpu_vf_error_put(
2085 							adev,
2086 							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2087 							0, 0);
2088 						return r;
2089 					}
2090 				}
2091 			}
2092 
2093 			/*get pf2vf msg info at it's earliest time*/
2094 			if (amdgpu_sriov_vf(adev))
2095 				amdgpu_virt_init_data_exchange(adev);
2096 
2097 		}
2098 	}
2099 	if (!total)
2100 		return -ENODEV;
2101 
2102 	if (adev->gmc.xgmi.supported)
2103 		amdgpu_xgmi_early_init(adev);
2104 
2105 	if (amdgpu_is_multi_aid(adev))
2106 		amdgpu_uid_init(adev);
2107 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2108 	if (ip_block->status.valid != false)
2109 		amdgpu_amdkfd_device_probe(adev);
2110 
2111 	adev->cg_flags &= amdgpu_cg_mask;
2112 	adev->pg_flags &= amdgpu_pg_mask;
2113 
2114 	return 0;
2115 }
2116 
2117 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2118 {
2119 	int i, r;
2120 
2121 	for (i = 0; i < adev->num_ip_blocks; i++) {
2122 		if (!adev->ip_blocks[i].status.sw)
2123 			continue;
2124 		if (adev->ip_blocks[i].status.hw)
2125 			continue;
2126 		if (!amdgpu_ip_member_of_hwini(
2127 			    adev, adev->ip_blocks[i].version->type))
2128 			continue;
2129 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2130 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2131 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2132 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2133 			if (r) {
2134 				dev_err(adev->dev,
2135 					"hw_init of IP block <%s> failed %d\n",
2136 					adev->ip_blocks[i].version->funcs->name,
2137 					r);
2138 				return r;
2139 			}
2140 			adev->ip_blocks[i].status.hw = true;
2141 		}
2142 	}
2143 
2144 	return 0;
2145 }
2146 
2147 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2148 {
2149 	int i, r;
2150 
2151 	for (i = 0; i < adev->num_ip_blocks; i++) {
2152 		if (!adev->ip_blocks[i].status.sw)
2153 			continue;
2154 		if (adev->ip_blocks[i].status.hw)
2155 			continue;
2156 		if (!amdgpu_ip_member_of_hwini(
2157 			    adev, adev->ip_blocks[i].version->type))
2158 			continue;
2159 		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2160 		if (r) {
2161 			dev_err(adev->dev,
2162 				"hw_init of IP block <%s> failed %d\n",
2163 				adev->ip_blocks[i].version->funcs->name, r);
2164 			return r;
2165 		}
2166 		adev->ip_blocks[i].status.hw = true;
2167 	}
2168 
2169 	return 0;
2170 }
2171 
2172 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2173 {
2174 	int r = 0;
2175 	int i;
2176 	uint32_t smu_version;
2177 
2178 	if (adev->asic_type >= CHIP_VEGA10) {
2179 		for (i = 0; i < adev->num_ip_blocks; i++) {
2180 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2181 				continue;
2182 
2183 			if (!amdgpu_ip_member_of_hwini(adev,
2184 						       AMD_IP_BLOCK_TYPE_PSP))
2185 				break;
2186 
2187 			if (!adev->ip_blocks[i].status.sw)
2188 				continue;
2189 
2190 			/* no need to do the fw loading again if already done*/
2191 			if (adev->ip_blocks[i].status.hw == true)
2192 				break;
2193 
2194 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2195 				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2196 				if (r)
2197 					return r;
2198 			} else {
2199 				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2200 				if (r) {
2201 					dev_err(adev->dev,
2202 						"hw_init of IP block <%s> failed %d\n",
2203 						adev->ip_blocks[i]
2204 							.version->funcs->name,
2205 						r);
2206 					return r;
2207 				}
2208 				adev->ip_blocks[i].status.hw = true;
2209 			}
2210 			break;
2211 		}
2212 	}
2213 
2214 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2215 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2216 
2217 	return r;
2218 }
2219 
2220 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2221 {
2222 	struct drm_sched_init_args args = {
2223 		.ops = &amdgpu_sched_ops,
2224 		.timeout_wq = adev->reset_domain->wq,
2225 		.dev = adev->dev,
2226 	};
2227 	long timeout;
2228 	int r, i;
2229 
2230 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2231 		struct amdgpu_ring *ring = adev->rings[i];
2232 
2233 		/* No need to setup the GPU scheduler for rings that don't need it */
2234 		if (!ring || ring->no_scheduler)
2235 			continue;
2236 
2237 		switch (ring->funcs->type) {
2238 		case AMDGPU_RING_TYPE_GFX:
2239 			timeout = adev->gfx_timeout;
2240 			break;
2241 		case AMDGPU_RING_TYPE_COMPUTE:
2242 			timeout = adev->compute_timeout;
2243 			break;
2244 		case AMDGPU_RING_TYPE_SDMA:
2245 			timeout = adev->sdma_timeout;
2246 			break;
2247 		default:
2248 			timeout = adev->video_timeout;
2249 			break;
2250 		}
2251 
2252 		args.timeout = timeout;
2253 		args.credit_limit = ring->num_hw_submission;
2254 		args.score = ring->sched_score;
2255 		args.name = ring->name;
2256 
2257 		r = drm_sched_init(&ring->sched, &args);
2258 		if (r) {
2259 			dev_err(adev->dev,
2260 				"Failed to create scheduler on ring %s.\n",
2261 				ring->name);
2262 			return r;
2263 		}
2264 		r = amdgpu_uvd_entity_init(adev, ring);
2265 		if (r) {
2266 			dev_err(adev->dev,
2267 				"Failed to create UVD scheduling entity on ring %s.\n",
2268 				ring->name);
2269 			return r;
2270 		}
2271 		r = amdgpu_vce_entity_init(adev, ring);
2272 		if (r) {
2273 			dev_err(adev->dev,
2274 				"Failed to create VCE scheduling entity on ring %s.\n",
2275 				ring->name);
2276 			return r;
2277 		}
2278 	}
2279 
2280 	if (adev->xcp_mgr)
2281 		amdgpu_xcp_update_partition_sched_list(adev);
2282 
2283 	return 0;
2284 }
2285 
2286 
2287 /**
2288  * amdgpu_device_ip_init - run init for hardware IPs
2289  *
2290  * @adev: amdgpu_device pointer
2291  *
2292  * Main initialization pass for hardware IPs.  The list of all the hardware
2293  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2294  * are run.  sw_init initializes the software state associated with each IP
2295  * and hw_init initializes the hardware associated with each IP.
2296  * Returns 0 on success, negative error code on failure.
2297  */
2298 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2299 {
2300 	bool init_badpage;
2301 	int i, r;
2302 
2303 	r = amdgpu_ras_init(adev);
2304 	if (r)
2305 		return r;
2306 
2307 	for (i = 0; i < adev->num_ip_blocks; i++) {
2308 		if (!adev->ip_blocks[i].status.valid)
2309 			continue;
2310 		if (adev->ip_blocks[i].version->funcs->sw_init) {
2311 			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2312 			if (r) {
2313 				dev_err(adev->dev,
2314 					"sw_init of IP block <%s> failed %d\n",
2315 					adev->ip_blocks[i].version->funcs->name,
2316 					r);
2317 				goto init_failed;
2318 			}
2319 		}
2320 		adev->ip_blocks[i].status.sw = true;
2321 
2322 		if (!amdgpu_ip_member_of_hwini(
2323 			    adev, adev->ip_blocks[i].version->type))
2324 			continue;
2325 
2326 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2327 			/* need to do common hw init early so everything is set up for gmc */
2328 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2329 			if (r) {
2330 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2331 					r);
2332 				goto init_failed;
2333 			}
2334 			adev->ip_blocks[i].status.hw = true;
2335 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2336 			/* need to do gmc hw init early so we can allocate gpu mem */
2337 			/* Try to reserve bad pages early */
2338 			if (amdgpu_sriov_vf(adev))
2339 				amdgpu_virt_exchange_data(adev);
2340 
2341 			r = amdgpu_device_mem_scratch_init(adev);
2342 			if (r) {
2343 				dev_err(adev->dev,
2344 					"amdgpu_mem_scratch_init failed %d\n",
2345 					r);
2346 				goto init_failed;
2347 			}
2348 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2349 			if (r) {
2350 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2351 					r);
2352 				goto init_failed;
2353 			}
2354 			r = amdgpu_device_wb_init(adev);
2355 			if (r) {
2356 				dev_err(adev->dev,
2357 					"amdgpu_device_wb_init failed %d\n", r);
2358 				goto init_failed;
2359 			}
2360 			adev->ip_blocks[i].status.hw = true;
2361 
2362 			/* right after GMC hw init, we create CSA */
2363 			if (adev->gfx.mcbp) {
2364 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2365 							       AMDGPU_GEM_DOMAIN_VRAM |
2366 							       AMDGPU_GEM_DOMAIN_GTT,
2367 							       AMDGPU_CSA_SIZE);
2368 				if (r) {
2369 					dev_err(adev->dev,
2370 						"allocate CSA failed %d\n", r);
2371 					goto init_failed;
2372 				}
2373 			}
2374 
2375 			r = amdgpu_seq64_init(adev);
2376 			if (r) {
2377 				dev_err(adev->dev, "allocate seq64 failed %d\n",
2378 					r);
2379 				goto init_failed;
2380 			}
2381 		}
2382 	}
2383 
2384 	if (amdgpu_sriov_vf(adev))
2385 		amdgpu_virt_init_data_exchange(adev);
2386 
2387 	r = amdgpu_ib_pool_init(adev);
2388 	if (r) {
2389 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2390 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2391 		goto init_failed;
2392 	}
2393 
2394 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2395 	if (r)
2396 		goto init_failed;
2397 
2398 	r = amdgpu_device_ip_hw_init_phase1(adev);
2399 	if (r)
2400 		goto init_failed;
2401 
2402 	r = amdgpu_device_fw_loading(adev);
2403 	if (r)
2404 		goto init_failed;
2405 
2406 	r = amdgpu_device_ip_hw_init_phase2(adev);
2407 	if (r)
2408 		goto init_failed;
2409 
2410 	/*
2411 	 * retired pages will be loaded from eeprom and reserved here,
2412 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2413 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2414 	 * for I2C communication which only true at this point.
2415 	 *
2416 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2417 	 * failure from bad gpu situation and stop amdgpu init process
2418 	 * accordingly. For other failed cases, it will still release all
2419 	 * the resource and print error message, rather than returning one
2420 	 * negative value to upper level.
2421 	 *
2422 	 * Note: theoretically, this should be called before all vram allocations
2423 	 * to protect retired page from abusing
2424 	 */
2425 	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2426 	r = amdgpu_ras_recovery_init(adev, init_badpage);
2427 	if (r)
2428 		goto init_failed;
2429 
2430 	/**
2431 	 * In case of XGMI grab extra reference for reset domain for this device
2432 	 */
2433 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2434 		if (amdgpu_xgmi_add_device(adev) == 0) {
2435 			if (!amdgpu_sriov_vf(adev)) {
2436 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2437 
2438 				if (WARN_ON(!hive)) {
2439 					r = -ENOENT;
2440 					goto init_failed;
2441 				}
2442 
2443 				if (!hive->reset_domain ||
2444 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2445 					r = -ENOENT;
2446 					amdgpu_put_xgmi_hive(hive);
2447 					goto init_failed;
2448 				}
2449 
2450 				/* Drop the early temporary reset domain we created for device */
2451 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2452 				adev->reset_domain = hive->reset_domain;
2453 				amdgpu_put_xgmi_hive(hive);
2454 			}
2455 		}
2456 	}
2457 
2458 	r = amdgpu_device_init_schedulers(adev);
2459 	if (r)
2460 		goto init_failed;
2461 
2462 	amdgpu_ttm_enable_buffer_funcs(adev);
2463 
2464 	/* Don't init kfd if whole hive need to be reset during init */
2465 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
2466 		amdgpu_amdkfd_device_init(adev);
2467 	}
2468 
2469 	amdgpu_fru_get_product_info(adev);
2470 
2471 	r = amdgpu_cper_init(adev);
2472 
2473 init_failed:
2474 
2475 	return r;
2476 }
2477 
2478 /**
2479  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2480  *
2481  * @adev: amdgpu_device pointer
2482  *
2483  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2484  * this function before a GPU reset.  If the value is retained after a
2485  * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2486  */
2487 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2488 {
2489 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2490 }
2491 
2492 /**
2493  * amdgpu_device_check_vram_lost - check if vram is valid
2494  *
2495  * @adev: amdgpu_device pointer
2496  *
2497  * Checks the reset magic value written to the gart pointer in VRAM.
2498  * The driver calls this after a GPU reset to see if the contents of
2499  * VRAM is lost or now.
2500  * returns true if vram is lost, false if not.
2501  */
2502 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2503 {
2504 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2505 			AMDGPU_RESET_MAGIC_NUM))
2506 		return true;
2507 
2508 	if (!amdgpu_in_reset(adev))
2509 		return false;
2510 
2511 	/*
2512 	 * For all ASICs with baco/mode1 reset, the VRAM is
2513 	 * always assumed to be lost.
2514 	 */
2515 	switch (amdgpu_asic_reset_method(adev)) {
2516 	case AMD_RESET_METHOD_LEGACY:
2517 	case AMD_RESET_METHOD_LINK:
2518 	case AMD_RESET_METHOD_BACO:
2519 	case AMD_RESET_METHOD_MODE1:
2520 		return true;
2521 	default:
2522 		return false;
2523 	}
2524 }
2525 
2526 /**
2527  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2528  *
2529  * @adev: amdgpu_device pointer
2530  * @state: clockgating state (gate or ungate)
2531  *
2532  * The list of all the hardware IPs that make up the asic is walked and the
2533  * set_clockgating_state callbacks are run.
2534  * Late initialization pass enabling clockgating for hardware IPs.
2535  * Fini or suspend, pass disabling clockgating for hardware IPs.
2536  * Returns 0 on success, negative error code on failure.
2537  */
2538 
2539 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2540 			       enum amd_clockgating_state state)
2541 {
2542 	int i, j, r;
2543 
2544 	if (amdgpu_emu_mode == 1)
2545 		return 0;
2546 
2547 	for (j = 0; j < adev->num_ip_blocks; j++) {
2548 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2549 		if (!adev->ip_blocks[i].status.late_initialized)
2550 			continue;
2551 		if (!adev->ip_blocks[i].version)
2552 			continue;
2553 		/* skip CG for GFX, SDMA on S0ix */
2554 		if (adev->in_s0ix &&
2555 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2556 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2557 			continue;
2558 		/* skip CG for VCE/UVD, it's handled specially */
2559 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2560 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2561 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2562 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2563 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2564 			/* enable clockgating to save power */
2565 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
2566 										     state);
2567 			if (r) {
2568 				dev_err(adev->dev,
2569 					"set_clockgating_state(gate) of IP block <%s> failed %d\n",
2570 					adev->ip_blocks[i].version->funcs->name,
2571 					r);
2572 				return r;
2573 			}
2574 		}
2575 	}
2576 
2577 	return 0;
2578 }
2579 
2580 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2581 			       enum amd_powergating_state state)
2582 {
2583 	int i, j, r;
2584 
2585 	if (amdgpu_emu_mode == 1)
2586 		return 0;
2587 
2588 	for (j = 0; j < adev->num_ip_blocks; j++) {
2589 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2590 		if (!adev->ip_blocks[i].status.late_initialized)
2591 			continue;
2592 		if (!adev->ip_blocks[i].version)
2593 			continue;
2594 		/* skip PG for GFX, SDMA on S0ix */
2595 		if (adev->in_s0ix &&
2596 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2597 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2598 			continue;
2599 		/* skip CG for VCE/UVD, it's handled specially */
2600 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2601 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2602 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2603 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2604 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2605 			/* enable powergating to save power */
2606 			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
2607 											state);
2608 			if (r) {
2609 				dev_err(adev->dev,
2610 					"set_powergating_state(gate) of IP block <%s> failed %d\n",
2611 					adev->ip_blocks[i].version->funcs->name,
2612 					r);
2613 				return r;
2614 			}
2615 		}
2616 	}
2617 	return 0;
2618 }
2619 
2620 static int amdgpu_device_enable_mgpu_fan_boost(void)
2621 {
2622 	struct amdgpu_gpu_instance *gpu_ins;
2623 	struct amdgpu_device *adev;
2624 	int i, ret = 0;
2625 
2626 	mutex_lock(&mgpu_info.mutex);
2627 
2628 	/*
2629 	 * MGPU fan boost feature should be enabled
2630 	 * only when there are two or more dGPUs in
2631 	 * the system
2632 	 */
2633 	if (mgpu_info.num_dgpu < 2)
2634 		goto out;
2635 
2636 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2637 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2638 		adev = gpu_ins->adev;
2639 		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
2640 		    !gpu_ins->mgpu_fan_enabled) {
2641 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2642 			if (ret)
2643 				break;
2644 
2645 			gpu_ins->mgpu_fan_enabled = 1;
2646 		}
2647 	}
2648 
2649 out:
2650 	mutex_unlock(&mgpu_info.mutex);
2651 
2652 	return ret;
2653 }
2654 
2655 /**
2656  * amdgpu_device_ip_late_init - run late init for hardware IPs
2657  *
2658  * @adev: amdgpu_device pointer
2659  *
2660  * Late initialization pass for hardware IPs.  The list of all the hardware
2661  * IPs that make up the asic is walked and the late_init callbacks are run.
2662  * late_init covers any special initialization that an IP requires
2663  * after all of the have been initialized or something that needs to happen
2664  * late in the init process.
2665  * Returns 0 on success, negative error code on failure.
2666  */
2667 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2668 {
2669 	struct amdgpu_gpu_instance *gpu_instance;
2670 	int i = 0, r;
2671 
2672 	for (i = 0; i < adev->num_ip_blocks; i++) {
2673 		if (!adev->ip_blocks[i].status.hw)
2674 			continue;
2675 		if (adev->ip_blocks[i].version->funcs->late_init) {
2676 			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
2677 			if (r) {
2678 				dev_err(adev->dev,
2679 					"late_init of IP block <%s> failed %d\n",
2680 					adev->ip_blocks[i].version->funcs->name,
2681 					r);
2682 				return r;
2683 			}
2684 		}
2685 		adev->ip_blocks[i].status.late_initialized = true;
2686 	}
2687 
2688 	r = amdgpu_ras_late_init(adev);
2689 	if (r) {
2690 		dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
2691 		return r;
2692 	}
2693 
2694 	if (!amdgpu_reset_in_recovery(adev))
2695 		amdgpu_ras_set_error_query_ready(adev, true);
2696 
2697 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2698 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2699 
2700 	amdgpu_device_fill_reset_magic(adev);
2701 
2702 	r = amdgpu_device_enable_mgpu_fan_boost();
2703 	if (r)
2704 		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
2705 
2706 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2707 	if (amdgpu_passthrough(adev) &&
2708 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2709 	     adev->asic_type == CHIP_ALDEBARAN))
2710 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2711 
2712 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2713 		mutex_lock(&mgpu_info.mutex);
2714 
2715 		/*
2716 		 * Reset device p-state to low as this was booted with high.
2717 		 *
2718 		 * This should be performed only after all devices from the same
2719 		 * hive get initialized.
2720 		 *
2721 		 * However, it's unknown how many device in the hive in advance.
2722 		 * As this is counted one by one during devices initializations.
2723 		 *
2724 		 * So, we wait for all XGMI interlinked devices initialized.
2725 		 * This may bring some delays as those devices may come from
2726 		 * different hives. But that should be OK.
2727 		 */
2728 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2729 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2730 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2731 				if (gpu_instance->adev->flags & AMD_IS_APU)
2732 					continue;
2733 
2734 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2735 						AMDGPU_XGMI_PSTATE_MIN);
2736 				if (r) {
2737 					dev_err(adev->dev,
2738 						"pstate setting failed (%d).\n",
2739 						r);
2740 					break;
2741 				}
2742 			}
2743 		}
2744 
2745 		mutex_unlock(&mgpu_info.mutex);
2746 	}
2747 
2748 	return 0;
2749 }
2750 
2751 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
2752 {
2753 	struct amdgpu_device *adev = ip_block->adev;
2754 	int r;
2755 
2756 	if (!ip_block->version->funcs->hw_fini) {
2757 		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
2758 			ip_block->version->funcs->name);
2759 	} else {
2760 		r = ip_block->version->funcs->hw_fini(ip_block);
2761 		/* XXX handle errors */
2762 		if (r) {
2763 			dev_dbg(adev->dev,
2764 				"hw_fini of IP block <%s> failed %d\n",
2765 				ip_block->version->funcs->name, r);
2766 		}
2767 	}
2768 
2769 	ip_block->status.hw = false;
2770 }
2771 
2772 /**
2773  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2774  *
2775  * @adev: amdgpu_device pointer
2776  *
2777  * For ASICs need to disable SMC first
2778  */
2779 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2780 {
2781 	int i;
2782 
2783 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
2784 		return;
2785 
2786 	for (i = 0; i < adev->num_ip_blocks; i++) {
2787 		if (!adev->ip_blocks[i].status.hw)
2788 			continue;
2789 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2790 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2791 			break;
2792 		}
2793 	}
2794 }
2795 
2796 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2797 {
2798 	int i, r;
2799 
2800 	for (i = 0; i < adev->num_ip_blocks; i++) {
2801 		if (!adev->ip_blocks[i].version)
2802 			continue;
2803 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2804 			continue;
2805 
2806 		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
2807 		if (r) {
2808 			dev_dbg(adev->dev,
2809 				"early_fini of IP block <%s> failed %d\n",
2810 				adev->ip_blocks[i].version->funcs->name, r);
2811 		}
2812 	}
2813 
2814 	amdgpu_amdkfd_suspend(adev, true);
2815 	amdgpu_amdkfd_teardown_processes(adev);
2816 	amdgpu_userq_suspend(adev);
2817 
2818 	/* Workaround for ASICs need to disable SMC first */
2819 	amdgpu_device_smu_fini_early(adev);
2820 
2821 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2822 		if (!adev->ip_blocks[i].status.hw)
2823 			continue;
2824 
2825 		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2826 	}
2827 
2828 	if (amdgpu_sriov_vf(adev)) {
2829 		if (amdgpu_virt_release_full_gpu(adev, false))
2830 			dev_err(adev->dev,
2831 				"failed to release exclusive mode on fini\n");
2832 	}
2833 
2834 	/*
2835 	 * Driver reload on the APU can fail due to firmware validation because
2836 	 * the PSP is always running, as it is shared across the whole SoC.
2837 	 * This same issue does not occur on dGPU because it has a mechanism
2838 	 * that checks whether the PSP is running. A solution for those issues
2839 	 * in the APU is to trigger a GPU reset, but this should be done during
2840 	 * the unload phase to avoid adding boot latency and screen flicker.
2841 	 * GFX V11 has GC block as default off IP. Every time AMDGPU driver sends
2842 	 * a request to PMFW to unload MP1, PMFW will put GC in reset and power down
2843 	 * the voltage. Hence, skipping reset for APUs with GFX V11 or later.
2844 	 */
2845 	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu &&
2846 		amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) {
2847 		r = amdgpu_asic_reset(adev);
2848 		if (r)
2849 			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
2850 	}
2851 
2852 	return 0;
2853 }
2854 
2855 /**
2856  * amdgpu_device_ip_fini - run fini for hardware IPs
2857  *
2858  * @adev: amdgpu_device pointer
2859  *
2860  * Main teardown pass for hardware IPs.  The list of all the hardware
2861  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2862  * are run.  hw_fini tears down the hardware associated with each IP
2863  * and sw_fini tears down any software state associated with each IP.
2864  * Returns 0 on success, negative error code on failure.
2865  */
2866 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2867 {
2868 	int i, r;
2869 
2870 	amdgpu_cper_fini(adev);
2871 
2872 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2873 		amdgpu_virt_release_ras_err_handler_data(adev);
2874 
2875 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2876 		amdgpu_xgmi_remove_device(adev);
2877 
2878 	amdgpu_amdkfd_device_fini_sw(adev);
2879 
2880 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2881 		if (!adev->ip_blocks[i].status.sw)
2882 			continue;
2883 
2884 		if (!adev->ip_blocks[i].version)
2885 			continue;
2886 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2887 			amdgpu_ucode_free_bo(adev);
2888 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2889 			amdgpu_device_wb_fini(adev);
2890 			amdgpu_device_mem_scratch_fini(adev);
2891 			amdgpu_ib_pool_fini(adev);
2892 			amdgpu_seq64_fini(adev);
2893 			amdgpu_doorbell_fini(adev);
2894 		}
2895 		if (adev->ip_blocks[i].version->funcs->sw_fini) {
2896 			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
2897 			/* XXX handle errors */
2898 			if (r) {
2899 				dev_dbg(adev->dev,
2900 					"sw_fini of IP block <%s> failed %d\n",
2901 					adev->ip_blocks[i].version->funcs->name,
2902 					r);
2903 			}
2904 		}
2905 		adev->ip_blocks[i].status.sw = false;
2906 		adev->ip_blocks[i].status.valid = false;
2907 	}
2908 
2909 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2910 		if (!adev->ip_blocks[i].status.late_initialized)
2911 			continue;
2912 		if (!adev->ip_blocks[i].version)
2913 			continue;
2914 		if (adev->ip_blocks[i].version->funcs->late_fini)
2915 			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
2916 		adev->ip_blocks[i].status.late_initialized = false;
2917 	}
2918 
2919 	amdgpu_ras_fini(adev);
2920 	amdgpu_uid_fini(adev);
2921 
2922 	return 0;
2923 }
2924 
2925 /**
2926  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2927  *
2928  * @work: work_struct.
2929  */
2930 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2931 {
2932 	struct amdgpu_device *adev =
2933 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2934 	int r;
2935 
2936 	r = amdgpu_ib_ring_tests(adev);
2937 	if (r)
2938 		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
2939 }
2940 
2941 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2942 {
2943 	struct amdgpu_device *adev =
2944 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2945 
2946 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2947 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2948 
2949 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
2950 		adev->gfx.gfx_off_state = true;
2951 }
2952 
2953 /**
2954  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2955  *
2956  * @adev: amdgpu_device pointer
2957  *
2958  * Main suspend function for hardware IPs.  The list of all the hardware
2959  * IPs that make up the asic is walked, clockgating is disabled and the
2960  * suspend callbacks are run.  suspend puts the hardware and software state
2961  * in each IP into a state suitable for suspend.
2962  * Returns 0 on success, negative error code on failure.
2963  */
2964 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2965 {
2966 	int i, r, rec;
2967 
2968 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2969 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2970 
2971 	/*
2972 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
2973 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2974 	 * scenario. Add the missing df cstate disablement here.
2975 	 */
2976 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2977 		dev_warn(adev->dev, "Failed to disallow df cstate");
2978 
2979 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2980 		if (!adev->ip_blocks[i].status.valid)
2981 			continue;
2982 
2983 		/* displays are handled separately */
2984 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2985 			continue;
2986 
2987 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
2988 		if (r)
2989 			goto unwind;
2990 	}
2991 
2992 	return 0;
2993 unwind:
2994 	rec = amdgpu_device_ip_resume_phase3(adev);
2995 	if (rec)
2996 		dev_err(adev->dev,
2997 			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
2998 			rec);
2999 
3000 	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
3001 
3002 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3003 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3004 
3005 	return r;
3006 }
3007 
3008 /**
3009  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3010  *
3011  * @adev: amdgpu_device pointer
3012  *
3013  * Main suspend function for hardware IPs.  The list of all the hardware
3014  * IPs that make up the asic is walked, clockgating is disabled and the
3015  * suspend callbacks are run.  suspend puts the hardware and software state
3016  * in each IP into a state suitable for suspend.
3017  * Returns 0 on success, negative error code on failure.
3018  */
3019 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3020 {
3021 	int i, r, rec;
3022 
3023 	if (adev->in_s0ix)
3024 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3025 
3026 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3027 		if (!adev->ip_blocks[i].status.valid)
3028 			continue;
3029 		/* displays are handled in phase1 */
3030 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3031 			continue;
3032 		/* PSP lost connection when err_event_athub occurs */
3033 		if (amdgpu_ras_intr_triggered() &&
3034 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3035 			adev->ip_blocks[i].status.hw = false;
3036 			continue;
3037 		}
3038 
3039 		/* skip unnecessary suspend if we do not initialize them yet */
3040 		if (!amdgpu_ip_member_of_hwini(
3041 			    adev, adev->ip_blocks[i].version->type))
3042 			continue;
3043 
3044 		/* Since we skip suspend for S0i3, we need to cancel the delayed
3045 		 * idle work here as the suspend callback never gets called.
3046 		 */
3047 		if (adev->in_s0ix &&
3048 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3049 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3050 			cancel_delayed_work_sync(&adev->gfx.idle_work);
3051 		/* skip suspend of gfx/mes and psp for S0ix
3052 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3053 		 * like at runtime. PSP is also part of the always on hardware
3054 		 * so no need to suspend it.
3055 		 */
3056 		if (adev->in_s0ix &&
3057 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3058 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3059 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3060 			continue;
3061 
3062 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3063 		if (adev->in_s0ix &&
3064 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3065 		     IP_VERSION(5, 0, 0)) &&
3066 		    (adev->ip_blocks[i].version->type ==
3067 		     AMD_IP_BLOCK_TYPE_SDMA))
3068 			continue;
3069 
3070 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3071 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3072 		 * from this location and RLC Autoload automatically also gets loaded
3073 		 * from here based on PMFW -> PSP message during re-init sequence.
3074 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3075 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3076 		 */
3077 		if (amdgpu_in_reset(adev) &&
3078 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3079 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3080 			continue;
3081 
3082 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3083 		if (r)
3084 			goto unwind;
3085 
3086 		/* handle putting the SMC in the appropriate state */
3087 		if (!amdgpu_sriov_vf(adev)) {
3088 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3089 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3090 				if (r) {
3091 					dev_err(adev->dev,
3092 						"SMC failed to set mp1 state %d, %d\n",
3093 						adev->mp1_state, r);
3094 					goto unwind;
3095 				}
3096 			}
3097 		}
3098 	}
3099 
3100 	return 0;
3101 unwind:
3102 	/* suspend phase 2 = resume phase 1 + resume phase 2 */
3103 	rec = amdgpu_device_ip_resume_phase1(adev);
3104 	if (rec) {
3105 		dev_err(adev->dev,
3106 			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3107 			rec);
3108 		return r;
3109 	}
3110 
3111 	rec = amdgpu_device_fw_loading(adev);
3112 	if (rec) {
3113 		dev_err(adev->dev,
3114 			"amdgpu_device_fw_loading failed during unwind: %d\n",
3115 			rec);
3116 		return r;
3117 	}
3118 
3119 	rec = amdgpu_device_ip_resume_phase2(adev);
3120 	if (rec) {
3121 		dev_err(adev->dev,
3122 			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3123 			rec);
3124 		return r;
3125 	}
3126 
3127 	return r;
3128 }
3129 
3130 /**
3131  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3132  *
3133  * @adev: amdgpu_device pointer
3134  *
3135  * Main suspend function for hardware IPs.  The list of all the hardware
3136  * IPs that make up the asic is walked, clockgating is disabled and the
3137  * suspend callbacks are run.  suspend puts the hardware and software state
3138  * in each IP into a state suitable for suspend.
3139  * Returns 0 on success, negative error code on failure.
3140  */
3141 static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3142 {
3143 	int r;
3144 
3145 	if (amdgpu_sriov_vf(adev)) {
3146 		amdgpu_virt_fini_data_exchange(adev);
3147 		amdgpu_virt_request_full_gpu(adev, false);
3148 	}
3149 
3150 	amdgpu_ttm_disable_buffer_funcs(adev);
3151 
3152 	r = amdgpu_device_ip_suspend_phase1(adev);
3153 	if (r)
3154 		return r;
3155 	r = amdgpu_device_ip_suspend_phase2(adev);
3156 
3157 	if (amdgpu_sriov_vf(adev))
3158 		amdgpu_virt_release_full_gpu(adev, false);
3159 
3160 	return r;
3161 }
3162 
3163 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3164 {
3165 	int i, r;
3166 
3167 	static enum amd_ip_block_type ip_order[] = {
3168 		AMD_IP_BLOCK_TYPE_COMMON,
3169 		AMD_IP_BLOCK_TYPE_GMC,
3170 		AMD_IP_BLOCK_TYPE_PSP,
3171 		AMD_IP_BLOCK_TYPE_IH,
3172 	};
3173 
3174 	for (i = 0; i < adev->num_ip_blocks; i++) {
3175 		int j;
3176 		struct amdgpu_ip_block *block;
3177 
3178 		block = &adev->ip_blocks[i];
3179 		block->status.hw = false;
3180 
3181 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3182 
3183 			if (block->version->type != ip_order[j] ||
3184 				!block->status.valid)
3185 				continue;
3186 
3187 			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3188 			if (r) {
3189 				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3190 					 block->version->funcs->name);
3191 				return r;
3192 			}
3193 			block->status.hw = true;
3194 		}
3195 	}
3196 
3197 	return 0;
3198 }
3199 
3200 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3201 {
3202 	struct amdgpu_ip_block *block;
3203 	int i, r = 0;
3204 
3205 	static enum amd_ip_block_type ip_order[] = {
3206 		AMD_IP_BLOCK_TYPE_SMC,
3207 		AMD_IP_BLOCK_TYPE_DCE,
3208 		AMD_IP_BLOCK_TYPE_GFX,
3209 		AMD_IP_BLOCK_TYPE_SDMA,
3210 		AMD_IP_BLOCK_TYPE_MES,
3211 		AMD_IP_BLOCK_TYPE_UVD,
3212 		AMD_IP_BLOCK_TYPE_VCE,
3213 		AMD_IP_BLOCK_TYPE_VCN,
3214 		AMD_IP_BLOCK_TYPE_JPEG
3215 	};
3216 
3217 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3218 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
3219 
3220 		if (!block)
3221 			continue;
3222 
3223 		if (block->status.valid && !block->status.hw) {
3224 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3225 				r = amdgpu_ip_block_resume(block);
3226 			} else {
3227 				r = block->version->funcs->hw_init(block);
3228 			}
3229 
3230 			if (r) {
3231 				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3232 					 block->version->funcs->name);
3233 				break;
3234 			}
3235 			block->status.hw = true;
3236 		}
3237 	}
3238 
3239 	return r;
3240 }
3241 
3242 /**
3243  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3244  *
3245  * @adev: amdgpu_device pointer
3246  *
3247  * First resume function for hardware IPs.  The list of all the hardware
3248  * IPs that make up the asic is walked and the resume callbacks are run for
3249  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3250  * after a suspend and updates the software state as necessary.  This
3251  * function is also used for restoring the GPU after a GPU reset.
3252  * Returns 0 on success, negative error code on failure.
3253  */
3254 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3255 {
3256 	int i, r;
3257 
3258 	for (i = 0; i < adev->num_ip_blocks; i++) {
3259 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3260 			continue;
3261 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3262 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3263 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3264 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3265 
3266 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3267 			if (r)
3268 				return r;
3269 		}
3270 	}
3271 
3272 	return 0;
3273 }
3274 
3275 /**
3276  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3277  *
3278  * @adev: amdgpu_device pointer
3279  *
3280  * Second resume function for hardware IPs.  The list of all the hardware
3281  * IPs that make up the asic is walked and the resume callbacks are run for
3282  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3283  * functional state after a suspend and updates the software state as
3284  * necessary.  This function is also used for restoring the GPU after a GPU
3285  * reset.
3286  * Returns 0 on success, negative error code on failure.
3287  */
3288 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3289 {
3290 	int i, r;
3291 
3292 	for (i = 0; i < adev->num_ip_blocks; i++) {
3293 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3294 			continue;
3295 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3296 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3297 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3298 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3299 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3300 			continue;
3301 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3302 		if (r)
3303 			return r;
3304 	}
3305 
3306 	return 0;
3307 }
3308 
3309 /**
3310  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3311  *
3312  * @adev: amdgpu_device pointer
3313  *
3314  * Third resume function for hardware IPs.  The list of all the hardware
3315  * IPs that make up the asic is walked and the resume callbacks are run for
3316  * all DCE.  resume puts the hardware into a functional state after a suspend
3317  * and updates the software state as necessary.  This function is also used
3318  * for restoring the GPU after a GPU reset.
3319  *
3320  * Returns 0 on success, negative error code on failure.
3321  */
3322 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3323 {
3324 	int i, r;
3325 
3326 	for (i = 0; i < adev->num_ip_blocks; i++) {
3327 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3328 			continue;
3329 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3330 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3331 			if (r)
3332 				return r;
3333 		}
3334 	}
3335 
3336 	return 0;
3337 }
3338 
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  The work is split into several
 * phase functions so that additional steps can be taken between them; here
 * they are simply run back to back: resume phase 1, firmware loading, resume
 * phase 2, then fence driver hw init and resume phase 3.  The TTM buffer
 * functions are re-enabled after phase 2 whether or not phase 2 succeeded.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int ret;

	ret = amdgpu_device_ip_resume_phase1(adev);
	if (!ret)
		ret = amdgpu_device_fw_loading(adev);
	if (ret)
		return ret;

	ret = amdgpu_device_ip_resume_phase2(adev);

	/* Done before looking at the phase 2 result. */
	amdgpu_ttm_enable_buffer_funcs(adev);

	if (ret)
		return ret;

	amdgpu_fence_driver_hw_init(adev);

	return amdgpu_device_ip_resume_phase3(adev);
}
3376 
3377 /**
3378  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3379  *
3380  * @adev: amdgpu_device pointer
3381  *
3382  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3383  */
3384 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3385 {
3386 	if (amdgpu_sriov_vf(adev)) {
3387 		if (adev->is_atom_fw) {
3388 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3389 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3390 		} else {
3391 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3392 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3393 		}
3394 
3395 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3396 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3397 	}
3398 }
3399 
3400 /**
3401  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3402  *
3403  * @pdev : pci device context
3404  * @asic_type: AMD asic type
3405  *
3406  * Check if there is DC (new modesetting infrastructre) support for an asic.
3407  * returns true if DC has support, false if not.
3408  */
3409 bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
3410 				       enum amd_asic_type asic_type)
3411 {
3412 	switch (asic_type) {
3413 #ifdef CONFIG_DRM_AMDGPU_SI
3414 	case CHIP_HAINAN:
3415 #endif
3416 	case CHIP_TOPAZ:
3417 		/* chips with no display hardware */
3418 		return false;
3419 #if defined(CONFIG_DRM_AMD_DC)
3420 	case CHIP_TAHITI:
3421 	case CHIP_PITCAIRN:
3422 	case CHIP_VERDE:
3423 	case CHIP_OLAND:
3424 		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
3425 	default:
3426 		return amdgpu_dc != 0;
3427 #else
3428 	default:
3429 		if (amdgpu_dc > 0)
3430 			dev_info_once(
3431 				&pdev->dev,
3432 				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3433 		return false;
3434 #endif
3435 	}
3436 }
3437 
3438 /**
3439  * amdgpu_device_has_dc_support - check if dc is supported
3440  *
3441  * @adev: amdgpu_device pointer
3442  *
3443  * Returns true for supported, false for not supported
3444  */
3445 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3446 {
3447 	if (adev->enable_virtual_display ||
3448 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3449 		return false;
3450 
3451 	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
3452 }
3453 
3454 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3455 {
3456 	struct amdgpu_device *adev =
3457 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3458 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3459 
3460 	/* It's a bug to not have a hive within this function */
3461 	if (WARN_ON(!hive))
3462 		return;
3463 
3464 	/*
3465 	 * Use task barrier to synchronize all xgmi reset works across the
3466 	 * hive. task_barrier_enter and task_barrier_exit will block
3467 	 * until all the threads running the xgmi reset works reach
3468 	 * those points. task_barrier_full will do both blocks.
3469 	 */
3470 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3471 
3472 		task_barrier_enter(&hive->tb);
3473 		adev->asic_reset_res = amdgpu_device_baco_enter(adev);
3474 
3475 		if (adev->asic_reset_res)
3476 			goto fail;
3477 
3478 		task_barrier_exit(&hive->tb);
3479 		adev->asic_reset_res = amdgpu_device_baco_exit(adev);
3480 
3481 		if (adev->asic_reset_res)
3482 			goto fail;
3483 
3484 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3485 	} else {
3486 
3487 		task_barrier_full(&hive->tb);
3488 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3489 	}
3490 
3491 fail:
3492 	if (adev->asic_reset_res)
3493 		dev_warn(adev->dev,
3494 			 "ASIC reset failed with error, %d for drm dev, %s",
3495 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3496 	amdgpu_put_xgmi_hive(hive);
3497 }
3498 
3499 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3500 {
3501 	char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
3502 	char *input = buf;
3503 	char *timeout_setting = NULL;
3504 	int index = 0;
3505 	long timeout;
3506 	int ret = 0;
3507 
3508 	/* By default timeout for all queues is 2 sec */
3509 	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3510 		adev->video_timeout = msecs_to_jiffies(2000);
3511 
3512 	if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
3513 		return 0;
3514 
3515 	/*
3516 	 * strsep() destructively modifies its input by replacing delimiters
3517 	 * with '\0'. Use a stack copy so the global module parameter buffer
3518 	 * remains intact for multi-GPU systems where this function is called
3519 	 * once per device.
3520 	 */
3521 	strscpy(buf, amdgpu_lockup_timeout, sizeof(buf));
3522 
3523 	while ((timeout_setting = strsep(&input, ",")) &&
3524 	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3525 		ret = kstrtol(timeout_setting, 0, &timeout);
3526 		if (ret)
3527 			return ret;
3528 
3529 		if (timeout == 0) {
3530 			index++;
3531 			continue;
3532 		} else if (timeout < 0) {
3533 			timeout = MAX_SCHEDULE_TIMEOUT;
3534 			dev_warn(adev->dev, "lockup timeout disabled");
3535 			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3536 		} else {
3537 			timeout = msecs_to_jiffies(timeout);
3538 		}
3539 
3540 		switch (index++) {
3541 		case 0:
3542 			adev->gfx_timeout = timeout;
3543 			break;
3544 		case 1:
3545 			adev->compute_timeout = timeout;
3546 			break;
3547 		case 2:
3548 			adev->sdma_timeout = timeout;
3549 			break;
3550 		case 3:
3551 			adev->video_timeout = timeout;
3552 			break;
3553 		default:
3554 			break;
3555 		}
3556 	}
3557 
3558 	/* When only one value specified apply it to all queues. */
3559 	if (index == 1)
3560 		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3561 			adev->video_timeout = timeout;
3562 
3563 	return ret;
3564 }
3565 
3566 /**
3567  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3568  *
3569  * @adev: amdgpu_device pointer
3570  *
3571  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3572  */
3573 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3574 {
3575 	struct iommu_domain *domain;
3576 
3577 	domain = iommu_get_domain_for_dev(adev->dev);
3578 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3579 		adev->ram_is_direct_mapped = true;
3580 }
3581 
#if defined(CONFIG_HSA_AMD_P2P)
/**
 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true when the device has an IOMMU domain of type IOMMU_DOMAIN_DMA
 * or IOMMU_DOMAIN_DMA_FQ, false otherwise (including when there is no
 * domain at all).
 */
static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(adev->dev);

	if (!domain)
		return false;

	return domain->type == IOMMU_DOMAIN_DMA ||
	       domain->type == IOMMU_DOMAIN_DMA_FQ;
}
#endif
3602 
3603 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3604 {
3605 	if (amdgpu_mcbp == 1)
3606 		adev->gfx.mcbp = true;
3607 	else if (amdgpu_mcbp == 0)
3608 		adev->gfx.mcbp = false;
3609 
3610 	if (amdgpu_sriov_vf(adev))
3611 		adev->gfx.mcbp = true;
3612 
3613 	if (adev->gfx.mcbp)
3614 		dev_info(adev->dev, "MCBP is enabled\n");
3615 }
3616 
3617 static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
3618 {
3619 	int r;
3620 
3621 	r = amdgpu_atombios_sysfs_init(adev);
3622 	if (r)
3623 		drm_err(&adev->ddev,
3624 			"registering atombios sysfs failed (%d).\n", r);
3625 
3626 	r = amdgpu_pm_sysfs_init(adev);
3627 	if (r)
3628 		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
3629 
3630 	r = amdgpu_ucode_sysfs_init(adev);
3631 	if (r) {
3632 		adev->ucode_sysfs_en = false;
3633 		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
3634 	} else
3635 		adev->ucode_sysfs_en = true;
3636 
3637 	r = amdgpu_device_attr_sysfs_init(adev);
3638 	if (r)
3639 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3640 
3641 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
3642 	if (r)
3643 		dev_err(adev->dev,
3644 			"Could not create amdgpu board attributes\n");
3645 
3646 	amdgpu_fru_sysfs_init(adev);
3647 	amdgpu_reg_state_sysfs_init(adev);
3648 	amdgpu_xcp_sysfs_init(adev);
3649 	amdgpu_uma_sysfs_init(adev);
3650 	amdgpu_ptl_sysfs_init(adev);
3651 
3652 	return r;
3653 }
3654 
3655 static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
3656 {
3657 	if (adev->pm.sysfs_initialized)
3658 		amdgpu_pm_sysfs_fini(adev);
3659 	if (adev->ucode_sysfs_en)
3660 		amdgpu_ucode_sysfs_fini(adev);
3661 	amdgpu_device_attr_sysfs_fini(adev);
3662 	amdgpu_fru_sysfs_fini(adev);
3663 
3664 	amdgpu_reg_state_sysfs_fini(adev);
3665 	amdgpu_xcp_sysfs_fini(adev);
3666 	amdgpu_uma_sysfs_fini(adev);
3667 	amdgpu_ptl_sysfs_fini(adev);
3668 }
3669 
3670 /**
3671  * amdgpu_device_init - initialize the driver
3672  *
3673  * @adev: amdgpu_device pointer
3674  * @flags: driver flags
3675  *
3676  * Initializes the driver info and hw (all asics).
3677  * Returns 0 for success or an error on failure.
3678  * Called at driver startup.
3679  */
3680 int amdgpu_device_init(struct amdgpu_device *adev,
3681 		       uint32_t flags)
3682 {
3683 	struct pci_dev *pdev = adev->pdev;
3684 	int r, i;
3685 	bool px = false;
3686 	u32 max_MBps;
3687 	int tmp;
3688 
3689 	adev->shutdown = false;
3690 	adev->flags = flags;
3691 
3692 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3693 		adev->asic_type = amdgpu_force_asic_type;
3694 	else
3695 		adev->asic_type = flags & AMD_ASIC_MASK;
3696 
3697 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3698 	if (amdgpu_emu_mode == 1)
3699 		adev->usec_timeout *= 10;
3700 	adev->gmc.gart_size = 512 * 1024 * 1024;
3701 	adev->accel_working = false;
3702 	adev->num_rings = 0;
3703 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3704 	adev->mman.buffer_funcs = NULL;
3705 	adev->mman.num_buffer_funcs_scheds = 0;
3706 	adev->vm_manager.vm_pte_funcs = NULL;
3707 	adev->vm_manager.vm_pte_num_scheds = 0;
3708 	adev->gmc.gmc_funcs = NULL;
3709 	adev->harvest_ip_mask = 0x0;
3710 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3711 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3712 
3713 	amdgpu_reg_access_init(adev);
3714 
3715 	dev_info(
3716 		adev->dev,
3717 		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3718 		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3719 		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3720 
3721 	/* mutex initialization are all done here so we
3722 	 * can recall function without having locking issues
3723 	 */
3724 	mutex_init(&adev->firmware.mutex);
3725 	mutex_init(&adev->pm.mutex);
3726 	mutex_init(&adev->gfx.gpu_clock_mutex);
3727 	mutex_init(&adev->srbm_mutex);
3728 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3729 	mutex_init(&adev->gfx.gfx_off_mutex);
3730 	mutex_init(&adev->gfx.partition_mutex);
3731 	mutex_init(&adev->grbm_idx_mutex);
3732 	mutex_init(&adev->mn_lock);
3733 	mutex_init(&adev->virt.vf_errors.lock);
3734 	hash_init(adev->mn_hash);
3735 	mutex_init(&adev->psp.mutex);
3736 	mutex_init(&adev->psp.ptl.mutex);
3737 	mutex_init(&adev->notifier_lock);
3738 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3739 	mutex_init(&adev->benchmark_mutex);
3740 	mutex_init(&adev->gfx.reset_sem_mutex);
3741 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
3742 	mutex_init(&adev->enforce_isolation_mutex);
3743 	for (i = 0; i < MAX_XCP; ++i) {
3744 		adev->isolation[i].spearhead = dma_fence_get_stub();
3745 		amdgpu_sync_create(&adev->isolation[i].active);
3746 		amdgpu_sync_create(&adev->isolation[i].prev);
3747 	}
3748 	mutex_init(&adev->gfx.userq_sch_mutex);
3749 	mutex_init(&adev->gfx.workload_profile_mutex);
3750 	mutex_init(&adev->vcn.workload_profile_mutex);
3751 
3752 	amdgpu_device_init_apu_flags(adev);
3753 
3754 	r = amdgpu_device_check_arguments(adev);
3755 	if (r)
3756 		return r;
3757 
3758 	spin_lock_init(&adev->mmio_idx_lock);
3759 	spin_lock_init(&adev->mm_stats.lock);
3760 	spin_lock_init(&adev->virt.rlcg_reg_lock);
3761 	spin_lock_init(&adev->wb.lock);
3762 
3763 	INIT_LIST_HEAD(&adev->reset_list);
3764 
3765 	INIT_LIST_HEAD(&adev->ras_list);
3766 
3767 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3768 
3769 	xa_init_flags(&adev->userq_doorbell_xa, XA_FLAGS_LOCK_IRQ);
3770 
3771 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3772 			  amdgpu_device_delayed_init_work_handler);
3773 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3774 			  amdgpu_device_delay_enable_gfx_off);
3775 	/*
3776 	 * Initialize the enforce_isolation work structures for each XCP
3777 	 * partition.  This work handler is responsible for enforcing shader
3778 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
3779 	 * each GFX and compute ring.  If there are any fences, it schedules
3780 	 * the `enforce_isolation_work` to be run after a delay.  If there are
3781 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
3782 	 * runqueue.
3783 	 */
3784 	for (i = 0; i < MAX_XCP; i++) {
3785 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
3786 				  amdgpu_gfx_enforce_isolation_handler);
3787 		adev->gfx.enforce_isolation[i].adev = adev;
3788 		adev->gfx.enforce_isolation[i].xcp_id = i;
3789 	}
3790 
3791 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3792 
3793 	amdgpu_coredump_init(adev);
3794 
3795 	adev->gfx.gfx_off_req_count = 1;
3796 	adev->gfx.gfx_off_residency = 0;
3797 	adev->gfx.gfx_off_entrycount = 0;
3798 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3799 
3800 	atomic_set(&adev->throttling_logging_enabled, 1);
3801 	/*
3802 	 * If throttling continues, logging will be performed every minute
3803 	 * to avoid log flooding. "-1" is subtracted since the thermal
3804 	 * throttling interrupt comes every second. Thus, the total logging
3805 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3806 	 * for throttling interrupt) = 60 seconds.
3807 	 */
3808 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3809 
3810 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3811 
3812 	/* Registers mapping */
3813 	/* TODO: block userspace mapping of io register */
3814 	if (adev->asic_type >= CHIP_BONAIRE) {
3815 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3816 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3817 	} else {
3818 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3819 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3820 	}
3821 
3822 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3823 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3824 
3825 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3826 	if (!adev->rmmio)
3827 		return -ENOMEM;
3828 
3829 	dev_info(adev->dev, "register mmio base: 0x%08X\n",
3830 		 (uint32_t)adev->rmmio_base);
3831 	dev_info(adev->dev, "register mmio size: %u\n",
3832 		 (unsigned int)adev->rmmio_size);
3833 
3834 	/*
3835 	 * Reset domain needs to be present early, before XGMI hive discovered
3836 	 * (if any) and initialized to use reset sem and in_gpu reset flag
3837 	 * early on during init and before calling to RREG32.
3838 	 */
3839 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3840 	if (!adev->reset_domain)
3841 		return -ENOMEM;
3842 
3843 	/* detect hw virtualization here */
3844 	amdgpu_virt_init(adev);
3845 
3846 	amdgpu_device_get_pcie_info(adev);
3847 
3848 	r = amdgpu_device_get_job_timeout_settings(adev);
3849 	if (r) {
3850 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3851 		return r;
3852 	}
3853 
3854 	amdgpu_device_set_mcbp(adev);
3855 
3856 	/*
3857 	 * By default, use default mode where all blocks are expected to be
3858 	 * initialized. At present a 'swinit' of blocks is required to be
3859 	 * completed before the need for a different level is detected.
3860 	 */
3861 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
3862 
3863 	amdgpu_device_check_iommu_direct_map(adev);
3864 
3865 	/* early init functions */
3866 	r = amdgpu_device_ip_early_init(adev);
3867 	if (r)
3868 		return r;
3869 
3870 	/*
3871 	 * No need to remove conflicting FBs for non-display class devices.
3872 	 * This prevents the sysfb from being freed accidently.
3873 	 */
3874 	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
3875 	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
3876 		/* Get rid of things like offb */
3877 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
3878 		if (r)
3879 			return r;
3880 	}
3881 
3882 	/* Enable TMZ based on IP_VERSION */
3883 	amdgpu_gmc_tmz_set(adev);
3884 
3885 	if (amdgpu_sriov_vf(adev) &&
3886 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
3887 		/* VF MMIO access (except mailbox range) from CPU
3888 		 * will be blocked during sriov runtime
3889 		 */
3890 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
3891 
3892 	amdgpu_gmc_noretry_set(adev);
3893 	/* Need to get xgmi info early to decide the reset behavior*/
3894 	if (adev->gmc.xgmi.supported) {
3895 		if (adev->gfxhub.funcs &&
3896 		    adev->gfxhub.funcs->get_xgmi_info) {
3897 			r = adev->gfxhub.funcs->get_xgmi_info(adev);
3898 			if (r)
3899 				return r;
3900 		}
3901 	}
3902 
3903 	if (adev->gmc.xgmi.connected_to_cpu) {
3904 		if (adev->mmhub.funcs &&
3905 		    adev->mmhub.funcs->get_xgmi_info) {
3906 			r = adev->mmhub.funcs->get_xgmi_info(adev);
3907 			if (r)
3908 				return r;
3909 		}
3910 	}
3911 
3912 	/* enable PCIE atomic ops */
3913 	if (amdgpu_sriov_vf(adev)) {
3914 		if (adev->virt.fw_reserve.p_pf2vf)
3915 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3916 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3917 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3918 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
3919 	 * internal path natively support atomics, set have_atomics_support to true.
3920 	 */
3921 	} else if ((adev->flags & AMD_IS_APU &&
3922 		   amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) ||
3923 		   (adev->gmc.xgmi.connected_to_cpu &&
3924 		   amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))) {
3925 		adev->have_atomics_support = true;
3926 	} else {
3927 		adev->have_atomics_support =
3928 			!pci_enable_atomic_ops_to_root(adev->pdev,
3929 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3930 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3931 	}
3932 
3933 	if (!adev->have_atomics_support)
3934 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3935 
3936 	/* doorbell bar mapping and doorbell index init*/
3937 	amdgpu_doorbell_init(adev);
3938 
3939 	if (amdgpu_emu_mode == 1) {
3940 		/* post the asic on emulation mode */
3941 		emu_soc_asic_init(adev);
3942 		goto fence_driver_init;
3943 	}
3944 
3945 	amdgpu_reset_init(adev);
3946 
3947 	/* detect if we are with an SRIOV vbios */
3948 	if (adev->bios)
3949 		amdgpu_device_detect_sriov_bios(adev);
3950 
3951 	/* check if we need to reset the asic
3952 	 *  E.g., driver was not cleanly unloaded previously, etc.
3953 	 */
3954 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3955 		if (adev->gmc.xgmi.num_physical_nodes) {
3956 			dev_info(adev->dev, "Pending hive reset.\n");
3957 			amdgpu_set_init_level(adev,
3958 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3959 		} else {
3960 				tmp = amdgpu_reset_method;
3961 				/* It should do a default reset when loading or reloading the driver,
3962 				 * regardless of the module parameter reset_method.
3963 				 */
3964 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3965 				r = amdgpu_asic_reset(adev);
3966 				amdgpu_reset_method = tmp;
3967 		}
3968 
3969 		if (r) {
3970 		  dev_err(adev->dev, "asic reset on init failed\n");
3971 		  goto failed;
3972 		}
3973 	}
3974 
3975 	/* Post card if necessary */
3976 	if (amdgpu_device_need_post(adev)) {
3977 		if (!adev->bios) {
3978 			dev_err(adev->dev, "no vBIOS found\n");
3979 			r = -EINVAL;
3980 			goto failed;
3981 		}
3982 		dev_info(adev->dev, "GPU posting now...\n");
3983 		r = amdgpu_device_asic_init(adev);
3984 		if (r) {
3985 			dev_err(adev->dev, "gpu post error!\n");
3986 			goto failed;
3987 		}
3988 	}
3989 
3990 	if (adev->bios) {
3991 		if (adev->is_atom_fw) {
3992 			/* Initialize clocks */
3993 			r = amdgpu_atomfirmware_get_clock_info(adev);
3994 			if (r) {
3995 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3996 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3997 				goto failed;
3998 			}
3999 		} else {
4000 			/* Initialize clocks */
4001 			r = amdgpu_atombios_get_clock_info(adev);
4002 			if (r) {
4003 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4004 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4005 				goto failed;
4006 			}
4007 			/* init i2c buses */
4008 			amdgpu_i2c_init(adev);
4009 		}
4010 	}
4011 
4012 fence_driver_init:
4013 	/* Fence driver */
4014 	r = amdgpu_fence_driver_sw_init(adev);
4015 	if (r) {
4016 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4017 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4018 		goto failed;
4019 	}
4020 
4021 	/* init the mode config */
4022 	drm_mode_config_init(adev_to_drm(adev));
4023 
4024 	r = amdgpu_device_ip_init(adev);
4025 	if (r) {
4026 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4027 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4028 		goto release_ras_con;
4029 	}
4030 
4031 	amdgpu_fence_driver_hw_init(adev);
4032 
4033 	dev_info(adev->dev,
4034 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4035 			adev->gfx.config.max_shader_engines,
4036 			adev->gfx.config.max_sh_per_se,
4037 			adev->gfx.config.max_cu_per_sh,
4038 			adev->gfx.cu_info.number);
4039 
4040 	adev->accel_working = true;
4041 
4042 	amdgpu_vm_check_compute_bug(adev);
4043 
4044 	/* Initialize the buffer migration limit. */
4045 	if (amdgpu_moverate >= 0)
4046 		max_MBps = amdgpu_moverate;
4047 	else
4048 		max_MBps = 8; /* Allow 8 MB/s. */
4049 	/* Get a log2 for easy divisions. */
4050 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4051 
4052 	/*
4053 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4054 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4055 	 * gpu instance is counted less.
4056 	 */
4057 	amdgpu_register_gpu_instance(adev);
4058 
4059 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4060 	 * explicit gating rather than handling it automatically.
4061 	 */
4062 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4063 		r = amdgpu_device_ip_late_init(adev);
4064 		if (r) {
4065 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4066 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4067 			goto release_ras_con;
4068 		}
4069 		/* must succeed. */
4070 		amdgpu_ras_resume(adev);
4071 		queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
4072 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4073 	}
4074 
4075 	if (amdgpu_sriov_vf(adev)) {
4076 		amdgpu_virt_release_full_gpu(adev, true);
4077 		flush_delayed_work(&adev->delayed_init_work);
4078 	}
4079 
4080 	/* Don't init kfd if whole hive need to be reset during init */
4081 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4082 		kgd2kfd_init_zone_device(adev);
4083 		kfd_update_svm_support_properties(adev);
4084 	}
4085 
4086 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4087 		amdgpu_xgmi_reset_on_init(adev);
4088 
4089 	/*
4090 	 * Place those sysfs registering after `late_init`. As some of those
4091 	 * operations performed in `late_init` might affect the sysfs
4092 	 * interfaces creating.
4093 	 */
4094 	r = amdgpu_device_sys_interface_init(adev);
4095 
4096 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4097 		r = amdgpu_pmu_init(adev);
4098 	if (r)
4099 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4100 
4101 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4102 	if (amdgpu_device_cache_pci_state(adev->pdev))
4103 		pci_restore_state(pdev);
4104 
4105 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4106 	/* this will fail for cards that aren't VGA class devices, just
4107 	 * ignore it
4108 	 */
4109 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4110 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4111 
4112 	px = amdgpu_device_supports_px(adev);
4113 
4114 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4115 				apple_gmux_detect(NULL, NULL)))
4116 		vga_switcheroo_register_client(adev->pdev,
4117 					       &amdgpu_switcheroo_ops, px);
4118 
4119 	if (px)
4120 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4121 
4122 	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4123 	r = register_pm_notifier(&adev->pm_nb);
4124 	if (r)
4125 		goto failed;
4126 
4127 	return 0;
4128 
4129 release_ras_con:
4130 	if (amdgpu_sriov_vf(adev))
4131 		amdgpu_virt_release_full_gpu(adev, true);
4132 
4133 	/* failed in exclusive mode due to timeout */
4134 	if (amdgpu_sriov_vf(adev) &&
4135 		!amdgpu_sriov_runtime(adev) &&
4136 		amdgpu_virt_mmio_blocked(adev) &&
4137 		!amdgpu_virt_wait_reset(adev)) {
4138 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4139 		/* Don't send request since VF is inactive. */
4140 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4141 		adev->virt.ops = NULL;
4142 		r = -EAGAIN;
4143 	}
4144 	amdgpu_release_ras_context(adev);
4145 
4146 failed:
4147 	amdgpu_vf_error_trans_all(adev);
4148 
4149 	return r;
4150 }
4151 
4152 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4153 {
4154 
4155 	/* Clear all CPU mappings pointing to this device */
4156 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4157 
4158 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4159 	amdgpu_doorbell_fini(adev);
4160 
4161 	iounmap(adev->rmmio);
4162 	adev->rmmio = NULL;
4163 	if (adev->mman.aper_base_kaddr)
4164 		iounmap(adev->mman.aper_base_kaddr);
4165 	adev->mman.aper_base_kaddr = NULL;
4166 
4167 	/* Memory manager related */
4168 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4169 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4170 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4171 	}
4172 }
4173 
/**
 * amdgpu_device_fini_hw - tear down the hardware side of the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the driver info (all asics).
 * Called at driver shutdown.
 *
 * Pending delayed init work is flushed, the device is marked as shutting
 * down, the PM notifier is unregistered, power/clock gating is undone,
 * interrupts and displays are disabled, and the early IP fini stage is
 * run. If the PCI device is already disconnected, the KFD software state
 * is finalized before the IP blocks and all MMIO mappings are dropped at
 * the end.
 */
void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
	dev_info(adev->dev, "finishing device.\n");
	/* Let any queued delayed init work run to completion first */
	flush_delayed_work(&adev->delayed_init_work);

	/* Drain the TTM device workqueue only if the memory manager came up */
	if (adev->mman.initialized)
		drain_workqueue(adev->mman.bdev.wq);
	adev->shutdown = true;

	unregister_pm_notifier(&adev->pm_nb);

	/* make sure IB test finished before entering exclusive mode
	 * to avoid preemption on IB test
	 */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_request_full_gpu(adev, false);
		amdgpu_virt_fini_data_exchange(adev);
	}

	/* Ungate power and clock gating before touching the blocks */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/* disable all interrupts */
	amdgpu_irq_disable_all(adev);
	/* Shut the displays down via the legacy or the atomic helper */
	if (adev->mode_info.mode_config_initialized) {
		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
			drm_helper_force_disable_all(adev_to_drm(adev));
		else
			drm_atomic_helper_shutdown(adev_to_drm(adev));
	}
	amdgpu_fence_driver_hw_fini(adev);

	amdgpu_device_sys_interface_fini(adev);

	/* disable ras feature must before hw fini */
	amdgpu_ras_pre_fini(adev);

	amdgpu_ttm_disable_buffer_funcs(adev);

	/*
	 * device went through surprise hotplug; we need to destroy topology
	 * before ip_fini_early to prevent kfd locking refcount issues by calling
	 * amdgpu_amdkfd_suspend()
	 */
	if (pci_dev_is_disconnected(adev->pdev))
		amdgpu_amdkfd_device_fini_sw(adev);

	amdgpu_coredump_fini(adev);
	amdgpu_device_ip_fini_early(adev);

	amdgpu_irq_fini_hw(adev);

	if (adev->mman.initialized)
		ttm_device_clear_dma_mappings(&adev->mman.bdev);

	amdgpu_gart_dummy_page_fini(adev);

	/* A disconnected device gets its MMIO mappings removed right away */
	if (pci_dev_is_disconnected(adev->pdev))
		amdgpu_device_unmap_mmio(adev);

}
4243 
/**
 * amdgpu_device_fini_sw - tear down the software side of the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the IP block fini stage and the fence driver software teardown,
 * then releases the remaining software state held by @adev: firmware,
 * fences, reset state, i2c buses, vBIOS, FRU/XCP data, the vga_switcheroo
 * and VGA client registrations, the register mapping (only while the DRM
 * device is still entered), PMU, discovery data, the reset domain
 * reference and the cached PCI state buffers.
 */
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
	int i, idx;
	bool px;

	amdgpu_device_ip_fini(adev);
	amdgpu_fence_driver_sw_fini(adev);
	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
	adev->accel_working = false;
	/* Drop the gang submit fence and the per-partition isolation state */
	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
	for (i = 0; i < MAX_XCP; ++i) {
		dma_fence_put(adev->isolation[i].spearhead);
		amdgpu_sync_free(&adev->isolation[i].active);
		amdgpu_sync_free(&adev->isolation[i].prev);
	}

	amdgpu_reset_fini(adev);

	/* free i2c buses */
	amdgpu_i2c_fini(adev);

	/* atombios state is not torn down when amdgpu_emu_mode is 1 */
	if (adev->bios) {
		if (amdgpu_emu_mode != 1)
			amdgpu_atombios_fini(adev);
		amdgpu_bios_release(adev);
	}

	kfree(adev->fru_info);
	adev->fru_info = NULL;

	kfree(adev->xcp_mgr);
	adev->xcp_mgr = NULL;

	px = amdgpu_device_supports_px(adev);

	/* Same condition as the one used to register the switcheroo client */
	if (px || (!dev_is_removable(&adev->pdev->dev) &&
				apple_gmux_detect(NULL, NULL)))
		vga_switcheroo_unregister_client(adev->pdev);

	if (px)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);

	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
		vga_client_unregister(adev->pdev);

	/* Only unmap the registers here if the DRM device can still be entered */
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

		iounmap(adev->rmmio);
		adev->rmmio = NULL;
		drm_dev_exit(idx);
	}

	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		amdgpu_pmu_fini(adev);
	if (adev->discovery.bin)
		amdgpu_discovery_fini(adev);

	amdgpu_reset_put_reset_domain(adev->reset_domain);
	adev->reset_domain = NULL;

	/* Cached PCI config space of the device and its upstream/downstream switch ports */
	kfree(adev->pci_state);
	kfree(adev->pcie_reset_ctx.swds_pcistate);
	kfree(adev->pcie_reset_ctx.swus_pcistate);
}
4308 
4309 /**
4310  * amdgpu_device_evict_resources - evict device resources
4311  * @adev: amdgpu device object
4312  *
4313  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4314  * of the vram memory type. Mainly used for evicting device resources
4315  * at suspend time.
4316  *
4317  */
4318 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4319 {
4320 	int ret;
4321 
4322 	/* No need to evict vram on APUs unless going to S4 */
4323 	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
4324 		return 0;
4325 
4326 	/* No need to evict when going to S5 through S4 callbacks */
4327 	if (system_state == SYSTEM_POWER_OFF)
4328 		return 0;
4329 
4330 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4331 	if (ret) {
4332 		dev_warn(adev->dev, "evicting device resources failed\n");
4333 		return ret;
4334 	}
4335 
4336 	if (adev->in_s4) {
4337 		ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
4338 		if (ret)
4339 			dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
4340 	}
4341 	return ret;
4342 }
4343 
4344 /*
4345  * Suspend & resume.
4346  */
4347 /**
4348  * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
4349  * @nb: notifier block
4350  * @mode: suspend mode
4351  * @data: data
4352  *
4353  * This function is called when the system is about to suspend or hibernate.
4354  * It is used to set the appropriate flags so that eviction can be optimized
4355  * in the pm prepare callback.
4356  */
4357 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
4358 				     void *data)
4359 {
4360 	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
4361 
4362 	switch (mode) {
4363 	case PM_HIBERNATION_PREPARE:
4364 		adev->in_s4 = true;
4365 		break;
4366 	case PM_POST_HIBERNATION:
4367 		adev->in_s4 = false;
4368 		break;
4369 	}
4370 
4371 	return NOTIFY_DONE;
4372 }
4373 
4374 /**
4375  * amdgpu_device_prepare - prepare for device suspend
4376  *
4377  * @dev: drm dev pointer
4378  *
4379  * Prepare to put the hw in the suspend state (all asics).
4380  * Returns 0 for success or an error on failure.
4381  * Called at driver suspend.
4382  */
4383 int amdgpu_device_prepare(struct drm_device *dev)
4384 {
4385 	struct amdgpu_device *adev = drm_to_adev(dev);
4386 	int i, r;
4387 
4388 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4389 		return 0;
4390 
4391 	/* Evict the majority of BOs before starting suspend sequence */
4392 	r = amdgpu_device_evict_resources(adev);
4393 	if (r)
4394 		return r;
4395 
4396 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4397 
4398 	for (i = 0; i < adev->num_ip_blocks; i++) {
4399 		if (!adev->ip_blocks[i].status.valid)
4400 			continue;
4401 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4402 			continue;
4403 		r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4404 		if (r)
4405 			return r;
4406 	}
4407 
4408 	return 0;
4409 }
4410 
4411 /**
4412  * amdgpu_device_complete - complete power state transition
4413  *
4414  * @dev: drm dev pointer
4415  *
4416  * Undo the changes from amdgpu_device_prepare. This will be
4417  * called on all resume transitions, including those that failed.
4418  */
4419 void amdgpu_device_complete(struct drm_device *dev)
4420 {
4421 	struct amdgpu_device *adev = drm_to_adev(dev);
4422 	int i;
4423 
4424 	for (i = 0; i < adev->num_ip_blocks; i++) {
4425 		if (!adev->ip_blocks[i].status.valid)
4426 			continue;
4427 		if (!adev->ip_blocks[i].version->funcs->complete)
4428 			continue;
4429 		adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
4430 	}
4431 }
4432 
/**
 * amdgpu_device_suspend - initiate device suspend
 *
 * @dev: drm dev pointer
 * @notify_clients: notify in-kernel DRM clients
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 *
 * The suspend sequence is staged; when a stage fails, the labels at the
 * end of the function undo the stages that already completed, in reverse
 * order, and the error of the failing stage is returned. If one of the
 * undo steps itself fails, a warning is logged and the original error is
 * returned immediately without running the remaining undo steps.
 */
int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	/* r: error of the suspend stage; rec: result of a recovery (unwind) step */
	int r, rec;

	/* Nothing to do for a device powered off through vga_switcheroo */
	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	adev->in_suspend = true;

	if (amdgpu_sriov_vf(adev)) {
		if (!adev->in_runpm)
			amdgpu_amdkfd_suspend_process(adev);
		amdgpu_virt_fini_data_exchange(adev);
		r = amdgpu_virt_request_full_gpu(adev, false);
		/*
		 * NOTE(review): this early return leaves adev->in_suspend set
		 * and does not go through the unwind labels - confirm callers
		 * cope with that.
		 */
		if (r)
			return r;
	}

	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
	if (r)
		goto unwind_sriov;

	if (notify_clients)
		drm_client_dev_suspend(adev_to_drm(adev));

	cancel_delayed_work_sync(&adev->delayed_init_work);

	amdgpu_ras_suspend(adev);

	r = amdgpu_device_ip_suspend_phase1(adev);
	if (r)
		goto unwind_smartshift;

	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
	r = amdgpu_userq_suspend(adev);
	if (r)
		goto unwind_ip_phase1;

	r = amdgpu_device_evict_resources(adev);
	if (r)
		goto unwind_userq;

	amdgpu_ttm_disable_buffer_funcs(adev);

	amdgpu_fence_driver_hw_fini(adev);

	r = amdgpu_device_ip_suspend_phase2(adev);
	if (r)
		goto unwind_evict;

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, false);

	return 0;

	/* Phase 2 failed: bring buffer funcs and the fence driver hw back */
unwind_evict:
	amdgpu_ttm_enable_buffer_funcs(adev);
	amdgpu_fence_driver_hw_init(adev);

	/* Eviction (or a later stage) failed: resume user queues and KFD */
unwind_userq:
	rec = amdgpu_userq_resume(adev);
	if (rec) {
		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
		return r;
	}
	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
	if (rec) {
		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
		return r;
	}

unwind_ip_phase1:
	/* suspend phase 1 = resume phase 3 */
	rec = amdgpu_device_ip_resume_phase3(adev);
	if (rec) {
		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
		return r;
	}

	/* Put smart shift back to D0 and wake the DRM clients and RAS again */
unwind_smartshift:
	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
	if (rec) {
		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
		return r;
	}

	if (notify_clients)
		drm_client_dev_resume(adev_to_drm(adev));

	amdgpu_ras_resume(adev);

unwind_sriov:
	if (amdgpu_sriov_vf(adev)) {
		rec = amdgpu_virt_request_full_gpu(adev, true);
		if (rec) {
			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
			return r;
		}
	}

	/* Unwind completed: the device is no longer in any suspend state */
	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;

	return r;
}
4548 
4549 static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
4550 {
4551 	int r;
4552 	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
4553 
4554 	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
4555 	 * may not work. The access could be blocked by nBIF protection as VF isn't in
4556 	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
4557 	 * so that QEMU reprograms MSIX table.
4558 	 */
4559 	amdgpu_restore_msix(adev);
4560 
4561 	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4562 	if (r)
4563 		return r;
4564 
4565 	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
4566 		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
4567 
4568 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
4569 	adev->vm_manager.vram_base_offset +=
4570 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
4571 
4572 	return 0;
4573 }
4574 
/**
 * amdgpu_device_resume - initiate device resume
 *
 * @dev: drm dev pointer
 * @notify_clients: notify in-kernel DRM clients
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 *
 * On an SR-IOV VF, full GPU access is requested first and released again
 * at the exit label, whether or not the resume steps succeeded.
 */
int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r = 0;

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
		r = amdgpu_virt_resume(adev);
		if (r)
			goto exit;
	}

	/*
	 * NOTE(review): this returns without passing through the exit label,
	 * i.e. without releasing full GPU access requested above - confirm an
	 * SR-IOV VF can never be in DRM_SWITCH_POWER_OFF.
	 */
	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	if (adev->in_s0ix)
		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);

	/* post card */
	if (amdgpu_device_need_post(adev)) {
		r = amdgpu_device_asic_init(adev);
		/*
		 * NOTE(review): a failure here is only logged; r is overwritten
		 * by amdgpu_device_ip_resume() below.
		 */
		if (r)
			dev_err(adev->dev, "amdgpu asic init failed\n");
	}

	r = amdgpu_device_ip_resume(adev);

	if (r) {
		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
		goto exit;
	}

	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
	if (r)
		goto exit;

	r = amdgpu_userq_resume(adev);
	if (r)
		goto exit;

	r = amdgpu_device_ip_late_init(adev);
	if (r)
		goto exit;

	queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));
exit:
	/* Reached on success and on failure: hand full GPU access back on a VF */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_init_data_exchange(adev);
		amdgpu_virt_release_full_gpu(adev, true);

		if (!r && !adev->in_runpm)
			r = amdgpu_amdkfd_resume_process(adev);
	}

	if (r)
		return r;

	/* Make sure IB tests flushed */
	flush_delayed_work(&adev->delayed_init_work);

	if (notify_clients)
		drm_client_dev_resume(adev_to_drm(adev));

	amdgpu_ras_resume(adev);

	if (adev->mode_info.num_crtc) {
		/*
		 * Most of the connector probing functions try to acquire runtime pm
		 * refs to ensure that the GPU is powered on when connector polling is
		 * performed. Since we're calling this from a runtime PM callback,
		 * trying to acquire rpm refs will cause us to deadlock.
		 *
		 * Since we're guaranteed to be holding the rpm lock, it's safe to
		 * temporarily disable the rpm helpers so this doesn't deadlock us.
		 */
#ifdef CONFIG_PM
		dev->dev->power.disable_depth++;
#endif
		if (!adev->dc_enabled)
			drm_helper_hpd_irq_event(dev);
		else
			drm_kms_helper_hotplug_event(dev);
#ifdef CONFIG_PM
		dev->dev->power.disable_depth--;
#endif
	}

	amdgpu_vram_mgr_clear_reset_blocks(adev);
	adev->in_suspend = false;

	/* A smart shift failure is not fatal for resume; only warn about it */
	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
		dev_warn(adev->dev, "smart shift update failed\n");

	return 0;
}
4686 
4687 /**
4688  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4689  *
4690  * @adev: amdgpu_device pointer
4691  *
4692  * The list of all the hardware IPs that make up the asic is walked and
4693  * the check_soft_reset callbacks are run.  check_soft_reset determines
4694  * if the asic is still hung or not.
4695  * Returns true if any of the IPs are still in a hung state, false if not.
4696  */
4697 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4698 {
4699 	int i;
4700 	bool asic_hang = false;
4701 
4702 	if (amdgpu_sriov_vf(adev))
4703 		return true;
4704 
4705 	if (amdgpu_asic_need_full_reset(adev))
4706 		return true;
4707 
4708 	for (i = 0; i < adev->num_ip_blocks; i++) {
4709 		if (!adev->ip_blocks[i].status.valid)
4710 			continue;
4711 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4712 			adev->ip_blocks[i].status.hang =
4713 				adev->ip_blocks[i].version->funcs->check_soft_reset(
4714 					&adev->ip_blocks[i]);
4715 		if (adev->ip_blocks[i].status.hang) {
4716 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4717 			asic_hang = true;
4718 		}
4719 	}
4720 	return asic_hang;
4721 }
4722 
4723 /**
4724  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4725  *
4726  * @adev: amdgpu_device pointer
4727  *
4728  * The list of all the hardware IPs that make up the asic is walked and the
4729  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4730  * handles any IP specific hardware or software state changes that are
4731  * necessary for a soft reset to succeed.
4732  * Returns 0 on success, negative error code on failure.
4733  */
4734 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4735 {
4736 	int i, r = 0;
4737 
4738 	for (i = 0; i < adev->num_ip_blocks; i++) {
4739 		if (!adev->ip_blocks[i].status.valid)
4740 			continue;
4741 		if (adev->ip_blocks[i].status.hang &&
4742 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4743 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
4744 			if (r)
4745 				return r;
4746 		}
4747 	}
4748 
4749 	return 0;
4750 }
4751 
4752 /**
4753  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4754  *
4755  * @adev: amdgpu_device pointer
4756  *
4757  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4758  * reset is necessary to recover.
4759  * Returns true if a full asic reset is required, false if not.
4760  */
4761 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4762 {
4763 	int i;
4764 
4765 	if (amdgpu_asic_need_full_reset(adev))
4766 		return true;
4767 
4768 	for (i = 0; i < adev->num_ip_blocks; i++) {
4769 		if (!adev->ip_blocks[i].status.valid)
4770 			continue;
4771 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4772 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4773 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4774 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4775 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4776 			if (adev->ip_blocks[i].status.hang) {
4777 				dev_info(adev->dev, "Some block need full reset!\n");
4778 				return true;
4779 			}
4780 		}
4781 	}
4782 	return false;
4783 }
4784 
4785 /**
4786  * amdgpu_device_ip_soft_reset - do a soft reset
4787  *
4788  * @adev: amdgpu_device pointer
4789  *
4790  * The list of all the hardware IPs that make up the asic is walked and the
4791  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4792  * IP specific hardware or software state changes that are necessary to soft
4793  * reset the IP.
4794  * Returns 0 on success, negative error code on failure.
4795  */
4796 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4797 {
4798 	int i, r = 0;
4799 
4800 	for (i = 0; i < adev->num_ip_blocks; i++) {
4801 		if (!adev->ip_blocks[i].status.valid)
4802 			continue;
4803 		if (adev->ip_blocks[i].status.hang &&
4804 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4805 			r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
4806 			if (r)
4807 				return r;
4808 		}
4809 	}
4810 
4811 	return 0;
4812 }
4813 
4814 /**
4815  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4816  *
4817  * @adev: amdgpu_device pointer
4818  *
4819  * The list of all the hardware IPs that make up the asic is walked and the
4820  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4821  * handles any IP specific hardware or software state changes that are
4822  * necessary after the IP has been soft reset.
4823  * Returns 0 on success, negative error code on failure.
4824  */
4825 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4826 {
4827 	int i, r = 0;
4828 
4829 	for (i = 0; i < adev->num_ip_blocks; i++) {
4830 		if (!adev->ip_blocks[i].status.valid)
4831 			continue;
4832 		if (adev->ip_blocks[i].status.hang &&
4833 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4834 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
4835 		if (r)
4836 			return r;
4837 	}
4838 
4839 	return 0;
4840 }
4841 
/**
 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
 *
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * do VF FLR and reinitialize Asic
 * return 0 means succeeded otherwise failed
 *
 * If AMDGPU_HOST_FLR is set in @reset_context the function waits for the
 * reset and then requests full GPU access (the flag is cleared here);
 * otherwise it asks for a GPU reset through amdgpu_virt_reset_gpu().
 * Afterwards the IP blocks are re-initialized in two stages around
 * firmware loading, the XGMI topology is updated when the device is part
 * of a hive, and the IB/ring tests are run before full GPU access is
 * released again.
 */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
				     struct amdgpu_reset_context *reset_context)
{
	int r;
	struct amdgpu_hive_info *hive = NULL;

	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
		/* Signal readiness for the reset unless the RAS FED status is set */
		if (!amdgpu_ras_get_fed_status(adev))
			amdgpu_virt_ready_to_reset(adev);
		amdgpu_virt_wait_reset(adev);
		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
		r = amdgpu_virt_request_full_gpu(adev, true);
	} else {
		r = amdgpu_virt_reset_gpu(adev);
	}
	if (r)
		return r;

	amdgpu_ras_clear_err_state(adev);
	amdgpu_irq_gpu_reset_resume_helper(adev);

	/* some sw clean up VF needs to do before recover */
	amdgpu_virt_post_reset(adev);

	/* Resume IP prior to SMC */
	r = amdgpu_device_ip_reinit_early_sriov(adev);
	if (r)
		return r;

	amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	/* now we are okay to resume SMC/CP/SDMA */
	r = amdgpu_device_ip_reinit_late_sriov(adev);
	if (r)
		return r;

	hive = amdgpu_get_xgmi_hive(adev);
	/* Update PSP FW topology after reset */
	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
		r = amdgpu_xgmi_update_topology(hive, adev);
	/* Drop the hive reference before looking at the result */
	if (hive)
		amdgpu_put_xgmi_hive(hive);
	if (r)
		return r;

	r = amdgpu_ib_ring_tests(adev);
	if (r)
		return r;

	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
		amdgpu_inc_vram_lost(adev);

	/* need to be called during full access so we can't do it later like
	 * bare-metal does.
	 */
	amdgpu_amdkfd_post_reset(adev);
	amdgpu_virt_release_full_gpu(adev, true);

	/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
		amdgpu_ras_resume(adev);

	amdgpu_virt_ras_telemetry_post_reset(adev);

	return 0;
}
4925 
4926 /**
4927  * amdgpu_device_has_job_running - check if there is any unfinished job
4928  *
4929  * @adev: amdgpu_device pointer
4930  *
4931  * check if there is any job running on the device when guest driver receives
4932  * FLR notification from host driver. If there are still jobs running, then
4933  * the guest driver will not respond the FLR reset. Instead, let the job hit
4934  * the timeout and guest driver then issue the reset request.
4935  */
4936 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4937 {
4938 	int i;
4939 
4940 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4941 		struct amdgpu_ring *ring = adev->rings[i];
4942 
4943 		if (!amdgpu_ring_sched_ready(ring))
4944 			continue;
4945 
4946 		if (amdgpu_fence_count_emitted(ring))
4947 			return true;
4948 	}
4949 	return false;
4950 }
4951 
4952 /**
4953  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4954  *
4955  * @adev: amdgpu_device pointer
4956  *
4957  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4958  * a hung GPU.
4959  */
4960 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4961 {
4962 
4963 	if (amdgpu_gpu_recovery == 0)
4964 		goto disabled;
4965 
4966 	/* Skip soft reset check in fatal error mode */
4967 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4968 		return true;
4969 
4970 	if (amdgpu_sriov_vf(adev))
4971 		return true;
4972 
4973 	if (amdgpu_gpu_recovery == -1) {
4974 		switch (adev->asic_type) {
4975 #ifdef CONFIG_DRM_AMDGPU_SI
4976 		case CHIP_VERDE:
4977 		case CHIP_TAHITI:
4978 		case CHIP_PITCAIRN:
4979 		case CHIP_OLAND:
4980 		case CHIP_HAINAN:
4981 #endif
4982 #ifdef CONFIG_DRM_AMDGPU_CIK
4983 		case CHIP_KAVERI:
4984 		case CHIP_KABINI:
4985 		case CHIP_MULLINS:
4986 #endif
4987 		case CHIP_CARRIZO:
4988 		case CHIP_STONEY:
4989 		case CHIP_CYAN_SKILLFISH:
4990 			goto disabled;
4991 		default:
4992 			break;
4993 		}
4994 	}
4995 
4996 	return true;
4997 
4998 disabled:
4999 		dev_info(adev->dev, "GPU recovery disabled.\n");
5000 		return false;
5001 }
5002 
5003 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5004 {
5005 	u32 i;
5006 	int ret = 0;
5007 
5008 	if (adev->bios)
5009 		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5010 
5011 	dev_info(adev->dev, "GPU mode1 reset\n");
5012 
5013 	/* Cache the state before bus master disable. The saved config space
5014 	 * values are used in other cases like restore after mode-2 reset.
5015 	 */
5016 	amdgpu_device_cache_pci_state(adev->pdev);
5017 
5018 	/* disable BM */
5019 	pci_clear_master(adev->pdev);
5020 
5021 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5022 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5023 		ret = amdgpu_dpm_mode1_reset(adev);
5024 	} else {
5025 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5026 		ret = psp_gpu_reset(adev);
5027 	}
5028 
5029 	if (ret)
5030 		goto mode1_reset_failed;
5031 
5032 	/* enable mmio access after mode 1 reset completed */
5033 	adev->no_hw_access = false;
5034 
5035 	/* ensure no_hw_access is updated before we access hw */
5036 	smp_mb();
5037 
5038 	amdgpu_device_load_pci_state(adev->pdev);
5039 	ret = amdgpu_psp_wait_for_bootloader(adev);
5040 	if (ret)
5041 		goto mode1_reset_failed;
5042 
5043 	/* wait for asic to come out of reset */
5044 	for (i = 0; i < adev->usec_timeout; i++) {
5045 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5046 
5047 		if (memsize != 0xffffffff)
5048 			break;
5049 		udelay(1);
5050 	}
5051 
5052 	if (i >= adev->usec_timeout) {
5053 		ret = -ETIMEDOUT;
5054 		goto mode1_reset_failed;
5055 	}
5056 
5057 	if (adev->bios)
5058 		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5059 
5060 	return 0;
5061 
5062 mode1_reset_failed:
5063 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5064 	return ret;
5065 }
5066 
5067 int amdgpu_device_link_reset(struct amdgpu_device *adev)
5068 {
5069 	int ret = 0;
5070 
5071 	dev_info(adev->dev, "GPU link reset\n");
5072 
5073 	if (!amdgpu_reset_in_dpc(adev))
5074 		ret = amdgpu_dpm_link_reset(adev);
5075 
5076 	if (ret)
5077 		goto link_reset_failed;
5078 
5079 	ret = amdgpu_psp_wait_for_bootloader(adev);
5080 	if (ret)
5081 		goto link_reset_failed;
5082 
5083 	return 0;
5084 
5085 link_reset_failed:
5086 	dev_err(adev->dev, "GPU link reset failed\n");
5087 	return ret;
5088 }
5089 
5090 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5091 				 struct amdgpu_reset_context *reset_context)
5092 {
5093 	int i, r = 0;
5094 	struct amdgpu_job *job = NULL;
5095 	struct dma_fence *fence = NULL;
5096 	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5097 	bool need_full_reset =
5098 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5099 
5100 	if (reset_context->reset_req_dev == adev)
5101 		job = reset_context->job;
5102 
5103 	if (amdgpu_sriov_vf(adev))
5104 		amdgpu_virt_pre_reset(adev);
5105 
5106 	amdgpu_fence_driver_isr_toggle(adev, true);
5107 
5108 	if (job)
5109 		fence = &job->hw_fence->base;
5110 
5111 	/* block all schedulers and reset given job's ring */
5112 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5113 		struct amdgpu_ring *ring = adev->rings[i];
5114 
5115 		if (!amdgpu_ring_sched_ready(ring))
5116 			continue;
5117 
5118 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5119 		amdgpu_fence_driver_force_completion(ring, fence);
5120 	}
5121 
5122 	amdgpu_fence_driver_isr_toggle(adev, false);
5123 
5124 	if (job && job->vm)
5125 		drm_sched_increase_karma(&job->base);
5126 
5127 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5128 	/* If reset handler not implemented, continue; otherwise return */
5129 	if (r == -EOPNOTSUPP)
5130 		r = 0;
5131 	else
5132 		return r;
5133 
5134 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5135 	if (!amdgpu_sriov_vf(adev)) {
5136 
5137 		if (!need_full_reset)
5138 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5139 
5140 		if (!need_full_reset && amdgpu_gpu_recovery &&
5141 		    amdgpu_device_ip_check_soft_reset(adev)) {
5142 			amdgpu_device_ip_pre_soft_reset(adev);
5143 			r = amdgpu_device_ip_soft_reset(adev);
5144 			amdgpu_device_ip_post_soft_reset(adev);
5145 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5146 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5147 				need_full_reset = true;
5148 			}
5149 		}
5150 
5151 		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5152 			dev_info(tmp_adev->dev, "Dumping IP State\n");
5153 			/* Trigger ip dump before we reset the asic */
5154 			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5155 				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5156 					tmp_adev->ip_blocks[i].version->funcs
5157 						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5158 			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5159 		}
5160 
5161 		if (need_full_reset)
5162 			r = amdgpu_device_ip_suspend(adev);
5163 		if (need_full_reset)
5164 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5165 		else
5166 			clear_bit(AMDGPU_NEED_FULL_RESET,
5167 				  &reset_context->flags);
5168 	}
5169 
5170 	return r;
5171 }
5172 
/**
 * amdgpu_device_reinit_after_reset - bring devices back up after an ASIC reset
 *
 * @reset_context: amdgpu reset context pointer; its reset_device_list names
 *                 the devices to re-initialize
 *
 * For every device on the list: when AMDGPU_NEED_FULL_RESET is set in the
 * context, the ASIC is re-posted and the IP blocks are resumed phase by
 * phase (including firmware loading, VRAM-lost detection and the optional
 * coredump). If that succeeded, or no full reset was needed, the init level
 * is set back to default, interrupts are restored and the IB ring tests run.
 * A non-zero status is recorded in the device's asic_reset_res.
 *
 * Returns -EINVAL if the context carries no device list. Otherwise returns
 * the status of the last device processed; a failed IB ring test ends the
 * walk with -EAGAIN and a firmware loading failure returns immediately.
 */
int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
{
	struct list_head *device_list_handle;
	bool full_reset, vram_lost = false;
	struct amdgpu_device *tmp_adev;
	int r, init_level;

	device_list_handle = reset_context->reset_device_list;

	if (!device_list_handle)
		return -EINVAL;

	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/*
	 * If it's reset on init, it's default init level, otherwise keep level
	 * as recovery level.
	 */
	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
	else
			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;

	r = 0;
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		amdgpu_set_init_level(tmp_adev, init_level);
		if (full_reset) {
			/* post card */
			amdgpu_reset_set_dpc_status(tmp_adev, false);
			amdgpu_ras_clear_err_state(tmp_adev);
			r = amdgpu_device_asic_init(tmp_adev);
			if (r) {
				dev_warn(tmp_adev->dev, "asic atom init failed!");
			} else {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");

				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);

				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);

				if (vram_lost) {
					dev_info(
						tmp_adev->dev,
						"VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				/*
				 * NOTE(review): unlike the other failure paths
				 * in this branch, this one returns directly, so
				 * asic_reset_res is not updated and the
				 * remaining devices are not visited - confirm
				 * this is intended.
				 */
				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_xcp_restore_partition_mode(
					tmp_adev->xcp_mgr);
				if (r)
					goto out;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				amdgpu_ttm_enable_buffer_funcs(tmp_adev);

				r = amdgpu_device_ip_resume_phase3(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC as tracked as reset was already
				 * complete successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				if (!reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					amdgpu_xgmi_add_device(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
				if (r)
					goto out;

				drm_client_dev_resume(adev_to_drm(tmp_adev));

				/*
				 * The GPU enters bad state once faulty pages
				 * by ECC has reached the threshold, and ras
				 * recovery is scheduled next. So add one check
				 * here to break recovery if it indeed exceeds
				 * bad page threshold, and remind user to
				 * retire this GPU or setting one bigger
				 * bad_page_threshold value to fix this once
				 * probing driver again.
				 */
				if (!amdgpu_ras_is_rma(tmp_adev)) {
					/* must succeed. */
					amdgpu_ras_resume(tmp_adev);
				} else {
					r = -EINVAL;
					goto out;
				}

				/* Update PSP FW topology after reset */
				if (reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(
						reset_context->hive, tmp_adev);
			}
		}

		/*
		 * Per-device epilogue, also reached when no full reset was
		 * needed: finish init and run the IB ring tests on success,
		 * record the error otherwise.
		 */
out:
		if (!r) {
			/* IP init is complete now, set level as default */
			amdgpu_set_init_level(tmp_adev,
					      AMDGPU_INIT_LEVEL_DEFAULT);
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				/* Any ring test failure is reported as -EAGAIN and stops the walk. */
				r = -EAGAIN;
				goto end;
			}
		}

		if (r)
			tmp_adev->asic_reset_res = r;
	}

end:
	return r;
}
5314 
5315 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5316 			 struct amdgpu_reset_context *reset_context)
5317 {
5318 	struct amdgpu_device *tmp_adev = NULL;
5319 	bool need_full_reset, skip_hw_reset;
5320 	int r = 0;
5321 
5322 	/* Try reset handler method first */
5323 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5324 				    reset_list);
5325 
5326 	reset_context->reset_device_list = device_list_handle;
5327 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5328 	/* If reset handler not implemented, continue; otherwise return */
5329 	if (r == -EOPNOTSUPP)
5330 		r = 0;
5331 	else
5332 		return r;
5333 
5334 	/* Reset handler not implemented, use the default method */
5335 	need_full_reset =
5336 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5337 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5338 
5339 	/*
5340 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5341 	 * to allow proper links negotiation in FW (within 1 sec)
5342 	 */
5343 	if (!skip_hw_reset && need_full_reset) {
5344 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5345 			/* For XGMI run all resets in parallel to speed up the process */
5346 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5347 				if (!queue_work(system_dfl_wq,
5348 						&tmp_adev->xgmi_reset_work))
5349 					r = -EALREADY;
5350 			} else
5351 				r = amdgpu_asic_reset(tmp_adev);
5352 
5353 			if (r) {
5354 				dev_err(tmp_adev->dev,
5355 					"ASIC reset failed with error, %d for drm dev, %s",
5356 					r, adev_to_drm(tmp_adev)->unique);
5357 				goto out;
5358 			}
5359 		}
5360 
5361 		/* For XGMI wait for all resets to complete before proceed */
5362 		if (!r) {
5363 			list_for_each_entry(tmp_adev, device_list_handle,
5364 					    reset_list) {
5365 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5366 					flush_work(&tmp_adev->xgmi_reset_work);
5367 					r = tmp_adev->asic_reset_res;
5368 					if (r)
5369 						break;
5370 				}
5371 			}
5372 		}
5373 	}
5374 
5375 	if (!r && amdgpu_ras_intr_triggered()) {
5376 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5377 			amdgpu_ras_reset_error_count(tmp_adev,
5378 						     AMDGPU_RAS_BLOCK__MMHUB);
5379 		}
5380 
5381 		amdgpu_ras_intr_cleared();
5382 	}
5383 
5384 	r = amdgpu_device_reinit_after_reset(reset_context);
5385 	if (r == -EAGAIN)
5386 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5387 	else
5388 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5389 
5390 out:
5391 	return r;
5392 }
5393 
5394 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5395 {
5396 
5397 	switch (amdgpu_asic_reset_method(adev)) {
5398 	case AMD_RESET_METHOD_MODE1:
5399 	case AMD_RESET_METHOD_LINK:
5400 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5401 		break;
5402 	case AMD_RESET_METHOD_MODE2:
5403 		adev->mp1_state = PP_MP1_STATE_RESET;
5404 		break;
5405 	default:
5406 		adev->mp1_state = PP_MP1_STATE_NONE;
5407 		break;
5408 	}
5409 }
5410 
/*
 * Counterpart of amdgpu_device_set_mp1_state(): calls
 * amdgpu_vf_error_trans_all() and then puts adev->mp1_state back to
 * PP_MP1_STATE_NONE.
 */
static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}
5416 
5417 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5418 {
5419 	struct pci_dev *p = NULL;
5420 
5421 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5422 			adev->pdev->bus->number, 1);
5423 	if (p) {
5424 		pm_runtime_enable(&(p->dev));
5425 		pm_runtime_resume(&(p->dev));
5426 	}
5427 
5428 	pci_dev_put(p);
5429 }
5430 
5431 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5432 {
5433 	enum amd_reset_method reset_method;
5434 	struct pci_dev *p = NULL;
5435 	u64 expires;
5436 
5437 	/*
5438 	 * For now, only BACO and mode1 reset are confirmed
5439 	 * to suffer the audio issue without proper suspended.
5440 	 */
5441 	reset_method = amdgpu_asic_reset_method(adev);
5442 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5443 	     (reset_method != AMD_RESET_METHOD_MODE1))
5444 		return -EINVAL;
5445 
5446 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5447 			adev->pdev->bus->number, 1);
5448 	if (!p)
5449 		return -ENODEV;
5450 
5451 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5452 	if (!expires)
5453 		/*
5454 		 * If we cannot get the audio device autosuspend delay,
5455 		 * a fixed 4S interval will be used. Considering 3S is
5456 		 * the audio controller default autosuspend delay setting.
5457 		 * 4S used here is guaranteed to cover that.
5458 		 */
5459 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5460 
5461 	while (!pm_runtime_status_suspended(&(p->dev))) {
5462 		if (!pm_runtime_suspend(&(p->dev)))
5463 			break;
5464 
5465 		if (expires < ktime_get_mono_fast_ns()) {
5466 			dev_warn(adev->dev, "failed to suspend display audio\n");
5467 			pci_dev_put(p);
5468 			/* TODO: abort the succeeding gpu reset? */
5469 			return -ETIMEDOUT;
5470 		}
5471 	}
5472 
5473 	pm_runtime_disable(&(p->dev));
5474 
5475 	pci_dev_put(p);
5476 	return 0;
5477 }
5478 
5479 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5480 {
5481 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5482 
5483 #if defined(CONFIG_DEBUG_FS)
5484 	if (!amdgpu_sriov_vf(adev))
5485 		cancel_work(&adev->reset_work);
5486 #endif
5487 	amdgpu_userq_mgr_cancel_reset_work(adev);
5488 
5489 	if (adev->kfd.dev)
5490 		cancel_work(&adev->kfd.reset_work);
5491 
5492 	if (amdgpu_sriov_vf(adev))
5493 		cancel_work(&adev->virt.flr_work);
5494 
5495 	if (con && adev->ras_enabled)
5496 		cancel_work(&con->recovery_work);
5497 
5498 }
5499 
5500 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5501 {
5502 	struct amdgpu_device *tmp_adev;
5503 	int ret = 0;
5504 
5505 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5506 		ret |= amdgpu_device_bus_status_check(tmp_adev);
5507 	}
5508 
5509 	return ret;
5510 }
5511 
5512 static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
5513 					  struct list_head *device_list,
5514 					  struct amdgpu_hive_info *hive)
5515 {
5516 	struct amdgpu_device *tmp_adev = NULL;
5517 
5518 	/*
5519 	 * Build list of devices to reset.
5520 	 * In case we are in XGMI hive mode, resort the device list
5521 	 * to put adev in the 1st position.
5522 	 */
5523 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5524 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5525 			list_add_tail(&tmp_adev->reset_list, device_list);
5526 			if (adev->shutdown)
5527 				tmp_adev->shutdown = true;
5528 		}
5529 		if (!list_is_first(&adev->reset_list, device_list))
5530 			list_rotate_to_front(&adev->reset_list, device_list);
5531 	} else {
5532 		list_add_tail(&adev->reset_list, device_list);
5533 	}
5534 }
5535 
5536 static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
5537 						  struct list_head *device_list)
5538 {
5539 	struct amdgpu_device *tmp_adev = NULL;
5540 
5541 	if (list_empty(device_list))
5542 		return;
5543 	tmp_adev =
5544 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5545 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5546 }
5547 
5548 static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
5549 						  struct list_head *device_list)
5550 {
5551 	struct amdgpu_device *tmp_adev = NULL;
5552 
5553 	if (list_empty(device_list))
5554 		return;
5555 	tmp_adev =
5556 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5557 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5558 }
5559 
5560 static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
5561 					  struct amdgpu_job *job,
5562 					  struct amdgpu_reset_context *reset_context,
5563 					  struct list_head *device_list,
5564 					  struct amdgpu_hive_info *hive,
5565 					  bool need_emergency_restart)
5566 {
5567 	struct amdgpu_device *tmp_adev = NULL;
5568 	int i;
5569 
5570 	/* block all schedulers and reset given job's ring */
5571 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5572 		amdgpu_device_set_mp1_state(tmp_adev);
5573 
5574 		/*
5575 		 * Try to put the audio codec into suspend state
5576 		 * before gpu reset started.
5577 		 *
5578 		 * Due to the power domain of the graphics device
5579 		 * is shared with AZ power domain. Without this,
5580 		 * we may change the audio hardware from behind
5581 		 * the audio driver's back. That will trigger
5582 		 * some audio codec errors.
5583 		 */
5584 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5585 			tmp_adev->pcie_reset_ctx.audio_suspended = true;
5586 
5587 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5588 
5589 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5590 
5591 		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5592 
5593 		/*
5594 		 * Mark these ASICs to be reset as untracked first
5595 		 * And add them back after reset completed
5596 		 */
5597 		amdgpu_unregister_gpu_instance(tmp_adev);
5598 
5599 		drm_client_dev_suspend(adev_to_drm(tmp_adev));
5600 
5601 		/* disable ras on ALL IPs */
5602 		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
5603 		    amdgpu_device_ip_need_full_reset(tmp_adev))
5604 			amdgpu_ras_suspend(tmp_adev);
5605 
5606 		amdgpu_userq_pre_reset(tmp_adev);
5607 
5608 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5609 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5610 
5611 			if (!amdgpu_ring_sched_ready(ring))
5612 				continue;
5613 
5614 			drm_sched_wqueue_stop(&ring->sched);
5615 
5616 			if (need_emergency_restart)
5617 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5618 		}
5619 		atomic_inc(&tmp_adev->gpu_reset_counter);
5620 	}
5621 }
5622 
5623 static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
5624 			      struct list_head *device_list,
5625 			      struct amdgpu_reset_context *reset_context)
5626 {
5627 	struct amdgpu_device *tmp_adev = NULL;
5628 	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5629 	int r = 0;
5630 
5631 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5632 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5633 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5634 		/*TODO Should we stop ?*/
5635 		if (r) {
5636 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5637 				  r, adev_to_drm(tmp_adev)->unique);
5638 			tmp_adev->asic_reset_res = r;
5639 		}
5640 	}
5641 
5642 	/* Actual ASIC resets if needed.*/
5643 	/* Host driver will handle XGMI hive reset for SRIOV */
5644 	if (amdgpu_sriov_vf(adev)) {
5645 
5646 		/* Bail out of reset early */
5647 		if (amdgpu_ras_is_rma(adev))
5648 			return -ENODEV;
5649 
5650 		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5651 			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5652 			amdgpu_ras_set_fed(adev, true);
5653 			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5654 		}
5655 
5656 		r = amdgpu_device_reset_sriov(adev, reset_context);
5657 		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5658 			amdgpu_virt_release_full_gpu(adev, true);
5659 			goto retry;
5660 		}
5661 		if (r)
5662 			adev->asic_reset_res = r;
5663 	} else {
5664 		r = amdgpu_do_asic_reset(device_list, reset_context);
5665 		if (r && r == -EAGAIN)
5666 			goto retry;
5667 	}
5668 
5669 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5670 		/*
5671 		 * Drop any pending non scheduler resets queued before reset is done.
5672 		 * Any reset scheduled after this point would be valid. Scheduler resets
5673 		 * were already dropped during drm_sched_stop and no new ones can come
5674 		 * in before drm_sched_start.
5675 		 */
5676 		amdgpu_device_stop_pending_resets(tmp_adev);
5677 	}
5678 
5679 	return r;
5680 }
5681 
5682 static int amdgpu_device_sched_resume(struct list_head *device_list,
5683 			      struct amdgpu_reset_context *reset_context,
5684 			      bool   job_signaled)
5685 {
5686 	struct amdgpu_device *tmp_adev = NULL;
5687 	int i, r = 0;
5688 
5689 	/* Post ASIC reset for all devs .*/
5690 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5691 
5692 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5693 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5694 
5695 			if (!amdgpu_ring_sched_ready(ring))
5696 				continue;
5697 
5698 			drm_sched_wqueue_start(&ring->sched);
5699 		}
5700 
5701 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5702 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5703 
5704 		if (tmp_adev->asic_reset_res) {
5705 			/* bad news, how to tell it to userspace ?
5706 			 * for ras error, we should report GPU bad status instead of
5707 			 * reset failure
5708 			 */
5709 			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5710 			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5711 				dev_info(
5712 					tmp_adev->dev,
5713 					"GPU reset(%d) failed with error %d\n",
5714 					atomic_read(
5715 						&tmp_adev->gpu_reset_counter),
5716 					tmp_adev->asic_reset_res);
5717 			amdgpu_vf_error_put(tmp_adev,
5718 					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
5719 					    tmp_adev->asic_reset_res);
5720 			if (!r)
5721 				r = tmp_adev->asic_reset_res;
5722 			tmp_adev->asic_reset_res = 0;
5723 		} else {
5724 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
5725 				 atomic_read(&tmp_adev->gpu_reset_counter));
5726 			if (amdgpu_acpi_smart_shift_update(tmp_adev,
5727 							   AMDGPU_SS_DEV_D0))
5728 				dev_warn(tmp_adev->dev,
5729 					 "smart shift update failed\n");
5730 		}
5731 	}
5732 
5733 	return r;
5734 }
5735 
5736 static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
5737 			      struct list_head *device_list,
5738 			      bool   need_emergency_restart)
5739 {
5740 	struct amdgpu_device *tmp_adev = NULL;
5741 
5742 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5743 		/* unlock kfd: SRIOV would do it separately */
5744 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5745 			amdgpu_amdkfd_post_reset(tmp_adev);
5746 
5747 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5748 		 * need to bring up kfd here if it's not be initialized before
5749 		 */
5750 		if (!adev->kfd.init_complete)
5751 			amdgpu_amdkfd_device_init(adev);
5752 
5753 		if (tmp_adev->pcie_reset_ctx.audio_suspended)
5754 			amdgpu_device_resume_display_audio(tmp_adev);
5755 
5756 		amdgpu_device_unset_mp1_state(tmp_adev);
5757 
5758 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5759 
5760 	}
5761 }
5762 
5763 
/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: which job trigger hang (may be NULL)
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic.
 *
 * The recovery yields (returning 0 without doing anything) when a RAS error
 * state is pending on bare metal and the request did not come from RAS.
 * The result is also stored in adev->reset_domain->reset_res, and on success
 * a DRM wedged event with DRM_WEDGE_RECOVERY_NONE is sent.
 *
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	int r = 0;
	bool need_emergency_restart = false;
	/* save the pasid here as the job may be freed before the end of the reset */
	int pasid = job ? job->pasid : -EINVAL;

	/*
	 * If it reaches here because of hang/timeout and a RAS error is
	 * detected at the same time, let RAS recovery take care of it.
	 */
	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
	    !amdgpu_sriov_vf(adev) &&
	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
		dev_dbg(adev->dev,
			"Gpu recovery from source: %d yielding to RAS error recovery handling",
			reset_context->src);
		return 0;
	}

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
		amdgpu_ras_get_context(adev)->reboot) {
		dev_warn(adev->dev, "Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!. Source:  %d\n",
		 need_emergency_restart ? "jobs stop" : "reset",
		 reset_context->src);

	/* The hive, if any, stays locked until end_reset. */
	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	INIT_LIST_HEAD(&device_list);

	amdgpu_device_recovery_prepare(adev, &device_list, hive);

	/* On bare metal, give up before touching anything if a bus check fails. */
	if (!amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_health_check(&device_list);
		if (r)
			goto end_reset;
	}

	/* Cannot be called after locking reset domain */
	amdgpu_ras_pre_reset(adev, &device_list);

	/* We need to lock reset domain only once both for XGMI and single device */
	amdgpu_device_recovery_get_reset_lock(adev, &device_list);

	/* unmap all the mappings of doorbell and framebuffer to prevent user space from
	 * accessing them
	 */
	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
	amdgpu_amdkfd_clear_kfd_mapping(adev);

	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
				      hive, need_emergency_restart);
	/* Emergency restart: jobs were stopped above, no reset and no scheduler restart. */
	if (need_emergency_restart)
		goto skip_sched_resume;
	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

	/* A failed reset skips both the scheduler restart and the final resume. */
	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
	if (r)
		goto reset_unlock;
skip_hw_reset:
	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
	if (r)
		goto reset_unlock;
skip_sched_resume:
	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
reset_unlock:
	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
	amdgpu_ras_post_reset(adev, &device_list);
end_reset:
	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	/* Publish the result on the reset domain. */
	atomic_set(&adev->reset_domain->reset_res, r);

	if (!r) {
		struct amdgpu_task_info *ti = NULL;

		/*
		 * The job may already be freed at this point via the sched tdr workqueue so
		 * use the cached pasid.
		 */
		if (pasid >= 0)
			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);

		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
				     ti ? &ti->task : NULL);

		amdgpu_vm_put_task_info(ti);
	}

	return r;
}
5908 
5909 /**
5910  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5911  *
5912  * @adev: amdgpu_device pointer
5913  * @speed: pointer to the speed of the link
5914  * @width: pointer to the width of the link
5915  *
5916  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5917  * first physical partner to an AMD dGPU.
5918  * This will exclude any virtual switches and links.
5919  */
5920 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5921 					    enum pci_bus_speed *speed,
5922 					    enum pcie_link_width *width)
5923 {
5924 	struct pci_dev *parent = adev->pdev;
5925 
5926 	if (!speed || !width)
5927 		return;
5928 
5929 	*speed = PCI_SPEED_UNKNOWN;
5930 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5931 
5932 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5933 		while ((parent = pci_upstream_bridge(parent))) {
5934 			/* skip upstream/downstream switches internal to dGPU*/
5935 			if (parent->vendor == PCI_VENDOR_ID_ATI)
5936 				continue;
5937 			*speed = pcie_get_speed_cap(parent);
5938 			*width = pcie_get_width_cap(parent);
5939 			break;
5940 		}
5941 	} else {
5942 		/* use the current speeds rather than max if switching is not supported */
5943 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5944 	}
5945 }
5946 
5947 /**
5948  * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
5949  *
5950  * @adev: amdgpu_device pointer
5951  * @speed: pointer to the speed of the link
5952  * @width: pointer to the width of the link
5953  *
5954  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5955  * AMD dGPU which may be a virtual upstream bridge.
5956  */
5957 static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
5958 					enum pci_bus_speed *speed,
5959 					enum pcie_link_width *width)
5960 {
5961 	struct pci_dev *parent = adev->pdev;
5962 
5963 	if (!speed || !width)
5964 		return;
5965 
5966 	parent = pci_upstream_bridge(parent);
5967 	if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
5968 		/* use the upstream/downstream switches internal to dGPU */
5969 		*speed = pcie_get_speed_cap(parent);
5970 		*width = pcie_get_width_cap(parent);
5971 		while ((parent = pci_upstream_bridge(parent))) {
5972 			if (parent->vendor == PCI_VENDOR_ID_ATI) {
5973 				/* use the upstream/downstream switches internal to dGPU */
5974 				*speed = pcie_get_speed_cap(parent);
5975 				*width = pcie_get_width_cap(parent);
5976 			}
5977 		}
5978 	} else {
5979 		/* use the device itself */
5980 		*speed = pcie_get_speed_cap(adev->pdev);
5981 		*width = pcie_get_width_cap(adev->pdev);
5982 	}
5983 }
5984 
5985 /**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5987  *
5988  * @adev: amdgpu_device pointer
5989  *
5990  * Fetches and stores in the driver the PCIE capabilities (gen speed
5991  * and lanes) of the slot the device is in. Handles APUs and
5992  * virtualized environments where PCIE config space may not be available.
5993  */
5994 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5995 {
5996 	enum pci_bus_speed speed_cap, platform_speed_cap;
5997 	enum pcie_link_width platform_link_width, link_width;
5998 
5999 	if (amdgpu_pcie_gen_cap)
6000 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6001 
6002 	if (amdgpu_pcie_lane_cap)
6003 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6004 
6005 	/* covers APUs as well */
6006 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6007 		if (adev->pm.pcie_gen_mask == 0)
6008 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6009 		if (adev->pm.pcie_mlw_mask == 0)
6010 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6011 		return;
6012 	}
6013 
6014 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6015 		return;
6016 
6017 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6018 					&platform_link_width);
6019 	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
6020 
6021 	if (adev->pm.pcie_gen_mask == 0) {
6022 		/* asic caps */
6023 		if (speed_cap == PCI_SPEED_UNKNOWN) {
6024 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6025 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6026 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6027 		} else {
6028 			if (speed_cap == PCIE_SPEED_32_0GT)
6029 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6030 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6031 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6032 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6033 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6034 			else if (speed_cap == PCIE_SPEED_16_0GT)
6035 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6036 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6037 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6038 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6039 			else if (speed_cap == PCIE_SPEED_8_0GT)
6040 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6041 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6042 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6043 			else if (speed_cap == PCIE_SPEED_5_0GT)
6044 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6045 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6046 			else
6047 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6048 		}
6049 		/* platform caps */
6050 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6051 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6052 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6053 		} else {
6054 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6055 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6056 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6057 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6058 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6059 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6060 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6061 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6062 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6063 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6064 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6065 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6066 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6067 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6068 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6069 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6070 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6071 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6072 			else
6073 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6074 
6075 		}
6076 	}
6077 	if (adev->pm.pcie_mlw_mask == 0) {
6078 		/* asic caps */
6079 		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6080 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6081 		} else {
6082 			switch (link_width) {
6083 			case PCIE_LNK_X32:
6084 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6085 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6086 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6087 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6088 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6089 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6090 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6091 				break;
6092 			case PCIE_LNK_X16:
6093 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6094 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6095 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6096 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6097 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6098 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6099 				break;
6100 			case PCIE_LNK_X12:
6101 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6102 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6103 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6104 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6105 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6106 				break;
6107 			case PCIE_LNK_X8:
6108 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6109 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6110 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6111 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6112 				break;
6113 			case PCIE_LNK_X4:
6114 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6115 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6116 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6117 				break;
6118 			case PCIE_LNK_X2:
6119 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6120 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6121 				break;
6122 			case PCIE_LNK_X1:
6123 				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6124 				break;
6125 			default:
6126 				break;
6127 			}
6128 		}
6129 		/* platform caps */
6130 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6131 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6132 		} else {
6133 			switch (platform_link_width) {
6134 			case PCIE_LNK_X32:
6135 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6136 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6137 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6138 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6139 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6140 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6141 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6142 				break;
6143 			case PCIE_LNK_X16:
6144 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6145 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6146 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6147 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6148 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6149 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6150 				break;
6151 			case PCIE_LNK_X12:
6152 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6153 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6154 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6155 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6156 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6157 				break;
6158 			case PCIE_LNK_X8:
6159 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6160 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6161 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6162 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6163 				break;
6164 			case PCIE_LNK_X4:
6165 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6166 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6167 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6168 				break;
6169 			case PCIE_LNK_X2:
6170 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6171 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6172 				break;
6173 			case PCIE_LNK_X1:
6174 				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6175 				break;
6176 			default:
6177 				break;
6178 			}
6179 		}
6180 	}
6181 }
6182 
6183 /**
6184  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6185  *
6186  * @adev: amdgpu_device pointer
6187  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6188  *
6189  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6190  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6191  * @peer_adev.
6192  */
6193 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6194 				      struct amdgpu_device *peer_adev)
6195 {
6196 #ifdef CONFIG_HSA_AMD_P2P
6197 	bool p2p_access =
6198 		!adev->gmc.xgmi.connected_to_cpu &&
6199 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6200 	if (!p2p_access)
6201 		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6202 			pci_name(peer_adev->pdev));
6203 
6204 	bool is_large_bar = adev->gmc.visible_vram_size &&
6205 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6206 	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6207 
6208 	if (!p2p_addressable) {
6209 		uint64_t address_mask = peer_adev->dev->dma_mask ?
6210 			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6211 		resource_size_t aper_limit =
6212 			adev->gmc.aper_base + adev->gmc.aper_size - 1;
6213 
6214 		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6215 				     aper_limit & address_mask);
6216 	}
6217 	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6218 #else
6219 	return false;
6220 #endif
6221 }
6222 
6223 int amdgpu_device_baco_enter(struct amdgpu_device *adev)
6224 {
6225 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6226 
6227 	if (!amdgpu_device_supports_baco(adev))
6228 		return -ENOTSUPP;
6229 
6230 	if (ras && adev->ras_enabled &&
6231 	    adev->nbio.funcs->enable_doorbell_interrupt)
6232 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6233 
6234 	return amdgpu_dpm_baco_enter(adev);
6235 }
6236 
6237 int amdgpu_device_baco_exit(struct amdgpu_device *adev)
6238 {
6239 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6240 	int ret = 0;
6241 
6242 	if (!amdgpu_device_supports_baco(adev))
6243 		return -ENOTSUPP;
6244 
6245 	ret = amdgpu_dpm_baco_exit(adev);
6246 	if (ret)
6247 		return ret;
6248 
6249 	if (ras && adev->ras_enabled &&
6250 	    adev->nbio.funcs->enable_doorbell_interrupt)
6251 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6252 
6253 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6254 	    adev->nbio.funcs->clear_doorbell_interrupt)
6255 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6256 
6257 	return 0;
6258 }
6259 
6260 /**
6261  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6262  * @pdev: PCI device struct
6263  * @state: PCI channel state
6264  *
6265  * Description: Called when a PCI error is detected.
6266  *
6267  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6268  */
6269 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6270 {
6271 	struct drm_device *dev = pci_get_drvdata(pdev);
6272 	struct amdgpu_device *adev = drm_to_adev(dev);
6273 	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
6274 		amdgpu_get_xgmi_hive(adev);
6275 	struct amdgpu_reset_context reset_context;
6276 	struct list_head device_list;
6277 
6278 	dev_info(adev->dev, "PCI error: detected callback!!\n");
6279 
6280 	adev->pci_channel_state = state;
6281 
6282 	switch (state) {
6283 	case pci_channel_io_normal:
6284 		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
6285 		return PCI_ERS_RESULT_CAN_RECOVER;
6286 	case pci_channel_io_frozen:
6287 		/* Fatal error, prepare for slot reset */
6288 		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
6289 		if (hive) {
6290 			/* Hive devices should be able to support FW based
6291 			 * link reset on other devices, if not return.
6292 			 */
6293 			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
6294 				dev_warn(adev->dev,
6295 					 "No support for XGMI hive yet...\n");
6296 				return PCI_ERS_RESULT_DISCONNECT;
6297 			}
6298 			/* Set dpc status only if device is part of hive
6299 			 * Non-hive devices should be able to recover after
6300 			 * link reset.
6301 			 */
6302 			amdgpu_reset_set_dpc_status(adev, true);
6303 
6304 			mutex_lock(&hive->hive_lock);
6305 		} else {
6306 			if (amdgpu_device_bus_status_check(adev))
6307 				amdgpu_reset_set_dpc_status(adev, true);
6308 		}
6309 		memset(&reset_context, 0, sizeof(reset_context));
6310 		INIT_LIST_HEAD(&device_list);
6311 
6312 		amdgpu_device_recovery_prepare(adev, &device_list, hive);
6313 		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
6314 		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
6315 					      hive, false);
6316 		if (hive)
6317 			mutex_unlock(&hive->hive_lock);
6318 		return PCI_ERS_RESULT_NEED_RESET;
6319 	case pci_channel_io_perm_failure:
6320 		/* Permanent error, prepare for device removal */
6321 		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
6322 		return PCI_ERS_RESULT_DISCONNECT;
6323 	}
6324 
6325 	return PCI_ERS_RESULT_NEED_RESET;
6326 }
6327 
6328 /**
6329  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6330  * @pdev: pointer to PCI device
6331  */
6332 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6333 {
6334 	struct drm_device *dev = pci_get_drvdata(pdev);
6335 	struct amdgpu_device *adev = drm_to_adev(dev);
6336 
6337 	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6338 
6339 	/* TODO - dump whatever for debugging purposes */
6340 
6341 	/* This called only if amdgpu_pci_error_detected returns
6342 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6343 	 * works, no need to reset slot.
6344 	 */
6345 
6346 	return PCI_ERS_RESULT_RECOVERED;
6347 }
6348 
6349 /**
6350  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6351  * @pdev: PCI device struct
6352  *
6353  * Description: This routine is called by the pci error recovery
6354  * code after the PCI slot has been reset, just before we
6355  * should resume normal operations.
6356  */
6357 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6358 {
6359 	struct drm_device *dev = pci_get_drvdata(pdev);
6360 	struct amdgpu_device *adev = drm_to_adev(dev);
6361 	struct amdgpu_reset_context reset_context;
6362 	struct amdgpu_device *tmp_adev;
6363 	struct amdgpu_hive_info *hive;
6364 	struct list_head device_list;
6365 	struct pci_dev *link_dev;
6366 	int r = 0, i, timeout;
6367 	u32 memsize;
6368 	u16 status;
6369 
6370 	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
6371 
6372 	memset(&reset_context, 0, sizeof(reset_context));
6373 	INIT_LIST_HEAD(&device_list);
6374 	hive = amdgpu_get_xgmi_hive(adev);
6375 	if (hive) {
6376 		mutex_lock(&hive->hive_lock);
6377 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6378 			list_add_tail(&tmp_adev->reset_list, &device_list);
6379 	} else {
6380 		list_add_tail(&adev->reset_list, &device_list);
6381 	}
6382 
6383 	if (adev->pcie_reset_ctx.swus)
6384 		link_dev = adev->pcie_reset_ctx.swus;
6385 	else
6386 		link_dev = adev->pdev;
6387 	/* wait for asic to come out of reset, timeout = 10s */
6388 	timeout = 10000;
6389 	do {
6390 		usleep_range(10000, 10500);
6391 		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
6392 		timeout -= 10;
6393 	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
6394 		 (status != PCI_VENDOR_ID_AMD));
6395 
6396 	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
6397 		r = -ETIME;
6398 		goto out;
6399 	}
6400 
6401 	amdgpu_device_load_switch_state(adev);
6402 	/* Restore PCI confspace */
6403 	amdgpu_device_load_pci_state(pdev);
6404 
6405 	/* confirm  ASIC came out of reset */
6406 	for (i = 0; i < adev->usec_timeout; i++) {
6407 		memsize = amdgpu_asic_get_config_memsize(adev);
6408 
6409 		if (memsize != 0xffffffff)
6410 			break;
6411 		udelay(1);
6412 	}
6413 	if (memsize == 0xffffffff) {
6414 		r = -ETIME;
6415 		goto out;
6416 	}
6417 
6418 	reset_context.method = AMD_RESET_METHOD_NONE;
6419 	reset_context.reset_req_dev = adev;
6420 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6421 	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
6422 
6423 	if (hive) {
6424 		reset_context.hive = hive;
6425 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6426 			tmp_adev->pcie_reset_ctx.in_link_reset = true;
6427 	} else {
6428 		adev->pcie_reset_ctx.in_link_reset = true;
6429 		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6430 	}
6431 
6432 	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
6433 out:
6434 	if (!r) {
6435 		if (amdgpu_device_cache_pci_state(adev->pdev))
6436 			pci_restore_state(adev->pdev);
6437 		dev_info(adev->dev, "PCIe error recovery succeeded\n");
6438 	} else {
6439 		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
6440 		if (hive) {
6441 			list_for_each_entry(tmp_adev, &device_list, reset_list)
6442 				amdgpu_device_unset_mp1_state(tmp_adev);
6443 		}
6444 		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6445 	}
6446 
6447 	if (hive) {
6448 		mutex_unlock(&hive->hive_lock);
6449 		amdgpu_put_xgmi_hive(hive);
6450 	}
6451 
6452 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6453 }
6454 
6455 /**
6456  * amdgpu_pci_resume() - resume normal ops after PCI reset
6457  * @pdev: pointer to PCI device
6458  *
6459  * Called when the error recovery driver tells us that its
6460  * OK to resume normal operation.
6461  */
6462 void amdgpu_pci_resume(struct pci_dev *pdev)
6463 {
6464 	struct drm_device *dev = pci_get_drvdata(pdev);
6465 	struct amdgpu_device *adev = drm_to_adev(dev);
6466 	struct list_head device_list;
6467 	struct amdgpu_hive_info *hive = NULL;
6468 	struct amdgpu_device *tmp_adev = NULL;
6469 
6470 	dev_info(adev->dev, "PCI error: resume callback!!\n");
6471 
6472 	/* Only continue execution for the case of pci_channel_io_frozen */
6473 	if (adev->pci_channel_state != pci_channel_io_frozen)
6474 		return;
6475 
6476 	INIT_LIST_HEAD(&device_list);
6477 
6478 	hive = amdgpu_get_xgmi_hive(adev);
6479 	if (hive) {
6480 		mutex_lock(&hive->hive_lock);
6481 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6482 			tmp_adev->pcie_reset_ctx.in_link_reset = false;
6483 			list_add_tail(&tmp_adev->reset_list, &device_list);
6484 		}
6485 	} else {
6486 		adev->pcie_reset_ctx.in_link_reset = false;
6487 		list_add_tail(&adev->reset_list, &device_list);
6488 	}
6489 	amdgpu_device_sched_resume(&device_list, NULL, NULL);
6490 	amdgpu_device_gpu_resume(adev, &device_list, false);
6491 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6492 
6493 	if (hive) {
6494 		mutex_unlock(&hive->hive_lock);
6495 		amdgpu_put_xgmi_hive(hive);
6496 	}
6497 }
6498 
6499 static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
6500 {
6501 	struct pci_dev *swus, *swds;
6502 	int r;
6503 
6504 	swds = pci_upstream_bridge(adev->pdev);
6505 	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
6506 	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
6507 		return;
6508 	swus = pci_upstream_bridge(swds);
6509 	if (!swus ||
6510 	    (swus->vendor != PCI_VENDOR_ID_ATI &&
6511 	     swus->vendor != PCI_VENDOR_ID_AMD) ||
6512 	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
6513 		return;
6514 
6515 	/* If already saved, return */
6516 	if (adev->pcie_reset_ctx.swus)
6517 		return;
6518 	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
6519 	r = pci_save_state(swds);
6520 	if (r)
6521 		return;
6522 	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
6523 
6524 	r = pci_save_state(swus);
6525 	if (r)
6526 		return;
6527 	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
6528 
6529 	adev->pcie_reset_ctx.swus = swus;
6530 }
6531 
6532 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
6533 {
6534 	struct pci_dev *pdev;
6535 	int r;
6536 
6537 	if (!adev->pcie_reset_ctx.swds_pcistate ||
6538 	    !adev->pcie_reset_ctx.swus_pcistate)
6539 		return;
6540 
6541 	pdev = adev->pcie_reset_ctx.swus;
6542 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
6543 	if (!r) {
6544 		pci_restore_state(pdev);
6545 	} else {
6546 		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
6547 		return;
6548 	}
6549 
6550 	pdev = pci_upstream_bridge(adev->pdev);
6551 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
6552 	if (!r)
6553 		pci_restore_state(pdev);
6554 	else
6555 		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
6556 }
6557 
6558 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6559 {
6560 	struct drm_device *dev = pci_get_drvdata(pdev);
6561 	struct amdgpu_device *adev = drm_to_adev(dev);
6562 	int r;
6563 
6564 	if (amdgpu_sriov_vf(adev))
6565 		return false;
6566 
6567 	r = pci_save_state(pdev);
6568 	if (!r) {
6569 		kfree(adev->pci_state);
6570 
6571 		adev->pci_state = pci_store_saved_state(pdev);
6572 
6573 		if (!adev->pci_state) {
6574 			dev_err(adev->dev, "Failed to store PCI saved state");
6575 			return false;
6576 		}
6577 	} else {
6578 		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
6579 		return false;
6580 	}
6581 
6582 	amdgpu_device_cache_switch_state(adev);
6583 
6584 	return true;
6585 }
6586 
6587 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6588 {
6589 	struct drm_device *dev = pci_get_drvdata(pdev);
6590 	struct amdgpu_device *adev = drm_to_adev(dev);
6591 	int r;
6592 
6593 	if (!adev->pci_state)
6594 		return false;
6595 
6596 	r = pci_load_saved_state(pdev, adev->pci_state);
6597 
6598 	if (!r) {
6599 		pci_restore_state(pdev);
6600 	} else {
6601 		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
6602 		return false;
6603 	}
6604 
6605 	return true;
6606 }
6607 
6608 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6609 		struct amdgpu_ring *ring)
6610 {
6611 #ifdef CONFIG_X86_64
6612 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6613 		return;
6614 #endif
6615 	if (adev->gmc.xgmi.connected_to_cpu)
6616 		return;
6617 
6618 	if (ring && ring->funcs->emit_hdp_flush) {
6619 		amdgpu_ring_emit_hdp_flush(ring);
6620 		return;
6621 	}
6622 
6623 	if (!ring && amdgpu_sriov_runtime(adev)) {
6624 		if (!amdgpu_kiq_hdp_flush(adev))
6625 			return;
6626 	}
6627 
6628 	amdgpu_hdp_flush(adev, ring);
6629 }
6630 
6631 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6632 		struct amdgpu_ring *ring)
6633 {
6634 #ifdef CONFIG_X86_64
6635 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6636 		return;
6637 #endif
6638 	if (adev->gmc.xgmi.connected_to_cpu)
6639 		return;
6640 
6641 	amdgpu_hdp_invalidate(adev, ring);
6642 }
6643 
6644 int amdgpu_in_reset(struct amdgpu_device *adev)
6645 {
6646 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6647 }
6648 
6649 /**
6650  * amdgpu_device_halt() - bring hardware to some kind of halt state
6651  *
6652  * @adev: amdgpu_device pointer
6653  *
6654  * Bring hardware to some kind of halt state so that no one can touch it
6655  * any more. It will help to maintain error context when error occurred.
6656  * Compare to a simple hang, the system will keep stable at least for SSH
6657  * access. Then it should be trivial to inspect the hardware state and
6658  * see what's going on. Implemented as following:
6659  *
6660  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6661  *    clears all CPU mappings to device, disallows remappings through page faults
6662  * 2. amdgpu_irq_disable_all() disables all interrupts
6663  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6664  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6665  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6666  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6667  *    flush any in flight DMA operations
6668  */
6669 void amdgpu_device_halt(struct amdgpu_device *adev)
6670 {
6671 	struct pci_dev *pdev = adev->pdev;
6672 	struct drm_device *ddev = adev_to_drm(adev);
6673 
6674 	amdgpu_xcp_dev_unplug(adev);
6675 	drm_dev_unplug(ddev);
6676 
6677 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
6678 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
6679 
6680 	amdgpu_irq_disable_all(adev);
6681 
6682 	amdgpu_fence_driver_hw_fini(adev);
6683 
6684 	adev->no_hw_access = true;
6685 
6686 	amdgpu_device_unmap_mmio(adev);
6687 
6688 	pci_disable_device(pdev);
6689 	pci_wait_for_pending_transaction(pdev);
6690 }
6691 
6692 /**
6693  * amdgpu_device_get_gang - return a reference to the current gang
6694  * @adev: amdgpu_device pointer
6695  *
6696  * Returns: A new reference to the current gang leader.
6697  */
6698 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6699 {
6700 	struct dma_fence *fence;
6701 
6702 	rcu_read_lock();
6703 	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6704 	rcu_read_unlock();
6705 	return fence;
6706 }
6707 
6708 /**
6709  * amdgpu_device_switch_gang - switch to a new gang
6710  * @adev: amdgpu_device pointer
6711  * @gang: the gang to switch to
6712  *
6713  * Try to switch to a new gang.
6714  * Returns: NULL if we switched to the new gang or a reference to the current
6715  * gang leader.
6716  */
6717 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6718 					    struct dma_fence *gang)
6719 {
6720 	struct dma_fence *old = NULL;
6721 
6722 	dma_fence_get(gang);
6723 	do {
6724 		dma_fence_put(old);
6725 		old = amdgpu_device_get_gang(adev);
6726 		if (old == gang)
6727 			break;
6728 
6729 		if (!dma_fence_is_signaled(old)) {
6730 			dma_fence_put(gang);
6731 			return old;
6732 		}
6733 
6734 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6735 			 old, gang) != old);
6736 
6737 	/*
6738 	 * Drop it once for the exchanged reference in adev and once for the
6739 	 * thread local reference acquired in amdgpu_device_get_gang().
6740 	 */
6741 	dma_fence_put(old);
6742 	dma_fence_put(old);
6743 	return NULL;
6744 }
6745 
/**
 * amdgpu_device_enforce_isolation - enforce HW isolation
 * @adev: the amdgpu device pointer
 * @ring: the HW ring the job is supposed to run on
 * @job: the job which is about to be pushed to the HW ring
 *
 * Makes sure that only one client at a time can use the GFX block.
 * The isolation state is tracked per adev->isolation[] entry, selected by
 * @ring->xcp_id, and is protected by adev->enforce_isolation_mutex. Rings
 * that are neither GFX nor compute are not subject to isolation.
 *
 * Returns: The dependency to wait on before the job can be pushed to the HW,
 * with a reference held for the caller, or NULL if there is none.
 * The function is called multiple times until NULL is returned.
 */
struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
						  struct amdgpu_ring *ring,
						  struct amdgpu_job *job)
{
	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
	struct drm_sched_fence *f = job->base.s_fence;
	struct dma_fence *dep;
	void *owner;
	int r;

	/*
	 * For now enforce isolation only for the GFX block since we only need
	 * the cleaner shader on those rings.
	 */
	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return NULL;

	/*
	 * All submissions where enforce isolation is false are handled as if
	 * they come from a single client. Use ~0l as the owner to distinct it
	 * from kernel submissions where the owner is NULL.
	 */
	owner = job->enforce_isolation ? f->owner : (void *)~0l;

	mutex_lock(&adev->enforce_isolation_mutex);

	/*
	 * The "spearhead" submission is the first one which changes the
	 * ownership to its client. We always need to wait for it to be
	 * pushed to the HW before proceeding with anything.
	 */
	if (&f->scheduled != isolation->spearhead &&
	    !dma_fence_is_signaled(isolation->spearhead)) {
		dep = isolation->spearhead;
		goto out_grab_ref;
	}

	if (isolation->owner != owner) {

		/*
		 * Wait for any gang to be assembled before switching to a
		 * different owner or otherwise we could deadlock the
		 * submissions.
		 */
		if (!job->gang_submit) {
			/*
			 * amdgpu_device_get_gang() already returns a new
			 * reference, so skip the dma_fence_get() below.
			 */
			dep = amdgpu_device_get_gang(adev);
			if (!dma_fence_is_signaled(dep))
				goto out_return_dep;
			dma_fence_put(dep);
		}

		/*
		 * This job becomes the new spearhead: fences tracked for the
		 * old owner move from "active" to "prev".
		 */
		dma_fence_put(isolation->spearhead);
		isolation->spearhead = dma_fence_get(&f->scheduled);
		amdgpu_sync_move(&isolation->active, &isolation->prev);
		trace_amdgpu_isolation(isolation->owner, owner);
		isolation->owner = owner;
	}

	/*
	 * Specifying the ring here helps to pipeline submissions even when
	 * isolation is enabled. If that is not desired for testing NULL can be
	 * used instead of the ring to enforce a CPU round trip while switching
	 * between clients.
	 */
	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
	/* track this job's finished fence as active work of the current owner */
	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
	if (r)
		dev_warn(adev->dev, "OOM tracking isolation\n");

out_grab_ref:
	/* take the reference that is handed to the caller */
	dma_fence_get(dep);
out_return_dep:
	mutex_unlock(&adev->enforce_isolation_mutex);
	return dep;
}
6832 
6833 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6834 {
6835 	switch (adev->asic_type) {
6836 #ifdef CONFIG_DRM_AMDGPU_SI
6837 	case CHIP_HAINAN:
6838 #endif
6839 	case CHIP_TOPAZ:
6840 		/* chips with no display hardware */
6841 		return false;
6842 #ifdef CONFIG_DRM_AMDGPU_SI
6843 	case CHIP_TAHITI:
6844 	case CHIP_PITCAIRN:
6845 	case CHIP_VERDE:
6846 	case CHIP_OLAND:
6847 #endif
6848 #ifdef CONFIG_DRM_AMDGPU_CIK
6849 	case CHIP_BONAIRE:
6850 	case CHIP_HAWAII:
6851 	case CHIP_KAVERI:
6852 	case CHIP_KABINI:
6853 	case CHIP_MULLINS:
6854 #endif
6855 	case CHIP_TONGA:
6856 	case CHIP_FIJI:
6857 	case CHIP_POLARIS10:
6858 	case CHIP_POLARIS11:
6859 	case CHIP_POLARIS12:
6860 	case CHIP_VEGAM:
6861 	case CHIP_CARRIZO:
6862 	case CHIP_STONEY:
6863 		/* chips with display hardware */
6864 		return true;
6865 	default:
6866 		/* IP discovery */
6867 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6868 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6869 			return false;
6870 		return true;
6871 	}
6872 }
6873 
6874 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
6875 {
6876 	ssize_t size = 0;
6877 
6878 	if (!ring || !ring->adev)
6879 		return size;
6880 
6881 	if (amdgpu_device_should_recover_gpu(ring->adev))
6882 		size |= AMDGPU_RESET_TYPE_FULL;
6883 
6884 	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
6885 	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
6886 		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
6887 
6888 	return size;
6889 }
6890 
6891 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
6892 {
6893 	ssize_t size = 0;
6894 
6895 	if (supported_reset == 0) {
6896 		size += sysfs_emit_at(buf, size, "unsupported");
6897 		size += sysfs_emit_at(buf, size, "\n");
6898 		return size;
6899 
6900 	}
6901 
6902 	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
6903 		size += sysfs_emit_at(buf, size, "soft ");
6904 
6905 	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
6906 		size += sysfs_emit_at(buf, size, "queue ");
6907 
6908 	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
6909 		size += sysfs_emit_at(buf, size, "pipe ");
6910 
6911 	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
6912 		size += sysfs_emit_at(buf, size, "full ");
6913 
6914 	size += sysfs_emit_at(buf, size, "\n");
6915 	return size;
6916 }
6917 
6918 void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
6919 			   enum amdgpu_uid_type type, uint8_t inst,
6920 			   uint64_t uid)
6921 {
6922 	if (!uid_info)
6923 		return;
6924 
6925 	if (type >= AMDGPU_UID_TYPE_MAX) {
6926 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6927 			     type);
6928 		return;
6929 	}
6930 
6931 	if (inst >= AMDGPU_UID_INST_MAX) {
6932 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6933 			     inst);
6934 		return;
6935 	}
6936 
6937 	if (uid_info->uid[type][inst] != 0) {
6938 		dev_warn_once(
6939 			uid_info->adev->dev,
6940 			"Overwriting existing UID %llu for type %d instance %d\n",
6941 			uid_info->uid[type][inst], type, inst);
6942 	}
6943 
6944 	uid_info->uid[type][inst] = uid;
6945 }
6946 
6947 u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
6948 			  enum amdgpu_uid_type type, uint8_t inst)
6949 {
6950 	if (!uid_info)
6951 		return 0;
6952 
6953 	if (type >= AMDGPU_UID_TYPE_MAX) {
6954 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6955 			     type);
6956 		return 0;
6957 	}
6958 
6959 	if (inst >= AMDGPU_UID_INST_MAX) {
6960 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6961 			     inst);
6962 		return 0;
6963 	}
6964 
6965 	return uid_info->uid[type][inst];
6966 }
6967