xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision d7a5e069372f6a8af89dd6ee7b6fbe7ce5c99a67)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/aperture.h>
30 #include <linux/power_supply.h>
31 #include <linux/kthread.h>
32 #include <linux/module.h>
33 #include <linux/console.h>
34 #include <linux/slab.h>
35 #include <linux/iommu.h>
36 #include <linux/pci.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39 #include <linux/nospec.h>
40 
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_client_event.h>
43 #include <drm/drm_crtc_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/device.h>
47 #include <linux/vgaarb.h>
48 #include <linux/vga_switcheroo.h>
49 #include <linux/efi.h>
50 #include "amdgpu.h"
51 #include "amdgpu_trace.h"
52 #include "amdgpu_i2c.h"
53 #include "atom.h"
54 #include "amdgpu_atombios.h"
55 #include "amdgpu_atomfirmware.h"
56 #include "amd_pcie.h"
57 #ifdef CONFIG_DRM_AMDGPU_SI
58 #include "si.h"
59 #endif
60 #ifdef CONFIG_DRM_AMDGPU_CIK
61 #include "cik.h"
62 #endif
63 #include "vi.h"
64 #include "soc15.h"
65 #include "nv.h"
66 #include "bif/bif_4_1_d.h"
67 #include <linux/firmware.h>
68 #include "amdgpu_vf_error.h"
69 
70 #include "amdgpu_amdkfd.h"
71 #include "amdgpu_pm.h"
72 
73 #include "amdgpu_xgmi.h"
74 #include "amdgpu_ras.h"
75 #include "amdgpu_ras_mgr.h"
76 #include "amdgpu_pmu.h"
77 #include "amdgpu_fru_eeprom.h"
78 #include "amdgpu_reset.h"
79 #include "amdgpu_virt.h"
80 #include "amdgpu_dev_coredump.h"
81 
82 #include <linux/suspend.h>
83 #include <drm/task_barrier.h>
84 #include <linux/pm_runtime.h>
85 
86 #include <drm/drm_drv.h>
87 
88 #if IS_ENABLED(CONFIG_X86)
89 #include <asm/intel-family.h>
90 #include <asm/cpu_device_id.h>
91 #endif
92 
/*
 * Firmware images announced through MODULE_FIRMWARE(). Every entry here is a
 * "<asic>_gpu_info.bin" file under the amdgpu/ firmware directory.
 */
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
101 
/* Timing and retry tunables; their users live further down in this file. */
#define AMDGPU_RESUME_MS			2000
#define AMDGPU_MAX_RETRY_LIMIT			2

/*
 * True when @r is one of -EBUSY, -ETIMEDOUT or -EINVAL.
 * Note: @r may be evaluated up to three times, so pass a plain variable.
 */
#define AMDGPU_RETRY_SRIOV_RESET(r) \
	((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

/*
 * Fallback PCIE index/data register offsets.
 * NOTE(review): the ">> 2" presumably converts byte offsets to dword
 * offsets - confirm against the users of these macros.
 */
#define AMDGPU_PCIE_INDEX_FALLBACK		(0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK		(0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK		(0x3C >> 2)

/* Bit flags returned by amdgpu_device_get_vbios_flags(). */
#define AMDGPU_VBIOS_SKIP			(1U << 0)
#define AMDGPU_VBIOS_OPTIONAL			(1U << 1)
111 
112 static const struct drm_driver amdgpu_kms_driver;
113 
/*
 * Printable ASIC names, 38 entries ending with "IP DISCOVERY" and "LAST".
 * NOTE(review): presumably indexed by the ASIC type enum, in which case the
 * order here must match that enum - confirm before reordering or inserting.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI", "PITCAIRN", "VERDE", "OLAND", "HAINAN",
	"BONAIRE", "KAVERI", "KABINI", "HAWAII", "MULLINS",
	"TOPAZ", "TONGA", "FIJI", "CARRIZO", "STONEY",
	"POLARIS10", "POLARIS11", "POLARIS12", "VEGAM",
	"VEGA10", "VEGA12", "VEGA20", "RAVEN", "ARCTURUS",
	"RENOIR", "ALDEBARAN",
	"NAVI10", "CYAN_SKILLFISH", "NAVI14", "NAVI12",
	"SIENNA_CICHLID", "NAVY_FLOUNDER", "VANGOGH",
	"DIMGREY_CAVEFISH", "BEIGE_GOBY", "YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
154 
155 #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM  - 1, 0)
156 /*
157  * Default init level where all blocks are expected to be initialized. This is
158  * the level of initialization expected by default and also after a full reset
159  * of the device.
160  */
161 struct amdgpu_init_level amdgpu_init_default = {
162 	.level = AMDGPU_INIT_LEVEL_DEFAULT,
163 	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
164 };
165 
166 struct amdgpu_init_level amdgpu_init_recovery = {
167 	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
168 	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
169 };
170 
171 /*
172  * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
173  * is used for cases like reset on initialization where the entire hive needs to
174  * be reset before first use.
175  */
176 struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
177 	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
178 	.hwini_ip_block_mask =
179 		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
180 		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
181 		BIT(AMD_IP_BLOCK_TYPE_PSP)
182 };
183 
184 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
185 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
186 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
187 
188 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
189 
190 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
191 					     enum amd_ip_block_type block)
192 {
193 	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
194 }
195 
196 void amdgpu_set_init_level(struct amdgpu_device *adev,
197 			   enum amdgpu_init_lvl_id lvl)
198 {
199 	switch (lvl) {
200 	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
201 		adev->init_lvl = &amdgpu_init_minimal_xgmi;
202 		break;
203 	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
204 		adev->init_lvl = &amdgpu_init_recovery;
205 		break;
206 	case AMDGPU_INIT_LEVEL_DEFAULT:
207 		fallthrough;
208 	default:
209 		adev->init_lvl = &amdgpu_init_default;
210 		break;
211 	}
212 }
213 
214 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
215 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
216 				     void *data);
217 
218 /**
219  * DOC: pcie_replay_count
220  *
221  * The amdgpu driver provides a sysfs API for reporting the total number
222  * of PCIe replays (NAKs).
223  * The file pcie_replay_count is used for this and returns the total
224  * number of replays as a sum of the NAKs generated and NAKs received.
225  */
226 
227 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
228 		struct device_attribute *attr, char *buf)
229 {
230 	struct drm_device *ddev = dev_get_drvdata(dev);
231 	struct amdgpu_device *adev = drm_to_adev(ddev);
232 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
233 
234 	return sysfs_emit(buf, "%llu\n", cnt);
235 }
236 
237 static DEVICE_ATTR(pcie_replay_count, 0444,
238 		amdgpu_device_get_pcie_replay_count, NULL);
239 
240 static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
241 {
242 	int ret = 0;
243 
244 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
245 		ret = sysfs_create_file(&adev->dev->kobj,
246 					&dev_attr_pcie_replay_count.attr);
247 
248 	return ret;
249 }
250 
251 static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
252 {
253 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
254 		sysfs_remove_file(&adev->dev->kobj,
255 				  &dev_attr_pcie_replay_count.attr);
256 }
257 
258 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
259 					  const struct bin_attribute *attr, char *buf,
260 					  loff_t ppos, size_t count)
261 {
262 	struct device *dev = kobj_to_dev(kobj);
263 	struct drm_device *ddev = dev_get_drvdata(dev);
264 	struct amdgpu_device *adev = drm_to_adev(ddev);
265 	ssize_t bytes_read;
266 
267 	switch (ppos) {
268 	case AMDGPU_SYS_REG_STATE_XGMI:
269 		bytes_read = amdgpu_asic_get_reg_state(
270 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
271 		break;
272 	case AMDGPU_SYS_REG_STATE_WAFL:
273 		bytes_read = amdgpu_asic_get_reg_state(
274 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
275 		break;
276 	case AMDGPU_SYS_REG_STATE_PCIE:
277 		bytes_read = amdgpu_asic_get_reg_state(
278 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
279 		break;
280 	case AMDGPU_SYS_REG_STATE_USR:
281 		bytes_read = amdgpu_asic_get_reg_state(
282 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
283 		break;
284 	case AMDGPU_SYS_REG_STATE_USR_1:
285 		bytes_read = amdgpu_asic_get_reg_state(
286 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
287 		break;
288 	default:
289 		return -EINVAL;
290 	}
291 
292 	return bytes_read;
293 }
294 
295 static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
296 		      AMDGPU_SYS_REG_STATE_END);
297 
298 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
299 {
300 	int ret;
301 
302 	if (!amdgpu_asic_get_reg_state_supported(adev))
303 		return 0;
304 
305 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
306 
307 	return ret;
308 }
309 
310 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
311 {
312 	if (!amdgpu_asic_get_reg_state_supported(adev))
313 		return;
314 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
315 }
316 
317 /**
318  * DOC: board_info
319  *
320  * The amdgpu driver provides a sysfs API for giving board related information.
321  * It provides the form factor information in the format
322  *
323  *   type : form factor
324  *
325  * Possible form factor values
326  *
327  * - "cem"		- PCIE CEM card
328  * - "oam"		- Open Compute Accelerator Module
329  * - "unknown"	- Not known
330  *
331  */
332 
333 static ssize_t amdgpu_device_get_board_info(struct device *dev,
334 					    struct device_attribute *attr,
335 					    char *buf)
336 {
337 	struct drm_device *ddev = dev_get_drvdata(dev);
338 	struct amdgpu_device *adev = drm_to_adev(ddev);
339 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
340 	const char *pkg;
341 
342 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
343 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
344 
345 	switch (pkg_type) {
346 	case AMDGPU_PKG_TYPE_CEM:
347 		pkg = "cem";
348 		break;
349 	case AMDGPU_PKG_TYPE_OAM:
350 		pkg = "oam";
351 		break;
352 	default:
353 		pkg = "unknown";
354 		break;
355 	}
356 
357 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
358 }
359 
360 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
361 
362 static struct attribute *amdgpu_board_attrs[] = {
363 	&dev_attr_board_info.attr,
364 	NULL,
365 };
366 
367 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
368 					     struct attribute *attr, int n)
369 {
370 	struct device *dev = kobj_to_dev(kobj);
371 	struct drm_device *ddev = dev_get_drvdata(dev);
372 	struct amdgpu_device *adev = drm_to_adev(ddev);
373 
374 	if (adev->flags & AMD_IS_APU)
375 		return 0;
376 
377 	return attr->mode;
378 }
379 
380 static const struct attribute_group amdgpu_board_attrs_group = {
381 	.attrs = amdgpu_board_attrs,
382 	.is_visible = amdgpu_board_attrs_is_visible
383 };
384 
385 /**
386  * DOC: uma/carveout_options
387  *
388  * This is a read-only file that lists all available UMA allocation
389  * options and their corresponding indices. Example output::
390  *
391  *     $ cat uma/carveout_options
392  *     0: Minimum (512 MB)
393  *     1:  (1 GB)
394  *     2:  (2 GB)
395  *     3:  (4 GB)
396  *     4:  (6 GB)
397  *     5:  (8 GB)
398  *     6:  (12 GB)
399  *     7: Medium (16 GB)
400  *     8:  (24 GB)
401  *     9: High (32 GB)
402  */
403 static ssize_t carveout_options_show(struct device *dev,
404 				     struct device_attribute *attr,
405 				     char *buf)
406 {
407 	struct drm_device *ddev = dev_get_drvdata(dev);
408 	struct amdgpu_device *adev = drm_to_adev(ddev);
409 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
410 	uint32_t memory_carved;
411 	ssize_t size = 0;
412 
413 	if (!uma_info || !uma_info->num_entries)
414 		return -ENODEV;
415 
416 	for (int i = 0; i < uma_info->num_entries; i++) {
417 		memory_carved = uma_info->entries[i].memory_carved_mb;
418 		if (memory_carved >= SZ_1G/SZ_1M) {
419 			size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n",
420 					      i,
421 					      uma_info->entries[i].name,
422 					      memory_carved >> 10);
423 		} else {
424 			size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n",
425 					      i,
426 					      uma_info->entries[i].name,
427 					      memory_carved);
428 		}
429 	}
430 
431 	return size;
432 }
433 static DEVICE_ATTR_RO(carveout_options);
434 
435 /**
436  * DOC: uma/carveout
437  *
438  * This file is both readable and writable. When read, it shows the
439  * index of the current setting. Writing a valid index to this file
440  * allows users to change the UMA carveout size to the selected option
441  * on the next boot.
442  *
443  * The available options and their corresponding indices can be read
444  * from the uma/carveout_options file.
445  */
446 static ssize_t carveout_show(struct device *dev,
447 			     struct device_attribute *attr,
448 			     char *buf)
449 {
450 	struct drm_device *ddev = dev_get_drvdata(dev);
451 	struct amdgpu_device *adev = drm_to_adev(ddev);
452 
453 	return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index);
454 }
455 
456 static ssize_t carveout_store(struct device *dev,
457 			      struct device_attribute *attr,
458 			      const char *buf, size_t count)
459 {
460 	struct drm_device *ddev = dev_get_drvdata(dev);
461 	struct amdgpu_device *adev = drm_to_adev(ddev);
462 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
463 	struct amdgpu_uma_carveout_option *opt;
464 	unsigned long val;
465 	uint8_t flags;
466 	int r;
467 
468 	r = kstrtoul(buf, 10, &val);
469 	if (r)
470 		return r;
471 
472 	if (val >= uma_info->num_entries)
473 		return -EINVAL;
474 
475 	val = array_index_nospec(val, uma_info->num_entries);
476 	opt = &uma_info->entries[val];
477 
478 	if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) &&
479 	    !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) {
480 		drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val);
481 		return -EINVAL;
482 	}
483 
484 	flags = opt->flags;
485 	flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1);
486 
487 	guard(mutex)(&uma_info->update_lock);
488 
489 	r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags);
490 	if (r)
491 		return r;
492 
493 	uma_info->uma_option_index = val;
494 
495 	return count;
496 }
497 static DEVICE_ATTR_RW(carveout);
498 
499 static struct attribute *amdgpu_uma_attrs[] = {
500 	&dev_attr_carveout.attr,
501 	&dev_attr_carveout_options.attr,
502 	NULL
503 };
504 
505 const struct attribute_group amdgpu_uma_attr_group = {
506 	.name = "uma",
507 	.attrs = amdgpu_uma_attrs
508 };
509 
510 static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev)
511 {
512 	int rc;
513 
514 	if (!(adev->flags & AMD_IS_APU))
515 		return;
516 
517 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
518 		return;
519 
520 	rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info);
521 	if (rc) {
522 		drm_dbg(adev_to_drm(adev),
523 			"Failed to parse UMA carveout info from VBIOS: %d\n", rc);
524 		goto out_info;
525 	}
526 
527 	mutex_init(&adev->uma_info.update_lock);
528 
529 	rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group);
530 	if (rc) {
531 		drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc);
532 		goto out_attr;
533 	}
534 
535 	return;
536 
537 out_attr:
538 	mutex_destroy(&adev->uma_info.update_lock);
539 out_info:
540 	return;
541 }
542 
543 static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev)
544 {
545 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
546 
547 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
548 		return;
549 
550 	mutex_destroy(&uma_info->update_lock);
551 	uma_info->num_entries = 0;
552 }
553 
554 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
555 
556 /**
557  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
558  *
559  * @adev: amdgpu device pointer
560  *
561  * Returns true if the device is a dGPU with ATPX power control,
562  * otherwise return false.
563  */
564 bool amdgpu_device_supports_px(struct amdgpu_device *adev)
565 {
566 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
567 		return true;
568 	return false;
569 }
570 
571 /**
572  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
573  *
574  * @adev: amdgpu device pointer
575  *
576  * Returns true if the device is a dGPU with ACPI power control,
577  * otherwise return false.
578  */
579 bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
580 {
581 	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
582 		return false;
583 
584 	if (adev->has_pr3 ||
585 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
586 		return true;
587 	return false;
588 }
589 
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @adev: amdgpu device pointer
 *
 * Thin wrapper that forwards to amdgpu_asic_supports_baco().
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported)
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
	int support = amdgpu_asic_supports_baco(adev);

	return support;
}
604 
605 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
606 {
607 	int bamaco_support;
608 
609 	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
610 	bamaco_support = amdgpu_device_supports_baco(adev);
611 
612 	switch (amdgpu_runtime_pm) {
613 	case 2:
614 		if (bamaco_support & MACO_SUPPORT) {
615 			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
616 			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
617 		} else if (bamaco_support == BACO_SUPPORT) {
618 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
619 			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
620 		}
621 		break;
622 	case 1:
623 		if (bamaco_support & BACO_SUPPORT) {
624 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
625 			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
626 		}
627 		break;
628 	case -1:
629 	case -2:
630 		if (amdgpu_device_supports_px(adev)) {
631 			/* enable PX as runtime mode */
632 			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
633 			dev_info(adev->dev, "Using ATPX for runtime pm\n");
634 		} else if (amdgpu_device_supports_boco(adev)) {
635 			/* enable boco as runtime mode */
636 			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
637 			dev_info(adev->dev, "Using BOCO for runtime pm\n");
638 		} else {
639 			if (!bamaco_support)
640 				goto no_runtime_pm;
641 
642 			switch (adev->asic_type) {
643 			case CHIP_VEGA20:
644 			case CHIP_ARCTURUS:
645 				/* BACO are not supported on vega20 and arctrus */
646 				break;
647 			case CHIP_VEGA10:
648 				/* enable BACO as runpm mode if noretry=0 */
649 				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
650 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
651 				break;
652 			default:
653 				/* enable BACO as runpm mode on CI+ */
654 				if (!amdgpu_passthrough(adev))
655 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
656 				break;
657 			}
658 
659 			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
660 				if (bamaco_support & MACO_SUPPORT) {
661 					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
662 					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
663 				} else {
664 					dev_info(adev->dev, "Using BACO for runtime pm\n");
665 				}
666 			}
667 		}
668 		break;
669 	case 0:
670 		dev_info(adev->dev, "runtime pm is manually disabled\n");
671 		break;
672 	default:
673 		break;
674 	}
675 
676 no_runtime_pm:
677 	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
678 		dev_info(adev->dev, "Runtime PM not available\n");
679 }
680 /**
681  * amdgpu_device_supports_smart_shift - Is the device dGPU with
682  * smart shift support
683  *
684  * @adev: amdgpu device pointer
685  *
686  * Returns true if the device is a dGPU with Smart Shift support,
687  * otherwise returns false.
688  */
689 bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
690 {
691 	return (amdgpu_device_supports_boco(adev) &&
692 		amdgpu_acpi_is_power_shift_control_supported());
693 }
694 
695 /*
696  * VRAM access helper functions
697  */
698 
699 /**
700  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
701  *
702  * @adev: amdgpu_device pointer
703  * @pos: offset of the buffer in vram
704  * @buf: virtual address of the buffer in system memory
705  * @size: read/write size, sizeof(@buf) must > @size
706  * @write: true - write to vram, otherwise - read from vram
707  */
708 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
709 			     void *buf, size_t size, bool write)
710 {
711 	unsigned long flags;
712 	uint32_t hi = ~0, tmp = 0;
713 	uint32_t *data = buf;
714 	uint64_t last;
715 	int idx;
716 
717 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
718 		return;
719 
720 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
721 
722 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
723 	for (last = pos + size; pos < last; pos += 4) {
724 		tmp = pos >> 31;
725 
726 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
727 		if (tmp != hi) {
728 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
729 			hi = tmp;
730 		}
731 		if (write)
732 			WREG32_NO_KIQ(mmMM_DATA, *data++);
733 		else
734 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
735 	}
736 
737 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
738 	drm_dev_exit(idx);
739 }
740 
741 /**
742  * amdgpu_device_aper_access - access vram by vram aperture
743  *
744  * @adev: amdgpu_device pointer
745  * @pos: offset of the buffer in vram
746  * @buf: virtual address of the buffer in system memory
747  * @size: read/write size, sizeof(@buf) must > @size
748  * @write: true - write to vram, otherwise - read from vram
749  *
750  * The return value means how many bytes have been transferred.
751  */
752 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
753 				 void *buf, size_t size, bool write)
754 {
755 #ifdef CONFIG_64BIT
756 	void __iomem *addr;
757 	size_t count = 0;
758 	uint64_t last;
759 
760 	if (!adev->mman.aper_base_kaddr)
761 		return 0;
762 
763 	last = min(pos + size, adev->gmc.visible_vram_size);
764 	if (last > pos) {
765 		addr = adev->mman.aper_base_kaddr + pos;
766 		count = last - pos;
767 
768 		if (write) {
769 			memcpy_toio(addr, buf, count);
770 			/* Make sure HDP write cache flush happens without any reordering
771 			 * after the system memory contents are sent over PCIe device
772 			 */
773 			mb();
774 			amdgpu_device_flush_hdp(adev, NULL);
775 		} else {
776 			amdgpu_device_invalidate_hdp(adev, NULL);
777 			/* Make sure HDP read cache is invalidated before issuing a read
778 			 * to the PCIe device
779 			 */
780 			mb();
781 			memcpy_fromio(buf, addr, count);
782 		}
783 
784 	}
785 
786 	return count;
787 #else
788 	return 0;
789 #endif
790 }
791 
792 /**
793  * amdgpu_device_vram_access - read/write a buffer in vram
794  *
795  * @adev: amdgpu_device pointer
796  * @pos: offset of the buffer in vram
797  * @buf: virtual address of the buffer in system memory
798  * @size: read/write size, sizeof(@buf) must > @size
799  * @write: true - write to vram, otherwise - read from vram
800  */
801 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
802 			       void *buf, size_t size, bool write)
803 {
804 	size_t count;
805 
806 	/* try to using vram apreature to access vram first */
807 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
808 	size -= count;
809 	if (size) {
810 		/* using MM to access rest vram */
811 		pos += count;
812 		buf += count;
813 		amdgpu_device_mm_access(adev, pos, buf, size, write);
814 	}
815 }
816 
817 /*
818  * register access helper functions.
819  */
820 
821 /* Check if hw access should be skipped because of hotplug or device error */
822 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
823 {
824 	if (adev->no_hw_access)
825 		return true;
826 
827 #ifdef CONFIG_LOCKDEP
828 	/*
829 	 * This is a bit complicated to understand, so worth a comment. What we assert
830 	 * here is that the GPU reset is not running on another thread in parallel.
831 	 *
832 	 * For this we trylock the read side of the reset semaphore, if that succeeds
833 	 * we know that the reset is not running in parallel.
834 	 *
835 	 * If the trylock fails we assert that we are either already holding the read
836 	 * side of the lock or are the reset thread itself and hold the write side of
837 	 * the lock.
838 	 */
839 	if (in_task()) {
840 		if (down_read_trylock(&adev->reset_domain->sem))
841 			up_read(&adev->reset_domain->sem);
842 		else
843 			lockdep_assert_held(&adev->reset_domain->sem);
844 	}
845 #endif
846 	return false;
847 }
848 
849 /**
850  * amdgpu_device_get_rev_id - query device rev_id
851  *
852  * @adev: amdgpu_device pointer
853  *
854  * Return device rev_id
855  */
856 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
857 {
858 	return adev->nbio.funcs->get_rev_id(adev);
859 }
860 
861 static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
862 {
863 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
864 		return AMDGPU_VBIOS_SKIP;
865 
866 	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
867 		return AMDGPU_VBIOS_OPTIONAL;
868 
869 	return 0;
870 }
871 
872 /**
873  * amdgpu_device_asic_init - Wrapper for atom asic_init
874  *
875  * @adev: amdgpu_device pointer
876  *
877  * Does any asic specific work and then calls atom asic init.
878  */
879 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
880 {
881 	uint32_t flags;
882 	bool optional;
883 	int ret;
884 
885 	amdgpu_asic_pre_asic_init(adev);
886 	flags = amdgpu_device_get_vbios_flags(adev);
887 	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
888 
889 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
890 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
891 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
892 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
893 		amdgpu_psp_wait_for_bootloader(adev);
894 		if (optional && !adev->bios)
895 			return 0;
896 
897 		ret = amdgpu_atomfirmware_asic_init(adev, true);
898 		return ret;
899 	} else {
900 		if (optional && !adev->bios)
901 			return 0;
902 
903 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
904 	}
905 
906 	return 0;
907 }
908 
909 /**
910  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
911  *
912  * @adev: amdgpu_device pointer
913  *
914  * Allocates a scratch page of VRAM for use by various things in the
915  * driver.
916  */
917 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
918 {
919 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
920 				       AMDGPU_GEM_DOMAIN_VRAM |
921 				       AMDGPU_GEM_DOMAIN_GTT,
922 				       &adev->mem_scratch.robj,
923 				       &adev->mem_scratch.gpu_addr,
924 				       (void **)&adev->mem_scratch.ptr);
925 }
926 
927 /**
928  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
929  *
930  * @adev: amdgpu_device pointer
931  *
932  * Frees the VRAM scratch page.
933  */
934 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
935 {
936 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
937 }
938 
939 /**
940  * amdgpu_device_program_register_sequence - program an array of registers.
941  *
942  * @adev: amdgpu_device pointer
943  * @registers: pointer to the register array
944  * @array_size: size of the register array
945  *
946  * Programs an array or registers with and or masks.
947  * This is a helper for setting golden registers.
948  */
949 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
950 					     const u32 *registers,
951 					     const u32 array_size)
952 {
953 	u32 tmp, reg, and_mask, or_mask;
954 	int i;
955 
956 	if (array_size % 3)
957 		return;
958 
959 	for (i = 0; i < array_size; i += 3) {
960 		reg = registers[i + 0];
961 		and_mask = registers[i + 1];
962 		or_mask = registers[i + 2];
963 
964 		if (and_mask == 0xffffffff) {
965 			tmp = or_mask;
966 		} else {
967 			tmp = RREG32(reg);
968 			tmp &= ~and_mask;
969 			if (adev->family >= AMDGPU_FAMILY_AI)
970 				tmp |= (or_mask & and_mask);
971 			else
972 				tmp |= or_mask;
973 		}
974 		WREG32(reg, tmp);
975 	}
976 }
977 
978 /**
979  * amdgpu_device_pci_config_reset - reset the GPU
980  *
981  * @adev: amdgpu_device pointer
982  *
983  * Resets the GPU using the pci config reset sequence.
984  * Only applicable to asics prior to vega10.
985  */
986 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
987 {
988 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
989 }
990 
991 /**
992  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
993  *
994  * @adev: amdgpu_device pointer
995  *
996  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
997  */
998 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
999 {
1000 	return pci_reset_function(adev->pdev);
1001 }
1002 
1003 /*
1004  * amdgpu_device_wb_*()
1005  * Writeback is the method by which the GPU updates special pages in memory
1006  * with the status of certain GPU events (fences, ring pointers,etc.).
1007  */
1008 
1009 /**
1010  * amdgpu_device_wb_fini - Disable Writeback and free memory
1011  *
1012  * @adev: amdgpu_device pointer
1013  *
1014  * Disables Writeback and frees the Writeback memory (all asics).
1015  * Used at driver shutdown.
1016  */
1017 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1018 {
1019 	if (adev->wb.wb_obj) {
1020 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1021 				      &adev->wb.gpu_addr,
1022 				      (void **)&adev->wb.wb);
1023 		adev->wb.wb_obj = NULL;
1024 	}
1025 }
1026 
1027 /**
1028  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1029  *
1030  * @adev: amdgpu_device pointer
1031  *
1032  * Initializes writeback and allocates writeback memory (all asics).
1033  * Used at driver startup.
1034  * Returns 0 on success or an -error on failure.
1035  */
1036 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1037 {
1038 	int r;
1039 
1040 	if (adev->wb.wb_obj == NULL) {
1041 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1042 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1043 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1044 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1045 					    (void **)&adev->wb.wb);
1046 		if (r) {
1047 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1048 			return r;
1049 		}
1050 
1051 		adev->wb.num_wb = AMDGPU_MAX_WB;
1052 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1053 
1054 		/* clear wb memory */
1055 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 /**
1062  * amdgpu_device_wb_get - Allocate a wb entry
1063  *
1064  * @adev: amdgpu_device pointer
1065  * @wb: wb index
1066  *
1067  * Allocate a wb slot for use by the driver (all asics).
1068  * Returns 0 on success or -EINVAL on failure.
1069  */
1070 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1071 {
1072 	unsigned long flags, offset;
1073 
1074 	spin_lock_irqsave(&adev->wb.lock, flags);
1075 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1076 	if (offset < adev->wb.num_wb) {
1077 		__set_bit(offset, adev->wb.used);
1078 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1079 		*wb = offset << 3; /* convert to dw offset */
1080 		return 0;
1081 	} else {
1082 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1083 		return -EINVAL;
1084 	}
1085 }
1086 
1087 /**
1088  * amdgpu_device_wb_free - Free a wb entry
1089  *
1090  * @adev: amdgpu_device pointer
1091  * @wb: wb index
1092  *
1093  * Free a wb slot allocated for use by the driver (all asics)
1094  */
1095 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1096 {
1097 	unsigned long flags;
1098 
1099 	wb >>= 3;
1100 	spin_lock_irqsave(&adev->wb.lock, flags);
1101 	if (wb < adev->wb.num_wb)
1102 		__clear_bit(wb, adev->wb.used);
1103 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1104 }
1105 
1106 /**
1107  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1108  *
1109  * @adev: amdgpu_device pointer
1110  *
1111  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1112  * to fail, but if any of the BARs is not accessible after the size we abort
1113  * driver loading by returning -ENODEV.
1114  */
1115 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1116 {
1117 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1118 	struct pci_bus *root;
1119 	struct resource *res;
1120 	int max_size, r;
1121 	unsigned int i;
1122 	u16 cmd;
1123 
1124 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1125 		return 0;
1126 
1127 	/* Bypass for VF */
1128 	if (amdgpu_sriov_vf(adev))
1129 		return 0;
1130 
1131 	if (!amdgpu_rebar)
1132 		return 0;
1133 
1134 	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1135 	if ((amdgpu_runtime_pm != 0) &&
1136 	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1137 	    adev->pdev->device == 0x731f &&
1138 	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1139 		return 0;
1140 
1141 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1142 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1143 		dev_warn(
1144 			adev->dev,
1145 			"System can't access extended configuration space, please check!!\n");
1146 
1147 	/* skip if the bios has already enabled large BAR */
1148 	if (adev->gmc.real_vram_size &&
1149 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1150 		return 0;
1151 
1152 	/* Check if the root BUS has 64bit memory resources */
1153 	root = adev->pdev->bus;
1154 	while (root->parent)
1155 		root = root->parent;
1156 
1157 	pci_bus_for_each_resource(root, res, i) {
1158 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1159 		    res->start > 0x100000000ull)
1160 			break;
1161 	}
1162 
1163 	/* Trying to resize is pointless without a root hub window above 4GB */
1164 	if (!res)
1165 		return 0;
1166 
1167 	/* Limit the BAR size to what is available */
1168 	max_size = pci_rebar_get_max_size(adev->pdev, 0);
1169 	if (max_size < 0)
1170 		return 0;
1171 	rbar_size = min(max_size, rbar_size);
1172 
1173 	/* Disable memory decoding while we change the BAR addresses and size */
1174 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1175 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1176 			      cmd & ~PCI_COMMAND_MEMORY);
1177 
1178 	/* Tear down doorbell as resizing will release BARs */
1179 	amdgpu_doorbell_fini(adev);
1180 
1181 	r = pci_resize_resource(adev->pdev, 0, rbar_size,
1182 				(adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
1183 								  : 1 << 2);
1184 	if (r == -ENOSPC)
1185 		dev_info(adev->dev,
1186 			 "Not enough PCI address space for a large BAR.");
1187 	else if (r && r != -ENOTSUPP)
1188 		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1189 
1190 	/* When the doorbell or fb BAR isn't available we have no chance of
1191 	 * using the device.
1192 	 */
1193 	r = amdgpu_doorbell_init(adev);
1194 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1195 		return -ENODEV;
1196 
1197 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1198 
1199 	return 0;
1200 }
1201 
/*
 * GPU helper functions.
 */
1205 /**
1206  * amdgpu_device_need_post - check if the hw need post or not
1207  *
1208  * @adev: amdgpu_device pointer
1209  *
1210  * Check if the asic has been initialized (all asics) at driver startup
1211  * or post is needed if  hw reset is performed.
1212  * Returns true if need or false if not.
1213  */
1214 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1215 {
1216 	uint32_t reg, flags;
1217 
1218 	if (amdgpu_sriov_vf(adev))
1219 		return false;
1220 
1221 	flags = amdgpu_device_get_vbios_flags(adev);
1222 	if (flags & AMDGPU_VBIOS_SKIP)
1223 		return false;
1224 	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1225 		return false;
1226 
1227 	if (amdgpu_passthrough(adev)) {
1228 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1229 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1230 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1231 		 * vpost executed for smc version below 22.15
1232 		 */
1233 		if (adev->asic_type == CHIP_FIJI) {
1234 			int err;
1235 			uint32_t fw_ver;
1236 
1237 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1238 			/* force vPost if error occurred */
1239 			if (err)
1240 				return true;
1241 
1242 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1243 			release_firmware(adev->pm.fw);
1244 			if (fw_ver < 0x00160e00)
1245 				return true;
1246 		}
1247 	}
1248 
1249 	/* Don't post if we need to reset whole hive on init */
1250 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1251 		return false;
1252 
1253 	if (adev->has_hw_reset) {
1254 		adev->has_hw_reset = false;
1255 		return true;
1256 	}
1257 
1258 	/* bios scratch used on CIK+ */
1259 	if (adev->asic_type >= CHIP_BONAIRE)
1260 		return amdgpu_atombios_scratch_need_asic_init(adev);
1261 
1262 	/* check MEM_SIZE for older asics */
1263 	reg = amdgpu_asic_get_config_memsize(adev);
1264 
1265 	if ((reg != 0) && (reg != 0xffffffff))
1266 		return false;
1267 
1268 	return true;
1269 }
1270 
1271 /*
1272  * Check whether seamless boot is supported.
1273  *
1274  * So far we only support seamless boot on DCE 3.0 or later.
1275  * If users report that it works on older ASICS as well, we may
1276  * loosen this.
1277  */
1278 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1279 {
1280 	switch (amdgpu_seamless) {
1281 	case -1:
1282 		break;
1283 	case 1:
1284 		return true;
1285 	case 0:
1286 		return false;
1287 	default:
1288 		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1289 			amdgpu_seamless);
1290 		return false;
1291 	}
1292 
1293 	if (!(adev->flags & AMD_IS_APU))
1294 		return false;
1295 
1296 	if (adev->mman.keep_stolen_vga_memory)
1297 		return false;
1298 
1299 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1300 }
1301 
1302 /*
1303  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1304  * don't support dynamic speed switching. Until we have confirmation from Intel
1305  * that a specific host supports it, it's safer that we keep it disabled for all.
1306  *
1307  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1308  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1309  */
1310 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1311 {
1312 #if IS_ENABLED(CONFIG_X86)
1313 	struct cpuinfo_x86 *c = &cpu_data(0);
1314 
1315 	/* eGPU change speeds based on USB4 fabric conditions */
1316 	if (dev_is_removable(adev->dev))
1317 		return true;
1318 
1319 	if (c->x86_vendor == X86_VENDOR_INTEL)
1320 		return false;
1321 #endif
1322 	return true;
1323 }
1324 
1325 static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1326 {
1327 	/* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
1328 	 * It's unclear if this is a platform-specific or GPU-specific issue.
1329 	 * Disable ASPM on SI for the time being.
1330 	 */
1331 	if (adev->family == AMDGPU_FAMILY_SI)
1332 		return true;
1333 
1334 #if IS_ENABLED(CONFIG_X86)
1335 	struct cpuinfo_x86 *c = &cpu_data(0);
1336 
1337 	if (c->x86_vendor == X86_VENDOR_INTEL) {
1338 		switch (c->x86_model) {
1339 		case VFM_MODEL(INTEL_ALDERLAKE):
1340 		case VFM_MODEL(INTEL_ALDERLAKE_L):
1341 		case VFM_MODEL(INTEL_RAPTORLAKE):
1342 		case VFM_MODEL(INTEL_RAPTORLAKE_P):
1343 		case VFM_MODEL(INTEL_RAPTORLAKE_S):
1344 		case VFM_MODEL(INTEL_TIGERLAKE):
1345 		case VFM_MODEL(INTEL_TIGERLAKE_L):
1346 			return true;
1347 		default:
1348 			return false;
1349 		}
1350 	} else {
1351 		return false;
1352 	}
1353 #else
1354 	return false;
1355 #endif
1356 }
1357 
1358 /**
1359  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1360  *
1361  * @adev: amdgpu_device pointer
1362  *
1363  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1364  * be set for this device.
1365  *
1366  * Returns true if it should be used or false if not.
1367  */
1368 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1369 {
1370 	switch (amdgpu_aspm) {
1371 	case -1:
1372 		break;
1373 	case 0:
1374 		return false;
1375 	case 1:
1376 		return true;
1377 	default:
1378 		return false;
1379 	}
1380 	if (adev->flags & AMD_IS_APU)
1381 		return false;
1382 	if (amdgpu_device_aspm_support_quirk(adev))
1383 		return false;
1384 	return pcie_aspm_enabled(adev->pdev);
1385 }
1386 
1387 /* if we get transitioned to only one device, take VGA back */
1388 /**
1389  * amdgpu_device_vga_set_decode - enable/disable vga decode
1390  *
1391  * @pdev: PCI device pointer
1392  * @state: enable/disable vga decode
1393  *
1394  * Enable/disable vga decode (all asics).
1395  * Returns VGA resource flags.
1396  */
1397 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1398 		bool state)
1399 {
1400 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1401 
1402 	amdgpu_asic_set_vga_state(adev, state);
1403 	if (state)
1404 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1405 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1406 	else
1407 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1408 }
1409 
1410 /**
1411  * amdgpu_device_check_block_size - validate the vm block size
1412  *
1413  * @adev: amdgpu_device pointer
1414  *
1415  * Validates the vm block size specified via module parameter.
1416  * The vm block size defines number of bits in page table versus page directory,
1417  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1418  * page table and the remaining bits are in the page directory.
1419  */
1420 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1421 {
1422 	/* defines number of bits in page table versus page directory,
1423 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1424 	 * page table and the remaining bits are in the page directory
1425 	 */
1426 	if (amdgpu_vm_block_size == -1)
1427 		return;
1428 
1429 	if (amdgpu_vm_block_size < 9) {
1430 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1431 			 amdgpu_vm_block_size);
1432 		amdgpu_vm_block_size = -1;
1433 	}
1434 }
1435 
1436 /**
1437  * amdgpu_device_check_vm_size - validate the vm size
1438  *
1439  * @adev: amdgpu_device pointer
1440  *
1441  * Validates the vm size in GB specified via module parameter.
1442  * The VM size is the size of the GPU virtual memory space in GB.
1443  */
1444 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1445 {
1446 	/* no need to check the default value */
1447 	if (amdgpu_vm_size == -1)
1448 		return;
1449 
1450 	if (amdgpu_vm_size < 1) {
1451 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1452 			 amdgpu_vm_size);
1453 		amdgpu_vm_size = -1;
1454 	}
1455 }
1456 
1457 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1458 {
1459 	struct sysinfo si;
1460 	bool is_os_64 = (sizeof(void *) == 8);
1461 	uint64_t total_memory;
1462 	uint64_t dram_size_seven_GB = 0x1B8000000;
1463 	uint64_t dram_size_three_GB = 0xB8000000;
1464 
1465 	if (amdgpu_smu_memory_pool_size == 0)
1466 		return;
1467 
1468 	if (!is_os_64) {
1469 		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
1470 		goto def_value;
1471 	}
1472 	si_meminfo(&si);
1473 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1474 
1475 	if ((amdgpu_smu_memory_pool_size == 1) ||
1476 		(amdgpu_smu_memory_pool_size == 2)) {
1477 		if (total_memory < dram_size_three_GB)
1478 			goto def_value1;
1479 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1480 		(amdgpu_smu_memory_pool_size == 8)) {
1481 		if (total_memory < dram_size_seven_GB)
1482 			goto def_value1;
1483 	} else {
1484 		dev_warn(adev->dev, "Smu memory pool size not supported\n");
1485 		goto def_value;
1486 	}
1487 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1488 
1489 	return;
1490 
1491 def_value1:
1492 	dev_warn(adev->dev, "No enough system memory\n");
1493 def_value:
1494 	adev->pm.smu_prv_buffer_size = 0;
1495 }
1496 
1497 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1498 {
1499 	if (!(adev->flags & AMD_IS_APU) ||
1500 	    adev->asic_type < CHIP_RAVEN)
1501 		return 0;
1502 
1503 	switch (adev->asic_type) {
1504 	case CHIP_RAVEN:
1505 		if (adev->pdev->device == 0x15dd)
1506 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1507 		if (adev->pdev->device == 0x15d8)
1508 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1509 		break;
1510 	case CHIP_RENOIR:
1511 		if ((adev->pdev->device == 0x1636) ||
1512 		    (adev->pdev->device == 0x164c))
1513 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1514 		else
1515 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1516 		break;
1517 	case CHIP_VANGOGH:
1518 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1519 		break;
1520 	case CHIP_YELLOW_CARP:
1521 		break;
1522 	case CHIP_CYAN_SKILLFISH:
1523 		if ((adev->pdev->device == 0x13FE) ||
1524 		    (adev->pdev->device == 0x143F))
1525 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1526 		break;
1527 	default:
1528 		break;
1529 	}
1530 
1531 	return 0;
1532 }
1533 
1534 /**
1535  * amdgpu_device_check_arguments - validate module params
1536  *
1537  * @adev: amdgpu_device pointer
1538  *
1539  * Validates certain module parameters and updates
1540  * the associated values used by the driver (all asics).
1541  */
1542 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1543 {
1544 	int i;
1545 
1546 	if (amdgpu_sched_jobs < 4) {
1547 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1548 			 amdgpu_sched_jobs);
1549 		amdgpu_sched_jobs = 4;
1550 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1551 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1552 			 amdgpu_sched_jobs);
1553 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1554 	}
1555 
1556 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1557 		/* gart size must be greater or equal to 32M */
1558 		dev_warn(adev->dev, "gart size (%d) too small\n",
1559 			 amdgpu_gart_size);
1560 		amdgpu_gart_size = -1;
1561 	}
1562 
1563 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1564 		/* gtt size must be greater or equal to 32M */
1565 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1566 				 amdgpu_gtt_size);
1567 		amdgpu_gtt_size = -1;
1568 	}
1569 
1570 	/* valid range is between 4 and 9 inclusive */
1571 	if (amdgpu_vm_fragment_size != -1 &&
1572 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1573 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1574 		amdgpu_vm_fragment_size = -1;
1575 	}
1576 
1577 	if (amdgpu_sched_hw_submission < 2) {
1578 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1579 			 amdgpu_sched_hw_submission);
1580 		amdgpu_sched_hw_submission = 2;
1581 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1582 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1583 			 amdgpu_sched_hw_submission);
1584 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1585 	}
1586 
1587 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1588 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1589 		amdgpu_reset_method = -1;
1590 	}
1591 
1592 	amdgpu_device_check_smu_prv_buffer_size(adev);
1593 
1594 	amdgpu_device_check_vm_size(adev);
1595 
1596 	amdgpu_device_check_block_size(adev);
1597 
1598 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1599 
1600 	for (i = 0; i < MAX_XCP; i++) {
1601 		switch (amdgpu_enforce_isolation) {
1602 		case -1:
1603 		case 0:
1604 		default:
1605 			/* disable */
1606 			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
1607 			break;
1608 		case 1:
1609 			/* enable */
1610 			adev->enforce_isolation[i] =
1611 				AMDGPU_ENFORCE_ISOLATION_ENABLE;
1612 			break;
1613 		case 2:
1614 			/* enable legacy mode */
1615 			adev->enforce_isolation[i] =
1616 				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
1617 			break;
1618 		case 3:
1619 			/* enable only process isolation without submitting cleaner shader */
1620 			adev->enforce_isolation[i] =
1621 				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
1622 			break;
1623 		}
1624 	}
1625 
1626 	return 0;
1627 }
1628 
1629 /**
1630  * amdgpu_switcheroo_set_state - set switcheroo state
1631  *
1632  * @pdev: pci dev pointer
1633  * @state: vga_switcheroo state
1634  *
1635  * Callback for the switcheroo driver.  Suspends or resumes
1636  * the asics before or after it is powered up using ACPI methods.
1637  */
1638 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1639 					enum vga_switcheroo_state state)
1640 {
1641 	struct drm_device *dev = pci_get_drvdata(pdev);
1642 	int r;
1643 
1644 	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
1645 	    state == VGA_SWITCHEROO_OFF)
1646 		return;
1647 
1648 	if (state == VGA_SWITCHEROO_ON) {
1649 		pr_info("switched on\n");
1650 		/* don't suspend or resume card normally */
1651 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1652 
1653 		pci_set_power_state(pdev, PCI_D0);
1654 		amdgpu_device_load_pci_state(pdev);
1655 		r = pci_enable_device(pdev);
1656 		if (r)
1657 			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
1658 				 r);
1659 		amdgpu_device_resume(dev, true);
1660 
1661 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1662 	} else {
1663 		dev_info(&pdev->dev, "switched off\n");
1664 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1665 		amdgpu_device_prepare(dev);
1666 		amdgpu_device_suspend(dev, true);
1667 		amdgpu_device_cache_pci_state(pdev);
1668 		/* Shut down the device */
1669 		pci_disable_device(pdev);
1670 		pci_set_power_state(pdev, PCI_D3cold);
1671 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1672 	}
1673 }
1674 
1675 /**
1676  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1677  *
1678  * @pdev: pci dev pointer
1679  *
1680  * Callback for the switcheroo driver.  Check of the switcheroo
1681  * state can be changed.
1682  * Returns true if the state can be changed, false if not.
1683  */
1684 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1685 {
1686 	struct drm_device *dev = pci_get_drvdata(pdev);
1687 
1688        /*
1689 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1690 	* locking inversion with the driver load path. And the access here is
1691 	* completely racy anyway. So don't bother with locking for now.
1692 	*/
1693 	return atomic_read(&dev->open_count) == 0;
1694 }
1695 
1696 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1697 	.set_gpu_state = amdgpu_switcheroo_set_state,
1698 	.reprobe = NULL,
1699 	.can_switch = amdgpu_switcheroo_can_switch,
1700 };
1701 
1702 /**
1703  * amdgpu_device_enable_virtual_display - enable virtual display feature
1704  *
1705  * @adev: amdgpu_device pointer
1706  *
1707  * Enabled the virtual display feature if the user has enabled it via
1708  * the module parameter virtual_display.  This feature provides a virtual
1709  * display hardware on headless boards or in virtualized environments.
1710  * This function parses and validates the configuration string specified by
1711  * the user and configures the virtual display configuration (number of
1712  * virtual connectors, crtcs, etc.) specified.
1713  */
1714 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1715 {
1716 	adev->enable_virtual_display = false;
1717 
1718 	if (amdgpu_virtual_display) {
1719 		const char *pci_address_name = pci_name(adev->pdev);
1720 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1721 
1722 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1723 		pciaddstr_tmp = pciaddstr;
1724 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1725 			pciaddname = strsep(&pciaddname_tmp, ",");
1726 			if (!strcmp("all", pciaddname)
1727 			    || !strcmp(pci_address_name, pciaddname)) {
1728 				long num_crtc;
1729 				int res = -1;
1730 
1731 				adev->enable_virtual_display = true;
1732 
1733 				if (pciaddname_tmp)
1734 					res = kstrtol(pciaddname_tmp, 10,
1735 						      &num_crtc);
1736 
1737 				if (!res) {
1738 					if (num_crtc < 1)
1739 						num_crtc = 1;
1740 					if (num_crtc > 6)
1741 						num_crtc = 6;
1742 					adev->mode_info.num_crtc = num_crtc;
1743 				} else {
1744 					adev->mode_info.num_crtc = 1;
1745 				}
1746 				break;
1747 			}
1748 		}
1749 
1750 		dev_info(
1751 			adev->dev,
1752 			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1753 			amdgpu_virtual_display, pci_address_name,
1754 			adev->enable_virtual_display, adev->mode_info.num_crtc);
1755 
1756 		kfree(pciaddstr);
1757 	}
1758 }
1759 
1760 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1761 {
1762 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1763 		adev->mode_info.num_crtc = 1;
1764 		adev->enable_virtual_display = true;
1765 		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
1766 			 adev->enable_virtual_display,
1767 			 adev->mode_info.num_crtc);
1768 	}
1769 }
1770 
1771 /**
1772  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1773  *
1774  * @adev: amdgpu_device pointer
1775  *
1776  * Parses the asic configuration parameters specified in the gpu info
1777  * firmware and makes them available to the driver for use in configuring
1778  * the asic.
1779  * Returns 0 on success, -EINVAL on failure.
1780  */
1781 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1782 {
1783 	const char *chip_name;
1784 	int err;
1785 	const struct gpu_info_firmware_header_v1_0 *hdr;
1786 
1787 	adev->firmware.gpu_info_fw = NULL;
1788 
1789 	switch (adev->asic_type) {
1790 	default:
1791 		return 0;
1792 	case CHIP_VEGA10:
1793 		chip_name = "vega10";
1794 		break;
1795 	case CHIP_VEGA12:
1796 		chip_name = "vega12";
1797 		break;
1798 	case CHIP_RAVEN:
1799 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1800 			chip_name = "raven2";
1801 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1802 			chip_name = "picasso";
1803 		else
1804 			chip_name = "raven";
1805 		break;
1806 	case CHIP_ARCTURUS:
1807 		chip_name = "arcturus";
1808 		break;
1809 	case CHIP_NAVI12:
1810 		if (adev->discovery.bin)
1811 			return 0;
1812 		chip_name = "navi12";
1813 		break;
1814 	case CHIP_CYAN_SKILLFISH:
1815 		if (adev->discovery.bin)
1816 			return 0;
1817 		chip_name = "cyan_skillfish";
1818 		break;
1819 	}
1820 
1821 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
1822 				   AMDGPU_UCODE_OPTIONAL,
1823 				   "amdgpu/%s_gpu_info.bin", chip_name);
1824 	if (err) {
1825 		dev_err(adev->dev,
1826 			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
1827 			chip_name);
1828 		goto out;
1829 	}
1830 
1831 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1832 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1833 
1834 	switch (hdr->version_major) {
1835 	case 1:
1836 	{
1837 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1838 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1839 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1840 
1841 		/*
1842 		 * Should be dropped when DAL no longer needs it.
1843 		 */
1844 		if (adev->asic_type == CHIP_NAVI12)
1845 			goto parse_soc_bounding_box;
1846 
1847 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1848 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1849 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1850 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1851 		adev->gfx.config.max_texture_channel_caches =
1852 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1853 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1854 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1855 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1856 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1857 		adev->gfx.config.double_offchip_lds_buf =
1858 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1859 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1860 		adev->gfx.cu_info.max_waves_per_simd =
1861 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1862 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1863 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1864 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1865 		if (hdr->version_minor >= 1) {
1866 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1867 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1868 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1869 			adev->gfx.config.num_sc_per_sh =
1870 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1871 			adev->gfx.config.num_packer_per_sc =
1872 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1873 		}
1874 
1875 parse_soc_bounding_box:
1876 		/*
1877 		 * soc bounding box info is not integrated in disocovery table,
1878 		 * we always need to parse it from gpu info firmware if needed.
1879 		 */
1880 		if (hdr->version_minor == 2) {
1881 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1882 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1883 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1885 		}
1886 		break;
1887 	}
1888 	default:
1889 		dev_err(adev->dev,
1890 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1891 		err = -EINVAL;
1892 		goto out;
1893 	}
1894 out:
1895 	return err;
1896 }
1897 
1898 static void amdgpu_uid_init(struct amdgpu_device *adev)
1899 {
1900 	/* Initialize the UID for the device */
1901 	adev->uid_info = kzalloc_obj(struct amdgpu_uid);
1902 	if (!adev->uid_info) {
1903 		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
1904 		return;
1905 	}
1906 	adev->uid_info->adev = adev;
1907 }
1908 
1909 static void amdgpu_uid_fini(struct amdgpu_device *adev)
1910 {
1911 	/* Free the UID memory */
1912 	kfree(adev->uid_info);
1913 	adev->uid_info = NULL;
1914 }
1915 
1916 static struct pci_dev *amdgpu_device_find_parent(struct amdgpu_device *adev)
1917 {
1918 	struct pci_dev *parent = adev->pdev;
1919 
1920 	/* skip upstream/downstream switches internal to dGPU */
1921 	while ((parent = pci_upstream_bridge(parent))) {
1922 		if (parent->vendor == PCI_VENDOR_ID_ATI)
1923 			continue;
1924 		break;
1925 	}
1926 
1927 	return parent;
1928 }
1929 
/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.
 * This is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
1940 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1941 {
1942 	struct amdgpu_ip_block *ip_block;
1943 	struct pci_dev *parent;
1944 	bool total, skip_bios;
1945 	uint32_t bios_flags;
1946 	int i, r;
1947 
1948 	amdgpu_device_enable_virtual_display(adev);
1949 
1950 	if (amdgpu_sriov_vf(adev)) {
1951 		r = amdgpu_virt_request_full_gpu(adev, true);
1952 		if (r)
1953 			return r;
1954 
1955 		r = amdgpu_virt_init_critical_region(adev);
1956 		if (r)
1957 			return r;
1958 	}
1959 
1960 	switch (adev->asic_type) {
1961 #ifdef CONFIG_DRM_AMDGPU_SI
1962 	case CHIP_VERDE:
1963 	case CHIP_TAHITI:
1964 	case CHIP_PITCAIRN:
1965 	case CHIP_OLAND:
1966 	case CHIP_HAINAN:
1967 		adev->family = AMDGPU_FAMILY_SI;
1968 		r = si_set_ip_blocks(adev);
1969 		if (r)
1970 			return r;
1971 		break;
1972 #endif
1973 #ifdef CONFIG_DRM_AMDGPU_CIK
1974 	case CHIP_BONAIRE:
1975 	case CHIP_HAWAII:
1976 	case CHIP_KAVERI:
1977 	case CHIP_KABINI:
1978 	case CHIP_MULLINS:
1979 		if (adev->flags & AMD_IS_APU)
1980 			adev->family = AMDGPU_FAMILY_KV;
1981 		else
1982 			adev->family = AMDGPU_FAMILY_CI;
1983 
1984 		r = cik_set_ip_blocks(adev);
1985 		if (r)
1986 			return r;
1987 		break;
1988 #endif
1989 	case CHIP_TOPAZ:
1990 	case CHIP_TONGA:
1991 	case CHIP_FIJI:
1992 	case CHIP_POLARIS10:
1993 	case CHIP_POLARIS11:
1994 	case CHIP_POLARIS12:
1995 	case CHIP_VEGAM:
1996 	case CHIP_CARRIZO:
1997 	case CHIP_STONEY:
1998 		if (adev->flags & AMD_IS_APU)
1999 			adev->family = AMDGPU_FAMILY_CZ;
2000 		else
2001 			adev->family = AMDGPU_FAMILY_VI;
2002 
2003 		r = vi_set_ip_blocks(adev);
2004 		if (r)
2005 			return r;
2006 		break;
2007 	default:
2008 		r = amdgpu_discovery_set_ip_blocks(adev);
2009 		if (r) {
2010 			adev->num_ip_blocks = 0;
2011 			return r;
2012 		}
2013 		break;
2014 	}
2015 
2016 	/* Check for IP version 9.4.3 with A0 hardware */
2017 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2018 	    !amdgpu_device_get_rev_id(adev)) {
2019 		dev_err(adev->dev, "Unsupported A0 hardware\n");
2020 		return -ENODEV;	/* device unsupported - no device error */
2021 	}
2022 
2023 	if (amdgpu_has_atpx() &&
2024 	    (amdgpu_is_atpx_hybrid() ||
2025 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2026 	    ((adev->flags & AMD_IS_APU) == 0) &&
2027 	    !dev_is_removable(&adev->pdev->dev))
2028 		adev->flags |= AMD_IS_PX;
2029 
2030 	if (!(adev->flags & AMD_IS_APU)) {
2031 		parent = pcie_find_root_port(adev->pdev);
2032 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2033 	}
2034 
2035 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2036 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2037 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2038 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2039 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2040 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2041 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2042 
2043 	adev->virt.is_xgmi_node_migrate_enabled = false;
2044 	if (amdgpu_sriov_vf(adev)) {
2045 		adev->virt.is_xgmi_node_migrate_enabled =
2046 			amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2047 	}
2048 
2049 	total = true;
2050 	for (i = 0; i < adev->num_ip_blocks; i++) {
2051 		ip_block = &adev->ip_blocks[i];
2052 
2053 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2054 			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2055 				 adev->ip_blocks[i].version->funcs->name);
2056 			adev->ip_blocks[i].status.valid = false;
2057 		} else if (ip_block->version->funcs->early_init) {
2058 			r = ip_block->version->funcs->early_init(ip_block);
2059 			if (r == -ENOENT) {
2060 				adev->ip_blocks[i].status.valid = false;
2061 			} else if (r) {
2062 				dev_err(adev->dev,
2063 					"early_init of IP block <%s> failed %d\n",
2064 					adev->ip_blocks[i].version->funcs->name,
2065 					r);
2066 				total = false;
2067 			} else {
2068 				adev->ip_blocks[i].status.valid = true;
2069 			}
2070 		} else {
2071 			adev->ip_blocks[i].status.valid = true;
2072 		}
2073 		/* get the vbios after the asic_funcs are set up */
2074 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2075 			r = amdgpu_device_parse_gpu_info_fw(adev);
2076 			if (r)
2077 				return r;
2078 
2079 			bios_flags = amdgpu_device_get_vbios_flags(adev);
2080 			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2081 			/* Read BIOS */
2082 			if (!skip_bios) {
2083 				bool optional =
2084 					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2085 				if (!amdgpu_get_bios(adev) && !optional)
2086 					return -EINVAL;
2087 
2088 				if (optional && !adev->bios)
2089 					dev_info(
2090 						adev->dev,
2091 						"VBIOS image optional, proceeding without VBIOS image");
2092 
2093 				if (adev->bios) {
2094 					r = amdgpu_atombios_init(adev);
2095 					if (r) {
2096 						dev_err(adev->dev,
2097 							"amdgpu_atombios_init failed\n");
2098 						amdgpu_vf_error_put(
2099 							adev,
2100 							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2101 							0, 0);
2102 						return r;
2103 					}
2104 				}
2105 			}
2106 
2107 			/*get pf2vf msg info at it's earliest time*/
2108 			if (amdgpu_sriov_vf(adev))
2109 				amdgpu_virt_init_data_exchange(adev);
2110 
2111 		}
2112 	}
2113 	if (!total)
2114 		return -ENODEV;
2115 
2116 	if (adev->gmc.xgmi.supported)
2117 		amdgpu_xgmi_early_init(adev);
2118 
2119 	if (amdgpu_is_multi_aid(adev))
2120 		amdgpu_uid_init(adev);
2121 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2122 	if (ip_block->status.valid != false)
2123 		amdgpu_amdkfd_device_probe(adev);
2124 
2125 	adev->cg_flags &= amdgpu_cg_mask;
2126 	adev->pg_flags &= amdgpu_pg_mask;
2127 
2128 	return 0;
2129 }
2130 
2131 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2132 {
2133 	int i, r;
2134 
2135 	for (i = 0; i < adev->num_ip_blocks; i++) {
2136 		if (!adev->ip_blocks[i].status.sw)
2137 			continue;
2138 		if (adev->ip_blocks[i].status.hw)
2139 			continue;
2140 		if (!amdgpu_ip_member_of_hwini(
2141 			    adev, adev->ip_blocks[i].version->type))
2142 			continue;
2143 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2144 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2145 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2146 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2147 			if (r) {
2148 				dev_err(adev->dev,
2149 					"hw_init of IP block <%s> failed %d\n",
2150 					adev->ip_blocks[i].version->funcs->name,
2151 					r);
2152 				return r;
2153 			}
2154 			adev->ip_blocks[i].status.hw = true;
2155 		}
2156 	}
2157 
2158 	return 0;
2159 }
2160 
2161 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2162 {
2163 	int i, r;
2164 
2165 	for (i = 0; i < adev->num_ip_blocks; i++) {
2166 		if (!adev->ip_blocks[i].status.sw)
2167 			continue;
2168 		if (adev->ip_blocks[i].status.hw)
2169 			continue;
2170 		if (!amdgpu_ip_member_of_hwini(
2171 			    adev, adev->ip_blocks[i].version->type))
2172 			continue;
2173 		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2174 		if (r) {
2175 			dev_err(adev->dev,
2176 				"hw_init of IP block <%s> failed %d\n",
2177 				adev->ip_blocks[i].version->funcs->name, r);
2178 			return r;
2179 		}
2180 		adev->ip_blocks[i].status.hw = true;
2181 	}
2182 
2183 	return 0;
2184 }
2185 
2186 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2187 {
2188 	int r = 0;
2189 	int i;
2190 	uint32_t smu_version;
2191 
2192 	if (adev->asic_type >= CHIP_VEGA10) {
2193 		for (i = 0; i < adev->num_ip_blocks; i++) {
2194 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2195 				continue;
2196 
2197 			if (!amdgpu_ip_member_of_hwini(adev,
2198 						       AMD_IP_BLOCK_TYPE_PSP))
2199 				break;
2200 
2201 			if (!adev->ip_blocks[i].status.sw)
2202 				continue;
2203 
2204 			/* no need to do the fw loading again if already done*/
2205 			if (adev->ip_blocks[i].status.hw == true)
2206 				break;
2207 
2208 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2209 				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2210 				if (r)
2211 					return r;
2212 			} else {
2213 				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2214 				if (r) {
2215 					dev_err(adev->dev,
2216 						"hw_init of IP block <%s> failed %d\n",
2217 						adev->ip_blocks[i]
2218 							.version->funcs->name,
2219 						r);
2220 					return r;
2221 				}
2222 				adev->ip_blocks[i].status.hw = true;
2223 			}
2224 			break;
2225 		}
2226 	}
2227 
2228 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2229 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2230 
2231 	return r;
2232 }
2233 
2234 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2235 {
2236 	struct drm_sched_init_args args = {
2237 		.ops = &amdgpu_sched_ops,
2238 		.timeout_wq = adev->reset_domain->wq,
2239 		.dev = adev->dev,
2240 	};
2241 	long timeout;
2242 	int r, i;
2243 
2244 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2245 		struct amdgpu_ring *ring = adev->rings[i];
2246 
2247 		/* No need to setup the GPU scheduler for rings that don't need it */
2248 		if (!ring || ring->no_scheduler)
2249 			continue;
2250 
2251 		switch (ring->funcs->type) {
2252 		case AMDGPU_RING_TYPE_GFX:
2253 			timeout = adev->gfx_timeout;
2254 			break;
2255 		case AMDGPU_RING_TYPE_COMPUTE:
2256 			timeout = adev->compute_timeout;
2257 			break;
2258 		case AMDGPU_RING_TYPE_SDMA:
2259 			timeout = adev->sdma_timeout;
2260 			break;
2261 		default:
2262 			timeout = adev->video_timeout;
2263 			break;
2264 		}
2265 
2266 		args.timeout = timeout;
2267 		args.credit_limit = ring->num_hw_submission;
2268 		args.score = ring->sched_score;
2269 		args.name = ring->name;
2270 
2271 		r = drm_sched_init(&ring->sched, &args);
2272 		if (r) {
2273 			dev_err(adev->dev,
2274 				"Failed to create scheduler on ring %s.\n",
2275 				ring->name);
2276 			return r;
2277 		}
2278 		r = amdgpu_uvd_entity_init(adev, ring);
2279 		if (r) {
2280 			dev_err(adev->dev,
2281 				"Failed to create UVD scheduling entity on ring %s.\n",
2282 				ring->name);
2283 			return r;
2284 		}
2285 		r = amdgpu_vce_entity_init(adev, ring);
2286 		if (r) {
2287 			dev_err(adev->dev,
2288 				"Failed to create VCE scheduling entity on ring %s.\n",
2289 				ring->name);
2290 			return r;
2291 		}
2292 	}
2293 
2294 	if (adev->xcp_mgr)
2295 		amdgpu_xcp_update_partition_sched_list(adev);
2296 
2297 	return 0;
2298 }
2299 
2300 
2301 /**
2302  * amdgpu_device_ip_init - run init for hardware IPs
2303  *
2304  * @adev: amdgpu_device pointer
2305  *
2306  * Main initialization pass for hardware IPs.  The list of all the hardware
2307  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2308  * are run.  sw_init initializes the software state associated with each IP
2309  * and hw_init initializes the hardware associated with each IP.
2310  * Returns 0 on success, negative error code on failure.
2311  */
2312 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2313 {
2314 	bool init_badpage;
2315 	int i, r;
2316 
2317 	r = amdgpu_ras_init(adev);
2318 	if (r)
2319 		return r;
2320 
2321 	for (i = 0; i < adev->num_ip_blocks; i++) {
2322 		if (!adev->ip_blocks[i].status.valid)
2323 			continue;
2324 		if (adev->ip_blocks[i].version->funcs->sw_init) {
2325 			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2326 			if (r) {
2327 				dev_err(adev->dev,
2328 					"sw_init of IP block <%s> failed %d\n",
2329 					adev->ip_blocks[i].version->funcs->name,
2330 					r);
2331 				goto init_failed;
2332 			}
2333 		}
2334 		adev->ip_blocks[i].status.sw = true;
2335 
2336 		if (!amdgpu_ip_member_of_hwini(
2337 			    adev, adev->ip_blocks[i].version->type))
2338 			continue;
2339 
2340 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2341 			/* need to do common hw init early so everything is set up for gmc */
2342 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2343 			if (r) {
2344 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2345 					r);
2346 				goto init_failed;
2347 			}
2348 			adev->ip_blocks[i].status.hw = true;
2349 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2350 			/* need to do gmc hw init early so we can allocate gpu mem */
2351 			/* Try to reserve bad pages early */
2352 			if (amdgpu_sriov_vf(adev))
2353 				amdgpu_virt_exchange_data(adev);
2354 
2355 			r = amdgpu_device_mem_scratch_init(adev);
2356 			if (r) {
2357 				dev_err(adev->dev,
2358 					"amdgpu_mem_scratch_init failed %d\n",
2359 					r);
2360 				goto init_failed;
2361 			}
2362 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2363 			if (r) {
2364 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2365 					r);
2366 				goto init_failed;
2367 			}
2368 			r = amdgpu_device_wb_init(adev);
2369 			if (r) {
2370 				dev_err(adev->dev,
2371 					"amdgpu_device_wb_init failed %d\n", r);
2372 				goto init_failed;
2373 			}
2374 			adev->ip_blocks[i].status.hw = true;
2375 
2376 			/* right after GMC hw init, we create CSA */
2377 			if (adev->gfx.mcbp) {
2378 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2379 							       AMDGPU_GEM_DOMAIN_VRAM |
2380 							       AMDGPU_GEM_DOMAIN_GTT,
2381 							       AMDGPU_CSA_SIZE);
2382 				if (r) {
2383 					dev_err(adev->dev,
2384 						"allocate CSA failed %d\n", r);
2385 					goto init_failed;
2386 				}
2387 			}
2388 
2389 			r = amdgpu_seq64_init(adev);
2390 			if (r) {
2391 				dev_err(adev->dev, "allocate seq64 failed %d\n",
2392 					r);
2393 				goto init_failed;
2394 			}
2395 		}
2396 	}
2397 
2398 	if (amdgpu_sriov_vf(adev))
2399 		amdgpu_virt_init_data_exchange(adev);
2400 
2401 	r = amdgpu_ib_pool_init(adev);
2402 	if (r) {
2403 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2404 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2405 		goto init_failed;
2406 	}
2407 
2408 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2409 	if (r)
2410 		goto init_failed;
2411 
2412 	r = amdgpu_device_ip_hw_init_phase1(adev);
2413 	if (r)
2414 		goto init_failed;
2415 
2416 	r = amdgpu_device_fw_loading(adev);
2417 	if (r)
2418 		goto init_failed;
2419 
2420 	r = amdgpu_device_ip_hw_init_phase2(adev);
2421 	if (r)
2422 		goto init_failed;
2423 
2424 	/*
2425 	 * retired pages will be loaded from eeprom and reserved here,
2426 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2427 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2428 	 * for I2C communication which only true at this point.
2429 	 *
2430 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2431 	 * failure from bad gpu situation and stop amdgpu init process
2432 	 * accordingly. For other failed cases, it will still release all
2433 	 * the resource and print error message, rather than returning one
2434 	 * negative value to upper level.
2435 	 *
2436 	 * Note: theoretically, this should be called before all vram allocations
2437 	 * to protect retired page from abusing
2438 	 */
2439 	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2440 	r = amdgpu_ras_recovery_init(adev, init_badpage);
2441 	if (r)
2442 		goto init_failed;
2443 
2444 	/**
2445 	 * In case of XGMI grab extra reference for reset domain for this device
2446 	 */
2447 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2448 		if (amdgpu_xgmi_add_device(adev) == 0) {
2449 			if (!amdgpu_sriov_vf(adev)) {
2450 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2451 
2452 				if (WARN_ON(!hive)) {
2453 					r = -ENOENT;
2454 					goto init_failed;
2455 				}
2456 
2457 				if (!hive->reset_domain ||
2458 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2459 					r = -ENOENT;
2460 					amdgpu_put_xgmi_hive(hive);
2461 					goto init_failed;
2462 				}
2463 
2464 				/* Drop the early temporary reset domain we created for device */
2465 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2466 				adev->reset_domain = hive->reset_domain;
2467 				amdgpu_put_xgmi_hive(hive);
2468 			}
2469 		}
2470 	}
2471 
2472 	r = amdgpu_device_init_schedulers(adev);
2473 	if (r)
2474 		goto init_failed;
2475 
2476 	amdgpu_ttm_enable_buffer_funcs(adev);
2477 
2478 	/* Don't init kfd if whole hive need to be reset during init */
2479 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
2480 		amdgpu_amdkfd_device_init(adev);
2481 	}
2482 
2483 	amdgpu_fru_get_product_info(adev);
2484 
2485 	r = amdgpu_cper_init(adev);
2486 
2487 init_failed:
2488 
2489 	return r;
2490 }
2491 
2492 /**
2493  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2494  *
2495  * @adev: amdgpu_device pointer
2496  *
2497  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2498  * this function before a GPU reset.  If the value is retained after a
2499  * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2500  */
2501 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2502 {
2503 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2504 }
2505 
2506 /**
2507  * amdgpu_device_check_vram_lost - check if vram is valid
2508  *
2509  * @adev: amdgpu_device pointer
2510  *
2511  * Checks the reset magic value written to the gart pointer in VRAM.
2512  * The driver calls this after a GPU reset to see if the contents of
2513  * VRAM is lost or now.
2514  * returns true if vram is lost, false if not.
2515  */
2516 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2517 {
2518 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2519 			AMDGPU_RESET_MAGIC_NUM))
2520 		return true;
2521 
2522 	if (!amdgpu_in_reset(adev))
2523 		return false;
2524 
2525 	/*
2526 	 * For all ASICs with baco/mode1 reset, the VRAM is
2527 	 * always assumed to be lost.
2528 	 */
2529 	switch (amdgpu_asic_reset_method(adev)) {
2530 	case AMD_RESET_METHOD_LEGACY:
2531 	case AMD_RESET_METHOD_LINK:
2532 	case AMD_RESET_METHOD_BACO:
2533 	case AMD_RESET_METHOD_MODE1:
2534 		return true;
2535 	default:
2536 		return false;
2537 	}
2538 }
2539 
2540 /**
2541  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2542  *
2543  * @adev: amdgpu_device pointer
2544  * @state: clockgating state (gate or ungate)
2545  *
2546  * The list of all the hardware IPs that make up the asic is walked and the
2547  * set_clockgating_state callbacks are run.
2548  * Late initialization pass enabling clockgating for hardware IPs.
2549  * Fini or suspend, pass disabling clockgating for hardware IPs.
2550  * Returns 0 on success, negative error code on failure.
2551  */
2552 
2553 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2554 			       enum amd_clockgating_state state)
2555 {
2556 	int i, j, r;
2557 
2558 	if (amdgpu_emu_mode == 1)
2559 		return 0;
2560 
2561 	for (j = 0; j < adev->num_ip_blocks; j++) {
2562 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2563 		if (!adev->ip_blocks[i].status.late_initialized)
2564 			continue;
2565 		if (!adev->ip_blocks[i].version)
2566 			continue;
2567 		/* skip CG for GFX, SDMA on S0ix */
2568 		if (adev->in_s0ix &&
2569 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2570 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2571 			continue;
2572 		/* skip CG for VCE/UVD, it's handled specially */
2573 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2574 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2575 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2576 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2577 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2578 			/* enable clockgating to save power */
2579 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
2580 										     state);
2581 			if (r) {
2582 				dev_err(adev->dev,
2583 					"set_clockgating_state(gate) of IP block <%s> failed %d\n",
2584 					adev->ip_blocks[i].version->funcs->name,
2585 					r);
2586 				return r;
2587 			}
2588 		}
2589 	}
2590 
2591 	return 0;
2592 }
2593 
2594 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2595 			       enum amd_powergating_state state)
2596 {
2597 	int i, j, r;
2598 
2599 	if (amdgpu_emu_mode == 1)
2600 		return 0;
2601 
2602 	for (j = 0; j < adev->num_ip_blocks; j++) {
2603 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2604 		if (!adev->ip_blocks[i].status.late_initialized)
2605 			continue;
2606 		if (!adev->ip_blocks[i].version)
2607 			continue;
2608 		/* skip PG for GFX, SDMA on S0ix */
2609 		if (adev->in_s0ix &&
2610 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2611 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2612 			continue;
2613 		/* skip CG for VCE/UVD, it's handled specially */
2614 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2615 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2616 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2617 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2618 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2619 			/* enable powergating to save power */
2620 			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
2621 											state);
2622 			if (r) {
2623 				dev_err(adev->dev,
2624 					"set_powergating_state(gate) of IP block <%s> failed %d\n",
2625 					adev->ip_blocks[i].version->funcs->name,
2626 					r);
2627 				return r;
2628 			}
2629 		}
2630 	}
2631 	return 0;
2632 }
2633 
2634 static int amdgpu_device_enable_mgpu_fan_boost(void)
2635 {
2636 	struct amdgpu_gpu_instance *gpu_ins;
2637 	struct amdgpu_device *adev;
2638 	int i, ret = 0;
2639 
2640 	mutex_lock(&mgpu_info.mutex);
2641 
2642 	/*
2643 	 * MGPU fan boost feature should be enabled
2644 	 * only when there are two or more dGPUs in
2645 	 * the system
2646 	 */
2647 	if (mgpu_info.num_dgpu < 2)
2648 		goto out;
2649 
2650 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2651 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2652 		adev = gpu_ins->adev;
2653 		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
2654 		    !gpu_ins->mgpu_fan_enabled) {
2655 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2656 			if (ret)
2657 				break;
2658 
2659 			gpu_ins->mgpu_fan_enabled = 1;
2660 		}
2661 	}
2662 
2663 out:
2664 	mutex_unlock(&mgpu_info.mutex);
2665 
2666 	return ret;
2667 }
2668 
2669 /**
2670  * amdgpu_device_ip_late_init - run late init for hardware IPs
2671  *
2672  * @adev: amdgpu_device pointer
2673  *
2674  * Late initialization pass for hardware IPs.  The list of all the hardware
2675  * IPs that make up the asic is walked and the late_init callbacks are run.
2676  * late_init covers any special initialization that an IP requires
2677  * after all of the have been initialized or something that needs to happen
2678  * late in the init process.
2679  * Returns 0 on success, negative error code on failure.
2680  */
2681 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2682 {
2683 	struct amdgpu_gpu_instance *gpu_instance;
2684 	int i = 0, r;
2685 
2686 	for (i = 0; i < adev->num_ip_blocks; i++) {
2687 		if (!adev->ip_blocks[i].status.hw)
2688 			continue;
2689 		if (adev->ip_blocks[i].version->funcs->late_init) {
2690 			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
2691 			if (r) {
2692 				dev_err(adev->dev,
2693 					"late_init of IP block <%s> failed %d\n",
2694 					adev->ip_blocks[i].version->funcs->name,
2695 					r);
2696 				return r;
2697 			}
2698 		}
2699 		adev->ip_blocks[i].status.late_initialized = true;
2700 	}
2701 
2702 	r = amdgpu_ras_late_init(adev);
2703 	if (r) {
2704 		dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
2705 		return r;
2706 	}
2707 
2708 	if (!amdgpu_reset_in_recovery(adev))
2709 		amdgpu_ras_set_error_query_ready(adev, true);
2710 
2711 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2712 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2713 
2714 	amdgpu_device_fill_reset_magic(adev);
2715 
2716 	r = amdgpu_device_enable_mgpu_fan_boost();
2717 	if (r)
2718 		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
2719 
2720 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2721 	if (amdgpu_passthrough(adev) &&
2722 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2723 	     adev->asic_type == CHIP_ALDEBARAN))
2724 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2725 
2726 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2727 		mutex_lock(&mgpu_info.mutex);
2728 
2729 		/*
2730 		 * Reset device p-state to low as this was booted with high.
2731 		 *
2732 		 * This should be performed only after all devices from the same
2733 		 * hive get initialized.
2734 		 *
2735 		 * However, it's unknown how many device in the hive in advance.
2736 		 * As this is counted one by one during devices initializations.
2737 		 *
2738 		 * So, we wait for all XGMI interlinked devices initialized.
2739 		 * This may bring some delays as those devices may come from
2740 		 * different hives. But that should be OK.
2741 		 */
2742 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2743 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2744 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2745 				if (gpu_instance->adev->flags & AMD_IS_APU)
2746 					continue;
2747 
2748 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2749 						AMDGPU_XGMI_PSTATE_MIN);
2750 				if (r) {
2751 					dev_err(adev->dev,
2752 						"pstate setting failed (%d).\n",
2753 						r);
2754 					break;
2755 				}
2756 			}
2757 		}
2758 
2759 		mutex_unlock(&mgpu_info.mutex);
2760 	}
2761 
2762 	return 0;
2763 }
2764 
2765 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
2766 {
2767 	struct amdgpu_device *adev = ip_block->adev;
2768 	int r;
2769 
2770 	if (!ip_block->version->funcs->hw_fini) {
2771 		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
2772 			ip_block->version->funcs->name);
2773 	} else {
2774 		r = ip_block->version->funcs->hw_fini(ip_block);
2775 		/* XXX handle errors */
2776 		if (r) {
2777 			dev_dbg(adev->dev,
2778 				"hw_fini of IP block <%s> failed %d\n",
2779 				ip_block->version->funcs->name, r);
2780 		}
2781 	}
2782 
2783 	ip_block->status.hw = false;
2784 }
2785 
2786 /**
2787  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2788  *
2789  * @adev: amdgpu_device pointer
2790  *
2791  * For ASICs need to disable SMC first
2792  */
2793 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2794 {
2795 	int i;
2796 
2797 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
2798 		return;
2799 
2800 	for (i = 0; i < adev->num_ip_blocks; i++) {
2801 		if (!adev->ip_blocks[i].status.hw)
2802 			continue;
2803 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2804 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2805 			break;
2806 		}
2807 	}
2808 }
2809 
2810 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2811 {
2812 	int i, r;
2813 
2814 	for (i = 0; i < adev->num_ip_blocks; i++) {
2815 		if (!adev->ip_blocks[i].version)
2816 			continue;
2817 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2818 			continue;
2819 
2820 		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
2821 		if (r) {
2822 			dev_dbg(adev->dev,
2823 				"early_fini of IP block <%s> failed %d\n",
2824 				adev->ip_blocks[i].version->funcs->name, r);
2825 		}
2826 	}
2827 
2828 	amdgpu_amdkfd_suspend(adev, true);
2829 	amdgpu_amdkfd_teardown_processes(adev);
2830 	amdgpu_userq_suspend(adev);
2831 
2832 	/* Workaround for ASICs need to disable SMC first */
2833 	amdgpu_device_smu_fini_early(adev);
2834 
2835 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2836 		if (!adev->ip_blocks[i].status.hw)
2837 			continue;
2838 
2839 		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2840 	}
2841 
2842 	if (amdgpu_sriov_vf(adev)) {
2843 		if (amdgpu_virt_release_full_gpu(adev, false))
2844 			dev_err(adev->dev,
2845 				"failed to release exclusive mode on fini\n");
2846 	}
2847 
2848 	/*
2849 	 * Driver reload on the APU can fail due to firmware validation because
2850 	 * the PSP is always running, as it is shared across the whole SoC.
2851 	 * This same issue does not occur on dGPU because it has a mechanism
2852 	 * that checks whether the PSP is running. A solution for those issues
2853 	 * in the APU is to trigger a GPU reset, but this should be done during
2854 	 * the unload phase to avoid adding boot latency and screen flicker.
2855 	 * GFX V11 has GC block as default off IP. Every time AMDGPU driver sends
2856 	 * a request to PMFW to unload MP1, PMFW will put GC in reset and power down
2857 	 * the voltage. Hence, skipping reset for APUs with GFX V11 or later.
2858 	 */
2859 	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu &&
2860 		amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) {
2861 		r = amdgpu_asic_reset(adev);
2862 		if (r)
2863 			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
2864 	}
2865 
2866 	return 0;
2867 }
2868 
2869 /**
2870  * amdgpu_device_ip_fini - run fini for hardware IPs
2871  *
2872  * @adev: amdgpu_device pointer
2873  *
2874  * Main teardown pass for hardware IPs.  The list of all the hardware
2875  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2876  * are run.  hw_fini tears down the hardware associated with each IP
2877  * and sw_fini tears down any software state associated with each IP.
2878  * Returns 0 on success, negative error code on failure.
2879  */
2880 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2881 {
2882 	int i, r;
2883 
2884 	amdgpu_cper_fini(adev);
2885 
2886 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2887 		amdgpu_virt_release_ras_err_handler_data(adev);
2888 
2889 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2890 		amdgpu_xgmi_remove_device(adev);
2891 
2892 	amdgpu_amdkfd_device_fini_sw(adev);
2893 
2894 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2895 		if (!adev->ip_blocks[i].status.sw)
2896 			continue;
2897 
2898 		if (!adev->ip_blocks[i].version)
2899 			continue;
2900 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2901 			amdgpu_ucode_free_bo(adev);
2902 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2903 			amdgpu_device_wb_fini(adev);
2904 			amdgpu_device_mem_scratch_fini(adev);
2905 			amdgpu_ib_pool_fini(adev);
2906 			amdgpu_seq64_fini(adev);
2907 			amdgpu_doorbell_fini(adev);
2908 		}
2909 		if (adev->ip_blocks[i].version->funcs->sw_fini) {
2910 			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
2911 			/* XXX handle errors */
2912 			if (r) {
2913 				dev_dbg(adev->dev,
2914 					"sw_fini of IP block <%s> failed %d\n",
2915 					adev->ip_blocks[i].version->funcs->name,
2916 					r);
2917 			}
2918 		}
2919 		adev->ip_blocks[i].status.sw = false;
2920 		adev->ip_blocks[i].status.valid = false;
2921 	}
2922 
2923 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2924 		if (!adev->ip_blocks[i].status.late_initialized)
2925 			continue;
2926 		if (!adev->ip_blocks[i].version)
2927 			continue;
2928 		if (adev->ip_blocks[i].version->funcs->late_fini)
2929 			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
2930 		adev->ip_blocks[i].status.late_initialized = false;
2931 	}
2932 
2933 	amdgpu_ras_fini(adev);
2934 	amdgpu_uid_fini(adev);
2935 
2936 	return 0;
2937 }
2938 
2939 /**
2940  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2941  *
2942  * @work: work_struct.
2943  */
2944 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2945 {
2946 	struct amdgpu_device *adev =
2947 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2948 	int r;
2949 
2950 	r = amdgpu_ib_ring_tests(adev);
2951 	if (r)
2952 		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
2953 }
2954 
2955 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2956 {
2957 	struct amdgpu_device *adev =
2958 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2959 
2960 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2961 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2962 
2963 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
2964 		adev->gfx.gfx_off_state = true;
2965 }
2966 
2967 /**
2968  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2969  *
2970  * @adev: amdgpu_device pointer
2971  *
2972  * Main suspend function for hardware IPs.  The list of all the hardware
2973  * IPs that make up the asic is walked, clockgating is disabled and the
2974  * suspend callbacks are run.  suspend puts the hardware and software state
2975  * in each IP into a state suitable for suspend.
2976  * Returns 0 on success, negative error code on failure.
2977  */
2978 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2979 {
2980 	int i, r, rec;
2981 
2982 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2983 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2984 
2985 	/*
2986 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
2987 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2988 	 * scenario. Add the missing df cstate disablement here.
2989 	 */
2990 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2991 		dev_warn(adev->dev, "Failed to disallow df cstate");
2992 
2993 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2994 		if (!adev->ip_blocks[i].status.valid)
2995 			continue;
2996 
2997 		/* displays are handled separately */
2998 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2999 			continue;
3000 
3001 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3002 		if (r)
3003 			goto unwind;
3004 	}
3005 
3006 	return 0;
3007 unwind:
3008 	rec = amdgpu_device_ip_resume_phase3(adev);
3009 	if (rec)
3010 		dev_err(adev->dev,
3011 			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
3012 			rec);
3013 
3014 	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
3015 
3016 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3017 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3018 
3019 	return r;
3020 }
3021 
3022 /**
3023  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3024  *
3025  * @adev: amdgpu_device pointer
3026  *
3027  * Main suspend function for hardware IPs.  The list of all the hardware
3028  * IPs that make up the asic is walked, clockgating is disabled and the
3029  * suspend callbacks are run.  suspend puts the hardware and software state
3030  * in each IP into a state suitable for suspend.
3031  * Returns 0 on success, negative error code on failure.
3032  */
3033 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3034 {
3035 	int i, r, rec;
3036 
3037 	if (adev->in_s0ix)
3038 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3039 
3040 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3041 		if (!adev->ip_blocks[i].status.valid)
3042 			continue;
3043 		/* displays are handled in phase1 */
3044 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3045 			continue;
3046 		/* PSP lost connection when err_event_athub occurs */
3047 		if (amdgpu_ras_intr_triggered() &&
3048 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3049 			adev->ip_blocks[i].status.hw = false;
3050 			continue;
3051 		}
3052 
3053 		/* skip unnecessary suspend if we do not initialize them yet */
3054 		if (!amdgpu_ip_member_of_hwini(
3055 			    adev, adev->ip_blocks[i].version->type))
3056 			continue;
3057 
3058 		/* Since we skip suspend for S0i3, we need to cancel the delayed
3059 		 * idle work here as the suspend callback never gets called.
3060 		 */
3061 		if (adev->in_s0ix &&
3062 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3063 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3064 			cancel_delayed_work_sync(&adev->gfx.idle_work);
3065 		/* skip suspend of gfx/mes and psp for S0ix
3066 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3067 		 * like at runtime. PSP is also part of the always on hardware
3068 		 * so no need to suspend it.
3069 		 */
3070 		if (adev->in_s0ix &&
3071 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3072 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3073 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3074 			continue;
3075 
3076 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3077 		if (adev->in_s0ix &&
3078 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3079 		     IP_VERSION(5, 0, 0)) &&
3080 		    (adev->ip_blocks[i].version->type ==
3081 		     AMD_IP_BLOCK_TYPE_SDMA))
3082 			continue;
3083 
3084 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3085 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3086 		 * from this location and RLC Autoload automatically also gets loaded
3087 		 * from here based on PMFW -> PSP message during re-init sequence.
3088 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3089 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3090 		 */
3091 		if (amdgpu_in_reset(adev) &&
3092 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3093 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3094 			continue;
3095 
3096 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3097 		if (r)
3098 			goto unwind;
3099 
3100 		/* handle putting the SMC in the appropriate state */
3101 		if (!amdgpu_sriov_vf(adev)) {
3102 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3103 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3104 				if (r) {
3105 					dev_err(adev->dev,
3106 						"SMC failed to set mp1 state %d, %d\n",
3107 						adev->mp1_state, r);
3108 					goto unwind;
3109 				}
3110 			}
3111 		}
3112 	}
3113 
3114 	return 0;
3115 unwind:
3116 	/* suspend phase 2 = resume phase 1 + resume phase 2 */
3117 	rec = amdgpu_device_ip_resume_phase1(adev);
3118 	if (rec) {
3119 		dev_err(adev->dev,
3120 			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3121 			rec);
3122 		return r;
3123 	}
3124 
3125 	rec = amdgpu_device_fw_loading(adev);
3126 	if (rec) {
3127 		dev_err(adev->dev,
3128 			"amdgpu_device_fw_loading failed during unwind: %d\n",
3129 			rec);
3130 		return r;
3131 	}
3132 
3133 	rec = amdgpu_device_ip_resume_phase2(adev);
3134 	if (rec) {
3135 		dev_err(adev->dev,
3136 			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3137 			rec);
3138 		return r;
3139 	}
3140 
3141 	return r;
3142 }
3143 
3144 /**
3145  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3146  *
3147  * @adev: amdgpu_device pointer
3148  *
3149  * Main suspend function for hardware IPs.  The list of all the hardware
3150  * IPs that make up the asic is walked, clockgating is disabled and the
3151  * suspend callbacks are run.  suspend puts the hardware and software state
3152  * in each IP into a state suitable for suspend.
3153  * Returns 0 on success, negative error code on failure.
3154  */
3155 static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3156 {
3157 	int r;
3158 
3159 	if (amdgpu_sriov_vf(adev)) {
3160 		amdgpu_virt_fini_data_exchange(adev);
3161 		amdgpu_virt_request_full_gpu(adev, false);
3162 	}
3163 
3164 	amdgpu_ttm_disable_buffer_funcs(adev);
3165 
3166 	r = amdgpu_device_ip_suspend_phase1(adev);
3167 	if (r)
3168 		return r;
3169 	r = amdgpu_device_ip_suspend_phase2(adev);
3170 
3171 	if (amdgpu_sriov_vf(adev))
3172 		amdgpu_virt_release_full_gpu(adev, false);
3173 
3174 	return r;
3175 }
3176 
3177 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3178 {
3179 	int i, r;
3180 
3181 	static enum amd_ip_block_type ip_order[] = {
3182 		AMD_IP_BLOCK_TYPE_COMMON,
3183 		AMD_IP_BLOCK_TYPE_GMC,
3184 		AMD_IP_BLOCK_TYPE_PSP,
3185 		AMD_IP_BLOCK_TYPE_IH,
3186 	};
3187 
3188 	for (i = 0; i < adev->num_ip_blocks; i++) {
3189 		int j;
3190 		struct amdgpu_ip_block *block;
3191 
3192 		block = &adev->ip_blocks[i];
3193 		block->status.hw = false;
3194 
3195 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3196 
3197 			if (block->version->type != ip_order[j] ||
3198 				!block->status.valid)
3199 				continue;
3200 
3201 			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3202 			if (r) {
3203 				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3204 					 block->version->funcs->name);
3205 				return r;
3206 			}
3207 			block->status.hw = true;
3208 		}
3209 	}
3210 
3211 	return 0;
3212 }
3213 
3214 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3215 {
3216 	struct amdgpu_ip_block *block;
3217 	int i, r = 0;
3218 
3219 	static enum amd_ip_block_type ip_order[] = {
3220 		AMD_IP_BLOCK_TYPE_SMC,
3221 		AMD_IP_BLOCK_TYPE_DCE,
3222 		AMD_IP_BLOCK_TYPE_GFX,
3223 		AMD_IP_BLOCK_TYPE_SDMA,
3224 		AMD_IP_BLOCK_TYPE_MES,
3225 		AMD_IP_BLOCK_TYPE_UVD,
3226 		AMD_IP_BLOCK_TYPE_VCE,
3227 		AMD_IP_BLOCK_TYPE_VCN,
3228 		AMD_IP_BLOCK_TYPE_JPEG
3229 	};
3230 
3231 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3232 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
3233 
3234 		if (!block)
3235 			continue;
3236 
3237 		if (block->status.valid && !block->status.hw) {
3238 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3239 				r = amdgpu_ip_block_resume(block);
3240 			} else {
3241 				r = block->version->funcs->hw_init(block);
3242 			}
3243 
3244 			if (r) {
3245 				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3246 					 block->version->funcs->name);
3247 				break;
3248 			}
3249 			block->status.hw = true;
3250 		}
3251 	}
3252 
3253 	return r;
3254 }
3255 
3256 /**
3257  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3258  *
3259  * @adev: amdgpu_device pointer
3260  *
3261  * First resume function for hardware IPs.  The list of all the hardware
3262  * IPs that make up the asic is walked and the resume callbacks are run for
3263  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3264  * after a suspend and updates the software state as necessary.  This
3265  * function is also used for restoring the GPU after a GPU reset.
3266  * Returns 0 on success, negative error code on failure.
3267  */
3268 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3269 {
3270 	int i, r;
3271 
3272 	for (i = 0; i < adev->num_ip_blocks; i++) {
3273 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3274 			continue;
3275 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3276 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3277 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3278 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3279 
3280 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3281 			if (r)
3282 				return r;
3283 		}
3284 	}
3285 
3286 	return 0;
3287 }
3288 
3289 /**
3290  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3291  *
3292  * @adev: amdgpu_device pointer
3293  *
3294  * Second resume function for hardware IPs.  The list of all the hardware
3295  * IPs that make up the asic is walked and the resume callbacks are run for
3296  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3297  * functional state after a suspend and updates the software state as
3298  * necessary.  This function is also used for restoring the GPU after a GPU
3299  * reset.
3300  * Returns 0 on success, negative error code on failure.
3301  */
3302 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3303 {
3304 	int i, r;
3305 
3306 	for (i = 0; i < adev->num_ip_blocks; i++) {
3307 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3308 			continue;
3309 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3310 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3311 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3312 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3313 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3314 			continue;
3315 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3316 		if (r)
3317 			return r;
3318 	}
3319 
3320 	return 0;
3321 }
3322 
3323 /**
3324  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3325  *
3326  * @adev: amdgpu_device pointer
3327  *
3328  * Third resume function for hardware IPs.  The list of all the hardware
3329  * IPs that make up the asic is walked and the resume callbacks are run for
3330  * all DCE.  resume puts the hardware into a functional state after a suspend
3331  * and updates the software state as necessary.  This function is also used
3332  * for restoring the GPU after a GPU reset.
3333  *
3334  * Returns 0 on success, negative error code on failure.
3335  */
3336 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3337 {
3338 	int i, r;
3339 
3340 	for (i = 0; i < adev->num_ip_blocks; i++) {
3341 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3342 			continue;
3343 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3344 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3345 			if (r)
3346 				return r;
3347 		}
3348 	}
3349 
3350 	return 0;
3351 }
3352 
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  Runs, in order: resume phase 1,
 * firmware loading, resume phase 2, fence driver hw init and resume
 * phase 3.  The buffer functions are re-enabled right after phase 2 has
 * been attempted, before its result is checked.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int ret;

	ret = amdgpu_device_ip_resume_phase1(adev);
	if (ret)
		return ret;

	ret = amdgpu_device_fw_loading(adev);
	if (ret)
		return ret;

	ret = amdgpu_device_ip_resume_phase2(adev);

	/* Done even when phase 2 failed; the error is returned afterwards. */
	amdgpu_ttm_enable_buffer_funcs(adev);

	if (ret)
		return ret;

	amdgpu_fence_driver_hw_init(adev);

	return amdgpu_device_ip_resume_phase3(adev);
}
3391 /**
3392  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3393  *
3394  * @adev: amdgpu_device pointer
3395  *
3396  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3397  */
3398 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3399 {
3400 	if (amdgpu_sriov_vf(adev)) {
3401 		if (adev->is_atom_fw) {
3402 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3403 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3404 		} else {
3405 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3406 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3407 		}
3408 
3409 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3410 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3411 	}
3412 }
3413 
3414 /**
3415  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3416  *
3417  * @pdev : pci device context
3418  * @asic_type: AMD asic type
3419  *
3420  * Check if there is DC (new modesetting infrastructre) support for an asic.
3421  * returns true if DC has support, false if not.
3422  */
3423 bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
3424 				       enum amd_asic_type asic_type)
3425 {
3426 	switch (asic_type) {
3427 #ifdef CONFIG_DRM_AMDGPU_SI
3428 	case CHIP_HAINAN:
3429 #endif
3430 	case CHIP_TOPAZ:
3431 		/* chips with no display hardware */
3432 		return false;
3433 #if defined(CONFIG_DRM_AMD_DC)
3434 	case CHIP_TAHITI:
3435 	case CHIP_PITCAIRN:
3436 	case CHIP_VERDE:
3437 	case CHIP_OLAND:
3438 		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
3439 	default:
3440 		return amdgpu_dc != 0;
3441 #else
3442 	default:
3443 		if (amdgpu_dc > 0)
3444 			dev_info_once(
3445 				&pdev->dev,
3446 				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3447 		return false;
3448 #endif
3449 	}
3450 }
3451 
3452 /**
3453  * amdgpu_device_has_dc_support - check if dc is supported
3454  *
3455  * @adev: amdgpu_device pointer
3456  *
3457  * Returns true for supported, false for not supported
3458  */
3459 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3460 {
3461 	if (adev->enable_virtual_display ||
3462 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3463 		return false;
3464 
3465 	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
3466 }
3467 
3468 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3469 {
3470 	struct amdgpu_device *adev =
3471 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3472 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3473 
3474 	/* It's a bug to not have a hive within this function */
3475 	if (WARN_ON(!hive))
3476 		return;
3477 
3478 	/*
3479 	 * Use task barrier to synchronize all xgmi reset works across the
3480 	 * hive. task_barrier_enter and task_barrier_exit will block
3481 	 * until all the threads running the xgmi reset works reach
3482 	 * those points. task_barrier_full will do both blocks.
3483 	 */
3484 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3485 
3486 		task_barrier_enter(&hive->tb);
3487 		adev->asic_reset_res = amdgpu_device_baco_enter(adev);
3488 
3489 		if (adev->asic_reset_res)
3490 			goto fail;
3491 
3492 		task_barrier_exit(&hive->tb);
3493 		adev->asic_reset_res = amdgpu_device_baco_exit(adev);
3494 
3495 		if (adev->asic_reset_res)
3496 			goto fail;
3497 
3498 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3499 	} else {
3500 
3501 		task_barrier_full(&hive->tb);
3502 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3503 	}
3504 
3505 fail:
3506 	if (adev->asic_reset_res)
3507 		dev_warn(adev->dev,
3508 			 "ASIC reset failed with error, %d for drm dev, %s",
3509 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3510 	amdgpu_put_xgmi_hive(hive);
3511 }
3512 
3513 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3514 {
3515 	char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
3516 	char *input = buf;
3517 	char *timeout_setting = NULL;
3518 	int index = 0;
3519 	long timeout;
3520 	int ret = 0;
3521 
3522 	/* By default timeout for all queues is 2 sec */
3523 	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3524 		adev->video_timeout = msecs_to_jiffies(2000);
3525 
3526 	if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
3527 		return 0;
3528 
3529 	/*
3530 	 * strsep() destructively modifies its input by replacing delimiters
3531 	 * with '\0'. Use a stack copy so the global module parameter buffer
3532 	 * remains intact for multi-GPU systems where this function is called
3533 	 * once per device.
3534 	 */
3535 	strscpy(buf, amdgpu_lockup_timeout, sizeof(buf));
3536 
3537 	while ((timeout_setting = strsep(&input, ",")) &&
3538 	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3539 		ret = kstrtol(timeout_setting, 0, &timeout);
3540 		if (ret)
3541 			return ret;
3542 
3543 		if (timeout == 0) {
3544 			index++;
3545 			continue;
3546 		} else if (timeout < 0) {
3547 			timeout = MAX_SCHEDULE_TIMEOUT;
3548 			dev_warn(adev->dev, "lockup timeout disabled");
3549 			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3550 		} else {
3551 			timeout = msecs_to_jiffies(timeout);
3552 		}
3553 
3554 		switch (index++) {
3555 		case 0:
3556 			adev->gfx_timeout = timeout;
3557 			break;
3558 		case 1:
3559 			adev->compute_timeout = timeout;
3560 			break;
3561 		case 2:
3562 			adev->sdma_timeout = timeout;
3563 			break;
3564 		case 3:
3565 			adev->video_timeout = timeout;
3566 			break;
3567 		default:
3568 			break;
3569 		}
3570 	}
3571 
3572 	/* When only one value specified apply it to all queues. */
3573 	if (index == 1)
3574 		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3575 			adev->video_timeout = timeout;
3576 
3577 	return ret;
3578 }
3579 
3580 /**
3581  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3582  *
3583  * @adev: amdgpu_device pointer
3584  *
3585  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3586  */
3587 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3588 {
3589 	struct iommu_domain *domain;
3590 
3591 	domain = iommu_get_domain_for_dev(adev->dev);
3592 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3593 		adev->ram_is_direct_mapped = true;
3594 }
3595 
#if defined(CONFIG_HSA_AMD_P2P)
/**
 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true when the device has an IOMMU domain of type
 * IOMMU_DOMAIN_DMA or IOMMU_DOMAIN_DMA_FQ, false otherwise.
 */
static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
{
	struct iommu_domain *dom = iommu_get_domain_for_dev(adev->dev);

	if (!dom)
		return false;

	return dom->type == IOMMU_DOMAIN_DMA ||
	       dom->type == IOMMU_DOMAIN_DMA_FQ;
}
#endif
3616 
3617 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3618 {
3619 	if (amdgpu_mcbp == 1)
3620 		adev->gfx.mcbp = true;
3621 	else if (amdgpu_mcbp == 0)
3622 		adev->gfx.mcbp = false;
3623 
3624 	if (amdgpu_sriov_vf(adev))
3625 		adev->gfx.mcbp = true;
3626 
3627 	if (adev->gfx.mcbp)
3628 		dev_info(adev->dev, "MCBP is enabled\n");
3629 }
3630 
3631 static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
3632 {
3633 	int r;
3634 
3635 	r = amdgpu_atombios_sysfs_init(adev);
3636 	if (r)
3637 		drm_err(&adev->ddev,
3638 			"registering atombios sysfs failed (%d).\n", r);
3639 
3640 	r = amdgpu_pm_sysfs_init(adev);
3641 	if (r)
3642 		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
3643 
3644 	r = amdgpu_ucode_sysfs_init(adev);
3645 	if (r) {
3646 		adev->ucode_sysfs_en = false;
3647 		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
3648 	} else
3649 		adev->ucode_sysfs_en = true;
3650 
3651 	r = amdgpu_device_attr_sysfs_init(adev);
3652 	if (r)
3653 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3654 
3655 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
3656 	if (r)
3657 		dev_err(adev->dev,
3658 			"Could not create amdgpu board attributes\n");
3659 
3660 	amdgpu_fru_sysfs_init(adev);
3661 	amdgpu_reg_state_sysfs_init(adev);
3662 	amdgpu_xcp_sysfs_init(adev);
3663 	amdgpu_uma_sysfs_init(adev);
3664 	amdgpu_ptl_sysfs_init(adev);
3665 
3666 	return r;
3667 }
3668 
3669 static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
3670 {
3671 	if (adev->pm.sysfs_initialized)
3672 		amdgpu_pm_sysfs_fini(adev);
3673 	if (adev->ucode_sysfs_en)
3674 		amdgpu_ucode_sysfs_fini(adev);
3675 	amdgpu_device_attr_sysfs_fini(adev);
3676 	amdgpu_fru_sysfs_fini(adev);
3677 
3678 	amdgpu_reg_state_sysfs_fini(adev);
3679 	amdgpu_xcp_sysfs_fini(adev);
3680 	amdgpu_uma_sysfs_fini(adev);
3681 	amdgpu_ptl_sysfs_fini(adev);
3682 }
3683 
3684 /**
3685  * amdgpu_device_init - initialize the driver
3686  *
3687  * @adev: amdgpu_device pointer
3688  * @flags: driver flags
3689  *
3690  * Initializes the driver info and hw (all asics).
3691  * Returns 0 for success or an error on failure.
3692  * Called at driver startup.
3693  */
3694 int amdgpu_device_init(struct amdgpu_device *adev,
3695 		       uint32_t flags)
3696 {
3697 	struct pci_dev *pdev = adev->pdev;
3698 	int r, i;
3699 	bool px = false;
3700 	u32 max_MBps;
3701 	int tmp;
3702 
3703 	adev->shutdown = false;
3704 	adev->flags = flags;
3705 
3706 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3707 		adev->asic_type = amdgpu_force_asic_type;
3708 	else
3709 		adev->asic_type = flags & AMD_ASIC_MASK;
3710 
3711 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3712 	if (amdgpu_emu_mode == 1)
3713 		adev->usec_timeout *= 10;
3714 	adev->gmc.gart_size = 512 * 1024 * 1024;
3715 	adev->accel_working = false;
3716 	adev->num_rings = 0;
3717 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3718 	adev->mman.buffer_funcs = NULL;
3719 	adev->mman.num_buffer_funcs_scheds = 0;
3720 	adev->vm_manager.vm_pte_funcs = NULL;
3721 	adev->vm_manager.vm_pte_num_scheds = 0;
3722 	adev->gmc.gmc_funcs = NULL;
3723 	adev->harvest_ip_mask = 0x0;
3724 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3725 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3726 
3727 	amdgpu_reg_access_init(adev);
3728 
3729 	dev_info(
3730 		adev->dev,
3731 		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3732 		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3733 		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3734 
3735 	/* mutex initialization are all done here so we
3736 	 * can recall function without having locking issues
3737 	 */
3738 	mutex_init(&adev->firmware.mutex);
3739 	mutex_init(&adev->pm.mutex);
3740 	mutex_init(&adev->gfx.gpu_clock_mutex);
3741 	mutex_init(&adev->srbm_mutex);
3742 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3743 	mutex_init(&adev->gfx.gfx_off_mutex);
3744 	mutex_init(&adev->gfx.partition_mutex);
3745 	mutex_init(&adev->grbm_idx_mutex);
3746 	mutex_init(&adev->mn_lock);
3747 	mutex_init(&adev->virt.vf_errors.lock);
3748 	hash_init(adev->mn_hash);
3749 	mutex_init(&adev->psp.mutex);
3750 	mutex_init(&adev->psp.ptl.mutex);
3751 	mutex_init(&adev->notifier_lock);
3752 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3753 	mutex_init(&adev->benchmark_mutex);
3754 	mutex_init(&adev->gfx.reset_sem_mutex);
3755 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
3756 	mutex_init(&adev->enforce_isolation_mutex);
3757 	for (i = 0; i < MAX_XCP; ++i) {
3758 		adev->isolation[i].spearhead = dma_fence_get_stub();
3759 		amdgpu_sync_create(&adev->isolation[i].active);
3760 		amdgpu_sync_create(&adev->isolation[i].prev);
3761 	}
3762 	mutex_init(&adev->gfx.userq_sch_mutex);
3763 	mutex_init(&adev->gfx.workload_profile_mutex);
3764 	mutex_init(&adev->vcn.workload_profile_mutex);
3765 
3766 	amdgpu_device_init_apu_flags(adev);
3767 
3768 	r = amdgpu_device_check_arguments(adev);
3769 	if (r)
3770 		return r;
3771 
3772 	spin_lock_init(&adev->mmio_idx_lock);
3773 	spin_lock_init(&adev->mm_stats.lock);
3774 	spin_lock_init(&adev->virt.rlcg_reg_lock);
3775 	spin_lock_init(&adev->wb.lock);
3776 
3777 	INIT_LIST_HEAD(&adev->reset_list);
3778 
3779 	INIT_LIST_HEAD(&adev->ras_list);
3780 
3781 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3782 
3783 	xa_init_flags(&adev->userq_doorbell_xa, XA_FLAGS_LOCK_IRQ);
3784 
3785 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3786 			  amdgpu_device_delayed_init_work_handler);
3787 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3788 			  amdgpu_device_delay_enable_gfx_off);
3789 	/*
3790 	 * Initialize the enforce_isolation work structures for each XCP
3791 	 * partition.  This work handler is responsible for enforcing shader
3792 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
3793 	 * each GFX and compute ring.  If there are any fences, it schedules
3794 	 * the `enforce_isolation_work` to be run after a delay.  If there are
3795 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
3796 	 * runqueue.
3797 	 */
3798 	for (i = 0; i < MAX_XCP; i++) {
3799 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
3800 				  amdgpu_gfx_enforce_isolation_handler);
3801 		adev->gfx.enforce_isolation[i].adev = adev;
3802 		adev->gfx.enforce_isolation[i].xcp_id = i;
3803 	}
3804 
3805 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3806 
3807 	amdgpu_coredump_init(adev);
3808 
3809 	adev->gfx.gfx_off_req_count = 1;
3810 	adev->gfx.gfx_off_residency = 0;
3811 	adev->gfx.gfx_off_entrycount = 0;
3812 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3813 
3814 	atomic_set(&adev->throttling_logging_enabled, 1);
3815 	/*
3816 	 * If throttling continues, logging will be performed every minute
3817 	 * to avoid log flooding. "-1" is subtracted since the thermal
3818 	 * throttling interrupt comes every second. Thus, the total logging
3819 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3820 	 * for throttling interrupt) = 60 seconds.
3821 	 */
3822 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3823 
3824 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3825 
3826 	/* Registers mapping */
3827 	/* TODO: block userspace mapping of io register */
3828 	if (adev->asic_type >= CHIP_BONAIRE) {
3829 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3830 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3831 	} else {
3832 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3833 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3834 	}
3835 
3836 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3837 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3838 
3839 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3840 	if (!adev->rmmio)
3841 		return -ENOMEM;
3842 
3843 	dev_info(adev->dev, "register mmio base: 0x%08X\n",
3844 		 (uint32_t)adev->rmmio_base);
3845 	dev_info(adev->dev, "register mmio size: %u\n",
3846 		 (unsigned int)adev->rmmio_size);
3847 
3848 	/*
3849 	 * Reset domain needs to be present early, before XGMI hive discovered
3850 	 * (if any) and initialized to use reset sem and in_gpu reset flag
3851 	 * early on during init and before calling to RREG32.
3852 	 */
3853 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3854 	if (!adev->reset_domain)
3855 		return -ENOMEM;
3856 
3857 	/* detect hw virtualization here */
3858 	amdgpu_virt_init(adev);
3859 
3860 	amdgpu_device_get_pcie_info(adev);
3861 
3862 	r = amdgpu_device_get_job_timeout_settings(adev);
3863 	if (r) {
3864 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3865 		return r;
3866 	}
3867 
3868 	amdgpu_device_set_mcbp(adev);
3869 
3870 	/*
3871 	 * By default, use default mode where all blocks are expected to be
3872 	 * initialized. At present a 'swinit' of blocks is required to be
3873 	 * completed before the need for a different level is detected.
3874 	 */
3875 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
3876 
3877 	amdgpu_device_check_iommu_direct_map(adev);
3878 
3879 	/* early init functions */
3880 	r = amdgpu_device_ip_early_init(adev);
3881 	if (r)
3882 		return r;
3883 
3884 	/*
3885 	 * No need to remove conflicting FBs for non-display class devices.
3886 	 * This prevents the sysfb from being freed accidently.
3887 	 */
3888 	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
3889 	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
3890 		/* Get rid of things like offb */
3891 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
3892 		if (r)
3893 			return r;
3894 	}
3895 
3896 	/* Enable TMZ based on IP_VERSION */
3897 	amdgpu_gmc_tmz_set(adev);
3898 
3899 	if (amdgpu_sriov_vf(adev) &&
3900 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
3901 		/* VF MMIO access (except mailbox range) from CPU
3902 		 * will be blocked during sriov runtime
3903 		 */
3904 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
3905 
3906 	amdgpu_gmc_noretry_set(adev);
3907 	/* Need to get xgmi info early to decide the reset behavior*/
3908 	if (adev->gmc.xgmi.supported) {
3909 		if (adev->gfxhub.funcs &&
3910 		    adev->gfxhub.funcs->get_xgmi_info) {
3911 			r = adev->gfxhub.funcs->get_xgmi_info(adev);
3912 			if (r)
3913 				return r;
3914 		}
3915 	}
3916 
3917 	if (adev->gmc.xgmi.connected_to_cpu) {
3918 		if (adev->mmhub.funcs &&
3919 		    adev->mmhub.funcs->get_xgmi_info) {
3920 			r = adev->mmhub.funcs->get_xgmi_info(adev);
3921 			if (r)
3922 				return r;
3923 		}
3924 	}
3925 
3926 	/* enable PCIE atomic ops */
3927 	if (amdgpu_sriov_vf(adev)) {
3928 		if (adev->virt.fw_reserve.p_pf2vf)
3929 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3930 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3931 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3932 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
3933 	 * internal path natively support atomics, set have_atomics_support to true.
3934 	 */
3935 	} else if ((adev->flags & AMD_IS_APU &&
3936 		   amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) ||
3937 		   (adev->gmc.xgmi.connected_to_cpu &&
3938 		   amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))) {
3939 		adev->have_atomics_support = true;
3940 	} else {
3941 		adev->have_atomics_support =
3942 			!pci_enable_atomic_ops_to_root(adev->pdev,
3943 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3944 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3945 	}
3946 
3947 	if (!adev->have_atomics_support)
3948 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3949 
3950 	/* doorbell bar mapping and doorbell index init*/
3951 	amdgpu_doorbell_init(adev);
3952 
3953 	if (amdgpu_emu_mode == 1) {
3954 		/* post the asic on emulation mode */
3955 		emu_soc_asic_init(adev);
3956 		goto fence_driver_init;
3957 	}
3958 
3959 	amdgpu_reset_init(adev);
3960 
3961 	/* detect if we are with an SRIOV vbios */
3962 	if (adev->bios)
3963 		amdgpu_device_detect_sriov_bios(adev);
3964 
3965 	/* check if we need to reset the asic
3966 	 *  E.g., driver was not cleanly unloaded previously, etc.
3967 	 */
3968 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3969 		if (adev->gmc.xgmi.num_physical_nodes) {
3970 			dev_info(adev->dev, "Pending hive reset.\n");
3971 			amdgpu_set_init_level(adev,
3972 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3973 		} else {
3974 				tmp = amdgpu_reset_method;
3975 				/* It should do a default reset when loading or reloading the driver,
3976 				 * regardless of the module parameter reset_method.
3977 				 */
3978 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3979 				r = amdgpu_asic_reset(adev);
3980 				amdgpu_reset_method = tmp;
3981 		}
3982 
3983 		if (r) {
3984 		  dev_err(adev->dev, "asic reset on init failed\n");
3985 		  goto failed;
3986 		}
3987 	}
3988 
3989 	/* Post card if necessary */
3990 	if (amdgpu_device_need_post(adev)) {
3991 		if (!adev->bios) {
3992 			dev_err(adev->dev, "no vBIOS found\n");
3993 			r = -EINVAL;
3994 			goto failed;
3995 		}
3996 		dev_info(adev->dev, "GPU posting now...\n");
3997 		r = amdgpu_device_asic_init(adev);
3998 		if (r) {
3999 			dev_err(adev->dev, "gpu post error!\n");
4000 			goto failed;
4001 		}
4002 	}
4003 
4004 	if (adev->bios) {
4005 		if (adev->is_atom_fw) {
4006 			/* Initialize clocks */
4007 			r = amdgpu_atomfirmware_get_clock_info(adev);
4008 			if (r) {
4009 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4010 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4011 				goto failed;
4012 			}
4013 		} else {
4014 			/* Initialize clocks */
4015 			r = amdgpu_atombios_get_clock_info(adev);
4016 			if (r) {
4017 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4018 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4019 				goto failed;
4020 			}
4021 			/* init i2c buses */
4022 			amdgpu_i2c_init(adev);
4023 		}
4024 	}
4025 
4026 fence_driver_init:
4027 	/* Fence driver */
4028 	r = amdgpu_fence_driver_sw_init(adev);
4029 	if (r) {
4030 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4031 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4032 		goto failed;
4033 	}
4034 
4035 	/* init the mode config */
4036 	drm_mode_config_init(adev_to_drm(adev));
4037 
4038 	r = amdgpu_device_ip_init(adev);
4039 	if (r) {
4040 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4041 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4042 		goto release_ras_con;
4043 	}
4044 
4045 	amdgpu_fence_driver_hw_init(adev);
4046 
4047 	dev_info(adev->dev,
4048 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4049 			adev->gfx.config.max_shader_engines,
4050 			adev->gfx.config.max_sh_per_se,
4051 			adev->gfx.config.max_cu_per_sh,
4052 			adev->gfx.cu_info.number);
4053 
4054 	adev->accel_working = true;
4055 
4056 	amdgpu_vm_check_compute_bug(adev);
4057 
4058 	/* Initialize the buffer migration limit. */
4059 	if (amdgpu_moverate >= 0)
4060 		max_MBps = amdgpu_moverate;
4061 	else
4062 		max_MBps = 8; /* Allow 8 MB/s. */
4063 	/* Get a log2 for easy divisions. */
4064 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4065 
4066 	/*
4067 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4068 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4069 	 * gpu instance is counted less.
4070 	 */
4071 	amdgpu_register_gpu_instance(adev);
4072 
4073 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4074 	 * explicit gating rather than handling it automatically.
4075 	 */
4076 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4077 		r = amdgpu_device_ip_late_init(adev);
4078 		if (r) {
4079 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4080 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4081 			goto release_ras_con;
4082 		}
4083 		/* must succeed. */
4084 		amdgpu_ras_resume(adev);
4085 		queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
4086 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4087 	}
4088 
4089 	if (amdgpu_sriov_vf(adev)) {
4090 		amdgpu_virt_release_full_gpu(adev, true);
4091 		flush_delayed_work(&adev->delayed_init_work);
4092 	}
4093 
4094 	/* Don't init kfd if whole hive need to be reset during init */
4095 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4096 		kgd2kfd_init_zone_device(adev);
4097 		kfd_update_svm_support_properties(adev);
4098 	}
4099 
4100 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4101 		amdgpu_xgmi_reset_on_init(adev);
4102 
4103 	/*
4104 	 * Place those sysfs registering after `late_init`. As some of those
4105 	 * operations performed in `late_init` might affect the sysfs
4106 	 * interfaces creating.
4107 	 */
4108 	r = amdgpu_device_sys_interface_init(adev);
4109 
4110 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4111 		r = amdgpu_pmu_init(adev);
4112 	if (r)
4113 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4114 
4115 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4116 	if (amdgpu_device_cache_pci_state(adev->pdev))
4117 		pci_restore_state(pdev);
4118 
4119 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4120 	/* this will fail for cards that aren't VGA class devices, just
4121 	 * ignore it
4122 	 */
4123 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4124 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4125 
4126 	px = amdgpu_device_supports_px(adev);
4127 
4128 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4129 				apple_gmux_detect(NULL, NULL)))
4130 		vga_switcheroo_register_client(adev->pdev,
4131 					       &amdgpu_switcheroo_ops, px);
4132 
4133 	if (px)
4134 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4135 
4136 	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4137 	r = register_pm_notifier(&adev->pm_nb);
4138 	if (r)
4139 		goto failed;
4140 
4141 	return 0;
4142 
4143 release_ras_con:
4144 	if (amdgpu_sriov_vf(adev))
4145 		amdgpu_virt_release_full_gpu(adev, true);
4146 
4147 	/* failed in exclusive mode due to timeout */
4148 	if (amdgpu_sriov_vf(adev) &&
4149 		!amdgpu_sriov_runtime(adev) &&
4150 		amdgpu_virt_mmio_blocked(adev) &&
4151 		!amdgpu_virt_wait_reset(adev)) {
4152 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4153 		/* Don't send request since VF is inactive. */
4154 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4155 		adev->virt.ops = NULL;
4156 		r = -EAGAIN;
4157 	}
4158 	amdgpu_release_ras_context(adev);
4159 
4160 failed:
4161 	amdgpu_vf_error_trans_all(adev);
4162 
4163 	return r;
4164 }
4165 
4166 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4167 {
4168 
4169 	/* Clear all CPU mappings pointing to this device */
4170 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4171 
4172 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4173 	amdgpu_doorbell_fini(adev);
4174 
4175 	iounmap(adev->rmmio);
4176 	adev->rmmio = NULL;
4177 	if (adev->mman.aper_base_kaddr)
4178 		iounmap(adev->mman.aper_base_kaddr);
4179 	adev->mman.aper_base_kaddr = NULL;
4180 
4181 	/* Memory manager related */
4182 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4183 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4184 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4185 	}
4186 }
4187 
4188 /**
4189  * amdgpu_device_fini_hw - tear down the driver
4190  *
4191  * @adev: amdgpu_device pointer
4192  *
4193  * Tear down the driver info (all asics).
4194  * Called at driver shutdown.
4195  */
4196 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4197 {
4198 	dev_info(adev->dev, "finishing device.\n");
4199 	flush_delayed_work(&adev->delayed_init_work);
4200 
4201 	if (adev->mman.initialized)
4202 		drain_workqueue(adev->mman.bdev.wq);
4203 	adev->shutdown = true;
4204 
4205 	unregister_pm_notifier(&adev->pm_nb);
4206 
4207 	/* make sure IB test finished before entering exclusive mode
4208 	 * to avoid preemption on IB test
4209 	 */
4210 	if (amdgpu_sriov_vf(adev)) {
4211 		amdgpu_virt_request_full_gpu(adev, false);
4212 		amdgpu_virt_fini_data_exchange(adev);
4213 	}
4214 
4215 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
4216 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
4217 
4218 	/* disable all interrupts */
4219 	amdgpu_irq_disable_all(adev);
4220 	if (adev->mode_info.mode_config_initialized) {
4221 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4222 			drm_helper_force_disable_all(adev_to_drm(adev));
4223 		else
4224 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4225 	}
4226 	amdgpu_fence_driver_hw_fini(adev);
4227 
4228 	amdgpu_device_sys_interface_fini(adev);
4229 
4230 	/* disable ras feature must before hw fini */
4231 	amdgpu_ras_pre_fini(adev);
4232 
4233 	amdgpu_ttm_disable_buffer_funcs(adev);
4234 
4235 	/*
4236 	 * device went through surprise hotplug; we need to destroy topology
4237 	 * before ip_fini_early to prevent kfd locking refcount issues by calling
4238 	 * amdgpu_amdkfd_suspend()
4239 	 */
4240 	if (pci_dev_is_disconnected(adev->pdev))
4241 		amdgpu_amdkfd_device_fini_sw(adev);
4242 
4243 	amdgpu_coredump_fini(adev);
4244 	amdgpu_device_ip_fini_early(adev);
4245 
4246 	amdgpu_irq_fini_hw(adev);
4247 
4248 	if (adev->mman.initialized)
4249 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4250 
4251 	amdgpu_gart_dummy_page_fini(adev);
4252 
4253 	if (pci_dev_is_disconnected(adev->pdev))
4254 		amdgpu_device_unmap_mmio(adev);
4255 
4256 }
4257 
4258 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4259 {
4260 	int i, idx;
4261 	bool px;
4262 
4263 	amdgpu_device_ip_fini(adev);
4264 	amdgpu_fence_driver_sw_fini(adev);
4265 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4266 	adev->accel_working = false;
4267 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4268 	for (i = 0; i < MAX_XCP; ++i) {
4269 		dma_fence_put(adev->isolation[i].spearhead);
4270 		amdgpu_sync_free(&adev->isolation[i].active);
4271 		amdgpu_sync_free(&adev->isolation[i].prev);
4272 	}
4273 
4274 	amdgpu_reset_fini(adev);
4275 
4276 	/* free i2c buses */
4277 	amdgpu_i2c_fini(adev);
4278 
4279 	if (adev->bios) {
4280 		if (amdgpu_emu_mode != 1)
4281 			amdgpu_atombios_fini(adev);
4282 		amdgpu_bios_release(adev);
4283 	}
4284 
4285 	kfree(adev->fru_info);
4286 	adev->fru_info = NULL;
4287 
4288 	kfree(adev->xcp_mgr);
4289 	adev->xcp_mgr = NULL;
4290 
4291 	px = amdgpu_device_supports_px(adev);
4292 
4293 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4294 				apple_gmux_detect(NULL, NULL)))
4295 		vga_switcheroo_unregister_client(adev->pdev);
4296 
4297 	if (px)
4298 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4299 
4300 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4301 		vga_client_unregister(adev->pdev);
4302 
4303 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4304 
4305 		iounmap(adev->rmmio);
4306 		adev->rmmio = NULL;
4307 		drm_dev_exit(idx);
4308 	}
4309 
4310 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4311 		amdgpu_pmu_fini(adev);
4312 	if (adev->discovery.bin)
4313 		amdgpu_discovery_fini(adev);
4314 
4315 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4316 	adev->reset_domain = NULL;
4317 
4318 	kfree(adev->pci_state);
4319 	kfree(adev->pcie_reset_ctx.swds_pcistate);
4320 	kfree(adev->pcie_reset_ctx.swus_pcistate);
4321 }
4322 
4323 /**
4324  * amdgpu_device_evict_resources - evict device resources
4325  * @adev: amdgpu device object
4326  *
4327  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4328  * of the vram memory type. Mainly used for evicting device resources
4329  * at suspend time.
4330  *
4331  */
4332 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4333 {
4334 	int ret;
4335 
4336 	/* No need to evict vram on APUs unless going to S4 */
4337 	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
4338 		return 0;
4339 
4340 	/* No need to evict when going to S5 through S4 callbacks */
4341 	if (system_state == SYSTEM_POWER_OFF)
4342 		return 0;
4343 
4344 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4345 	if (ret) {
4346 		dev_warn(adev->dev, "evicting device resources failed\n");
4347 		return ret;
4348 	}
4349 
4350 	if (adev->in_s4) {
4351 		ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
4352 		if (ret)
4353 			dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
4354 	}
4355 	return ret;
4356 }
4357 
4358 /*
4359  * Suspend & resume.
4360  */
4361 /**
4362  * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
4363  * @nb: notifier block
4364  * @mode: suspend mode
4365  * @data: data
4366  *
4367  * This function is called when the system is about to suspend or hibernate.
4368  * It is used to set the appropriate flags so that eviction can be optimized
4369  * in the pm prepare callback.
4370  */
4371 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
4372 				     void *data)
4373 {
4374 	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
4375 
4376 	switch (mode) {
4377 	case PM_HIBERNATION_PREPARE:
4378 		adev->in_s4 = true;
4379 		break;
4380 	case PM_POST_HIBERNATION:
4381 		adev->in_s4 = false;
4382 		break;
4383 	}
4384 
4385 	return NOTIFY_DONE;
4386 }
4387 
4388 /**
4389  * amdgpu_device_prepare - prepare for device suspend
4390  *
4391  * @dev: drm dev pointer
4392  *
4393  * Prepare to put the hw in the suspend state (all asics).
4394  * Returns 0 for success or an error on failure.
4395  * Called at driver suspend.
4396  */
4397 int amdgpu_device_prepare(struct drm_device *dev)
4398 {
4399 	struct amdgpu_device *adev = drm_to_adev(dev);
4400 	int i, r;
4401 
4402 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4403 		return 0;
4404 
4405 	/* Evict the majority of BOs before starting suspend sequence */
4406 	r = amdgpu_device_evict_resources(adev);
4407 	if (r)
4408 		return r;
4409 
4410 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4411 
4412 	for (i = 0; i < adev->num_ip_blocks; i++) {
4413 		if (!adev->ip_blocks[i].status.valid)
4414 			continue;
4415 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4416 			continue;
4417 		r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4418 		if (r)
4419 			return r;
4420 	}
4421 
4422 	return 0;
4423 }
4424 
4425 /**
4426  * amdgpu_device_complete - complete power state transition
4427  *
4428  * @dev: drm dev pointer
4429  *
4430  * Undo the changes from amdgpu_device_prepare. This will be
4431  * called on all resume transitions, including those that failed.
4432  */
4433 void amdgpu_device_complete(struct drm_device *dev)
4434 {
4435 	struct amdgpu_device *adev = drm_to_adev(dev);
4436 	int i;
4437 
4438 	for (i = 0; i < adev->num_ip_blocks; i++) {
4439 		if (!adev->ip_blocks[i].status.valid)
4440 			continue;
4441 		if (!adev->ip_blocks[i].version->funcs->complete)
4442 			continue;
4443 		adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
4444 	}
4445 }
4446 
4447 /**
4448  * amdgpu_device_suspend - initiate device suspend
4449  *
4450  * @dev: drm dev pointer
4451  * @notify_clients: notify in-kernel DRM clients
4452  *
4453  * Puts the hw in the suspend state (all asics).
4454  * Returns 0 for success or an error on failure.
4455  * Called at driver suspend.
4456  */
4457 int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
4458 {
4459 	struct amdgpu_device *adev = drm_to_adev(dev);
4460 	int r, rec;
4461 
4462 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4463 		return 0;
4464 
4465 	adev->in_suspend = true;
4466 
4467 	if (amdgpu_sriov_vf(adev)) {
4468 		if (!adev->in_runpm)
4469 			amdgpu_amdkfd_suspend_process(adev);
4470 		amdgpu_virt_fini_data_exchange(adev);
4471 		r = amdgpu_virt_request_full_gpu(adev, false);
4472 		if (r)
4473 			return r;
4474 	}
4475 
4476 	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
4477 	if (r)
4478 		goto unwind_sriov;
4479 
4480 	if (notify_clients)
4481 		drm_client_dev_suspend(adev_to_drm(adev));
4482 
4483 	cancel_delayed_work_sync(&adev->delayed_init_work);
4484 
4485 	amdgpu_ras_suspend(adev);
4486 
4487 	r = amdgpu_device_ip_suspend_phase1(adev);
4488 	if (r)
4489 		goto unwind_smartshift;
4490 
4491 	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4492 	r = amdgpu_userq_suspend(adev);
4493 	if (r)
4494 		goto unwind_ip_phase1;
4495 
4496 	r = amdgpu_device_evict_resources(adev);
4497 	if (r)
4498 		goto unwind_userq;
4499 
4500 	amdgpu_ttm_disable_buffer_funcs(adev);
4501 
4502 	amdgpu_fence_driver_hw_fini(adev);
4503 
4504 	r = amdgpu_device_ip_suspend_phase2(adev);
4505 	if (r)
4506 		goto unwind_evict;
4507 
4508 	if (amdgpu_sriov_vf(adev))
4509 		amdgpu_virt_release_full_gpu(adev, false);
4510 
4511 	return 0;
4512 
4513 unwind_evict:
4514 	amdgpu_ttm_enable_buffer_funcs(adev);
4515 	amdgpu_fence_driver_hw_init(adev);
4516 
4517 unwind_userq:
4518 	rec = amdgpu_userq_resume(adev);
4519 	if (rec) {
4520 		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
4521 		return r;
4522 	}
4523 	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4524 	if (rec) {
4525 		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
4526 		return r;
4527 	}
4528 
4529 unwind_ip_phase1:
4530 	/* suspend phase 1 = resume phase 3 */
4531 	rec = amdgpu_device_ip_resume_phase3(adev);
4532 	if (rec) {
4533 		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
4534 		return r;
4535 	}
4536 
4537 unwind_smartshift:
4538 	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
4539 	if (rec) {
4540 		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
4541 		return r;
4542 	}
4543 
4544 	if (notify_clients)
4545 		drm_client_dev_resume(adev_to_drm(adev));
4546 
4547 	amdgpu_ras_resume(adev);
4548 
4549 unwind_sriov:
4550 	if (amdgpu_sriov_vf(adev)) {
4551 		rec = amdgpu_virt_request_full_gpu(adev, true);
4552 		if (rec) {
4553 			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
4554 			return r;
4555 		}
4556 	}
4557 
4558 	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
4559 
4560 	return r;
4561 }
4562 
4563 static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
4564 {
4565 	int r;
4566 	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
4567 
4568 	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
4569 	 * may not work. The access could be blocked by nBIF protection as VF isn't in
4570 	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
4571 	 * so that QEMU reprograms MSIX table.
4572 	 */
4573 	amdgpu_restore_msix(adev);
4574 
4575 	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4576 	if (r)
4577 		return r;
4578 
4579 	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
4580 		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
4581 
4582 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
4583 	adev->vm_manager.vram_base_offset +=
4584 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
4585 
4586 	return 0;
4587 }
4588 
4589 /**
4590  * amdgpu_device_resume - initiate device resume
4591  *
4592  * @dev: drm dev pointer
4593  * @notify_clients: notify in-kernel DRM clients
4594  *
4595  * Bring the hw back to operating state (all asics).
4596  * Returns 0 for success or an error on failure.
4597  * Called at driver resume.
4598  */
4599 int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
4600 {
4601 	struct amdgpu_device *adev = drm_to_adev(dev);
4602 	int r = 0;
4603 
4604 	if (amdgpu_sriov_vf(adev)) {
4605 		r = amdgpu_virt_request_full_gpu(adev, true);
4606 		if (r)
4607 			return r;
4608 	}
4609 
4610 	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
4611 		r = amdgpu_virt_resume(adev);
4612 		if (r)
4613 			goto exit;
4614 	}
4615 
4616 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4617 		return 0;
4618 
4619 	if (adev->in_s0ix)
4620 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4621 
4622 	/* post card */
4623 	if (amdgpu_device_need_post(adev)) {
4624 		r = amdgpu_device_asic_init(adev);
4625 		if (r)
4626 			dev_err(adev->dev, "amdgpu asic init failed\n");
4627 	}
4628 
4629 	r = amdgpu_device_ip_resume(adev);
4630 
4631 	if (r) {
4632 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4633 		goto exit;
4634 	}
4635 
4636 	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4637 	if (r)
4638 		goto exit;
4639 
4640 	r = amdgpu_userq_resume(adev);
4641 	if (r)
4642 		goto exit;
4643 
4644 	r = amdgpu_device_ip_late_init(adev);
4645 	if (r)
4646 		goto exit;
4647 
4648 	queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
4649 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4650 exit:
4651 	if (amdgpu_sriov_vf(adev)) {
4652 		amdgpu_virt_init_data_exchange(adev);
4653 		amdgpu_virt_release_full_gpu(adev, true);
4654 
4655 		if (!r && !adev->in_runpm)
4656 			r = amdgpu_amdkfd_resume_process(adev);
4657 	}
4658 
4659 	if (r)
4660 		return r;
4661 
4662 	/* Make sure IB tests flushed */
4663 	flush_delayed_work(&adev->delayed_init_work);
4664 
4665 	if (notify_clients)
4666 		drm_client_dev_resume(adev_to_drm(adev));
4667 
4668 	amdgpu_ras_resume(adev);
4669 
4670 	if (adev->mode_info.num_crtc) {
4671 		/*
4672 		 * Most of the connector probing functions try to acquire runtime pm
4673 		 * refs to ensure that the GPU is powered on when connector polling is
4674 		 * performed. Since we're calling this from a runtime PM callback,
4675 		 * trying to acquire rpm refs will cause us to deadlock.
4676 		 *
4677 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4678 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4679 		 */
4680 #ifdef CONFIG_PM
4681 		dev->dev->power.disable_depth++;
4682 #endif
4683 		if (!adev->dc_enabled)
4684 			drm_helper_hpd_irq_event(dev);
4685 		else
4686 			drm_kms_helper_hotplug_event(dev);
4687 #ifdef CONFIG_PM
4688 		dev->dev->power.disable_depth--;
4689 #endif
4690 	}
4691 
4692 	amdgpu_vram_mgr_clear_reset_blocks(adev);
4693 	adev->in_suspend = false;
4694 
4695 	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
4696 		dev_warn(adev->dev, "smart shift update failed\n");
4697 
4698 	return 0;
4699 }
4700 
4701 /**
4702  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4703  *
4704  * @adev: amdgpu_device pointer
4705  *
4706  * The list of all the hardware IPs that make up the asic is walked and
4707  * the check_soft_reset callbacks are run.  check_soft_reset determines
4708  * if the asic is still hung or not.
4709  * Returns true if any of the IPs are still in a hung state, false if not.
4710  */
4711 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4712 {
4713 	int i;
4714 	bool asic_hang = false;
4715 
4716 	if (amdgpu_sriov_vf(adev))
4717 		return true;
4718 
4719 	if (amdgpu_asic_need_full_reset(adev))
4720 		return true;
4721 
4722 	for (i = 0; i < adev->num_ip_blocks; i++) {
4723 		if (!adev->ip_blocks[i].status.valid)
4724 			continue;
4725 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4726 			adev->ip_blocks[i].status.hang =
4727 				adev->ip_blocks[i].version->funcs->check_soft_reset(
4728 					&adev->ip_blocks[i]);
4729 		if (adev->ip_blocks[i].status.hang) {
4730 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4731 			asic_hang = true;
4732 		}
4733 	}
4734 	return asic_hang;
4735 }
4736 
4737 /**
4738  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4739  *
4740  * @adev: amdgpu_device pointer
4741  *
4742  * The list of all the hardware IPs that make up the asic is walked and the
4743  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4744  * handles any IP specific hardware or software state changes that are
4745  * necessary for a soft reset to succeed.
4746  * Returns 0 on success, negative error code on failure.
4747  */
4748 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4749 {
4750 	int i, r = 0;
4751 
4752 	for (i = 0; i < adev->num_ip_blocks; i++) {
4753 		if (!adev->ip_blocks[i].status.valid)
4754 			continue;
4755 		if (adev->ip_blocks[i].status.hang &&
4756 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4757 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
4758 			if (r)
4759 				return r;
4760 		}
4761 	}
4762 
4763 	return 0;
4764 }
4765 
4766 /**
4767  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4768  *
4769  * @adev: amdgpu_device pointer
4770  *
4771  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4772  * reset is necessary to recover.
4773  * Returns true if a full asic reset is required, false if not.
4774  */
4775 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4776 {
4777 	int i;
4778 
4779 	if (amdgpu_asic_need_full_reset(adev))
4780 		return true;
4781 
4782 	for (i = 0; i < adev->num_ip_blocks; i++) {
4783 		if (!adev->ip_blocks[i].status.valid)
4784 			continue;
4785 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4786 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4787 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4788 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4789 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4790 			if (adev->ip_blocks[i].status.hang) {
4791 				dev_info(adev->dev, "Some block need full reset!\n");
4792 				return true;
4793 			}
4794 		}
4795 	}
4796 	return false;
4797 }
4798 
4799 /**
4800  * amdgpu_device_ip_soft_reset - do a soft reset
4801  *
4802  * @adev: amdgpu_device pointer
4803  *
4804  * The list of all the hardware IPs that make up the asic is walked and the
4805  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4806  * IP specific hardware or software state changes that are necessary to soft
4807  * reset the IP.
4808  * Returns 0 on success, negative error code on failure.
4809  */
4810 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4811 {
4812 	int i, r = 0;
4813 
4814 	for (i = 0; i < adev->num_ip_blocks; i++) {
4815 		if (!adev->ip_blocks[i].status.valid)
4816 			continue;
4817 		if (adev->ip_blocks[i].status.hang &&
4818 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4819 			r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
4820 			if (r)
4821 				return r;
4822 		}
4823 	}
4824 
4825 	return 0;
4826 }
4827 
4828 /**
4829  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4830  *
4831  * @adev: amdgpu_device pointer
4832  *
4833  * The list of all the hardware IPs that make up the asic is walked and the
4834  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4835  * handles any IP specific hardware or software state changes that are
4836  * necessary after the IP has been soft reset.
4837  * Returns 0 on success, negative error code on failure.
4838  */
4839 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4840 {
4841 	int i, r = 0;
4842 
4843 	for (i = 0; i < adev->num_ip_blocks; i++) {
4844 		if (!adev->ip_blocks[i].status.valid)
4845 			continue;
4846 		if (adev->ip_blocks[i].status.hang &&
4847 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4848 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
4849 		if (r)
4850 			return r;
4851 	}
4852 
4853 	return 0;
4854 }
4855 
4856 /**
4857  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4858  *
4859  * @adev: amdgpu_device pointer
4860  * @reset_context: amdgpu reset context pointer
4861  *
4862  * do VF FLR and reinitialize Asic
4863  * return 0 means succeeded otherwise failed
4864  */
4865 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4866 				     struct amdgpu_reset_context *reset_context)
4867 {
4868 	int r;
4869 	struct amdgpu_hive_info *hive = NULL;
4870 
4871 	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
4872 		if (!amdgpu_ras_get_fed_status(adev))
4873 			amdgpu_virt_ready_to_reset(adev);
4874 		amdgpu_virt_wait_reset(adev);
4875 		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
4876 		r = amdgpu_virt_request_full_gpu(adev, true);
4877 	} else {
4878 		r = amdgpu_virt_reset_gpu(adev);
4879 	}
4880 	if (r)
4881 		return r;
4882 
4883 	amdgpu_ras_clear_err_state(adev);
4884 	amdgpu_irq_gpu_reset_resume_helper(adev);
4885 
4886 	/* some sw clean up VF needs to do before recover */
4887 	amdgpu_virt_post_reset(adev);
4888 
4889 	/* Resume IP prior to SMC */
4890 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4891 	if (r)
4892 		return r;
4893 
4894 	amdgpu_virt_init_data_exchange(adev);
4895 
4896 	r = amdgpu_device_fw_loading(adev);
4897 	if (r)
4898 		return r;
4899 
4900 	/* now we are okay to resume SMC/CP/SDMA */
4901 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4902 	if (r)
4903 		return r;
4904 
4905 	hive = amdgpu_get_xgmi_hive(adev);
4906 	/* Update PSP FW topology after reset */
4907 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4908 		r = amdgpu_xgmi_update_topology(hive, adev);
4909 	if (hive)
4910 		amdgpu_put_xgmi_hive(hive);
4911 	if (r)
4912 		return r;
4913 
4914 	r = amdgpu_ib_ring_tests(adev);
4915 	if (r)
4916 		return r;
4917 
4918 	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
4919 		amdgpu_inc_vram_lost(adev);
4920 
4921 	/* need to be called during full access so we can't do it later like
4922 	 * bare-metal does.
4923 	 */
4924 	amdgpu_amdkfd_post_reset(adev);
4925 	amdgpu_virt_release_full_gpu(adev, true);
4926 
4927 	/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4928 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
4929 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
4930 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
4931 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
4932 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
4933 		amdgpu_ras_resume(adev);
4934 
4935 	amdgpu_virt_ras_telemetry_post_reset(adev);
4936 
4937 	return 0;
4938 }
4939 
4940 /**
4941  * amdgpu_device_has_job_running - check if there is any unfinished job
4942  *
4943  * @adev: amdgpu_device pointer
4944  *
4945  * check if there is any job running on the device when guest driver receives
4946  * FLR notification from host driver. If there are still jobs running, then
4947  * the guest driver will not respond the FLR reset. Instead, let the job hit
4948  * the timeout and guest driver then issue the reset request.
4949  */
4950 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4951 {
4952 	int i;
4953 
4954 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4955 		struct amdgpu_ring *ring = adev->rings[i];
4956 
4957 		if (!amdgpu_ring_sched_ready(ring))
4958 			continue;
4959 
4960 		if (amdgpu_fence_count_emitted(ring))
4961 			return true;
4962 	}
4963 	return false;
4964 }
4965 
4966 /**
4967  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4968  *
4969  * @adev: amdgpu_device pointer
4970  *
4971  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4972  * a hung GPU.
4973  */
4974 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4975 {
4976 
4977 	if (amdgpu_gpu_recovery == 0)
4978 		goto disabled;
4979 
4980 	/* Skip soft reset check in fatal error mode */
4981 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4982 		return true;
4983 
4984 	if (amdgpu_sriov_vf(adev))
4985 		return true;
4986 
4987 	if (amdgpu_gpu_recovery == -1) {
4988 		switch (adev->asic_type) {
4989 #ifdef CONFIG_DRM_AMDGPU_SI
4990 		case CHIP_VERDE:
4991 		case CHIP_TAHITI:
4992 		case CHIP_PITCAIRN:
4993 		case CHIP_OLAND:
4994 		case CHIP_HAINAN:
4995 #endif
4996 #ifdef CONFIG_DRM_AMDGPU_CIK
4997 		case CHIP_KAVERI:
4998 		case CHIP_KABINI:
4999 		case CHIP_MULLINS:
5000 #endif
5001 		case CHIP_CARRIZO:
5002 		case CHIP_STONEY:
5003 		case CHIP_CYAN_SKILLFISH:
5004 			goto disabled;
5005 		default:
5006 			break;
5007 		}
5008 	}
5009 
5010 	return true;
5011 
5012 disabled:
5013 		dev_info(adev->dev, "GPU recovery disabled.\n");
5014 		return false;
5015 }
5016 
5017 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5018 {
5019 	u32 i;
5020 	int ret = 0;
5021 
5022 	if (adev->bios)
5023 		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5024 
5025 	dev_info(adev->dev, "GPU mode1 reset\n");
5026 
5027 	/* Cache the state before bus master disable. The saved config space
5028 	 * values are used in other cases like restore after mode-2 reset.
5029 	 */
5030 	amdgpu_device_cache_pci_state(adev->pdev);
5031 
5032 	/* disable BM */
5033 	pci_clear_master(adev->pdev);
5034 
5035 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5036 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5037 		ret = amdgpu_dpm_mode1_reset(adev);
5038 	} else {
5039 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5040 		ret = psp_gpu_reset(adev);
5041 	}
5042 
5043 	if (ret)
5044 		goto mode1_reset_failed;
5045 
5046 	/* enable mmio access after mode 1 reset completed */
5047 	adev->no_hw_access = false;
5048 
5049 	/* ensure no_hw_access is updated before we access hw */
5050 	smp_mb();
5051 
5052 	amdgpu_device_load_pci_state(adev->pdev);
5053 	ret = amdgpu_psp_wait_for_bootloader(adev);
5054 	if (ret)
5055 		goto mode1_reset_failed;
5056 
5057 	/* wait for asic to come out of reset */
5058 	for (i = 0; i < adev->usec_timeout; i++) {
5059 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5060 
5061 		if (memsize != 0xffffffff)
5062 			break;
5063 		udelay(1);
5064 	}
5065 
5066 	if (i >= adev->usec_timeout) {
5067 		ret = -ETIMEDOUT;
5068 		goto mode1_reset_failed;
5069 	}
5070 
5071 	if (adev->bios)
5072 		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5073 
5074 	return 0;
5075 
5076 mode1_reset_failed:
5077 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5078 	return ret;
5079 }
5080 
5081 int amdgpu_device_link_reset(struct amdgpu_device *adev)
5082 {
5083 	int ret = 0;
5084 
5085 	dev_info(adev->dev, "GPU link reset\n");
5086 
5087 	if (!amdgpu_reset_in_dpc(adev))
5088 		ret = amdgpu_dpm_link_reset(adev);
5089 
5090 	if (ret)
5091 		goto link_reset_failed;
5092 
5093 	ret = amdgpu_psp_wait_for_bootloader(adev);
5094 	if (ret)
5095 		goto link_reset_failed;
5096 
5097 	return 0;
5098 
5099 link_reset_failed:
5100 	dev_err(adev->dev, "GPU link reset failed\n");
5101 	return ret;
5102 }
5103 
5104 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5105 				 struct amdgpu_reset_context *reset_context)
5106 {
5107 	int i, r = 0;
5108 	struct amdgpu_job *job = NULL;
5109 	struct dma_fence *fence = NULL;
5110 	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5111 	bool need_full_reset =
5112 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5113 
5114 	if (reset_context->reset_req_dev == adev)
5115 		job = reset_context->job;
5116 
5117 	if (amdgpu_sriov_vf(adev))
5118 		amdgpu_virt_pre_reset(adev);
5119 
5120 	amdgpu_fence_driver_isr_toggle(adev, true);
5121 
5122 	if (job)
5123 		fence = &job->hw_fence->base;
5124 
5125 	/* block all schedulers and reset given job's ring */
5126 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5127 		struct amdgpu_ring *ring = adev->rings[i];
5128 
5129 		if (!amdgpu_ring_sched_ready(ring))
5130 			continue;
5131 
5132 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5133 		amdgpu_fence_driver_force_completion(ring, fence);
5134 	}
5135 
5136 	amdgpu_fence_driver_isr_toggle(adev, false);
5137 
5138 	if (job && job->vm)
5139 		drm_sched_increase_karma(&job->base);
5140 
5141 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5142 	/* If reset handler not implemented, continue; otherwise return */
5143 	if (r == -EOPNOTSUPP)
5144 		r = 0;
5145 	else
5146 		return r;
5147 
5148 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5149 	if (!amdgpu_sriov_vf(adev)) {
5150 
5151 		if (!need_full_reset)
5152 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5153 
5154 		if (!need_full_reset && amdgpu_gpu_recovery &&
5155 		    amdgpu_device_ip_check_soft_reset(adev)) {
5156 			amdgpu_device_ip_pre_soft_reset(adev);
5157 			r = amdgpu_device_ip_soft_reset(adev);
5158 			amdgpu_device_ip_post_soft_reset(adev);
5159 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5160 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5161 				need_full_reset = true;
5162 			}
5163 		}
5164 
5165 		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5166 			dev_info(tmp_adev->dev, "Dumping IP State\n");
5167 			/* Trigger ip dump before we reset the asic */
5168 			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5169 				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5170 					tmp_adev->ip_blocks[i].version->funcs
5171 						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5172 			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5173 		}
5174 
5175 		if (need_full_reset)
5176 			r = amdgpu_device_ip_suspend(adev);
5177 		if (need_full_reset)
5178 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5179 		else
5180 			clear_bit(AMDGPU_NEED_FULL_RESET,
5181 				  &reset_context->flags);
5182 	}
5183 
5184 	return r;
5185 }
5186 
5187 int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5188 {
5189 	struct list_head *device_list_handle;
5190 	bool full_reset, vram_lost = false;
5191 	struct amdgpu_device *tmp_adev;
5192 	int r, init_level;
5193 
5194 	device_list_handle = reset_context->reset_device_list;
5195 
5196 	if (!device_list_handle)
5197 		return -EINVAL;
5198 
5199 	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5200 
5201 	/**
5202 	 * If it's reset on init, it's default init level, otherwise keep level
5203 	 * as recovery level.
5204 	 */
5205 	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5206 			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5207 	else
5208 			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5209 
5210 	r = 0;
5211 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5212 		amdgpu_set_init_level(tmp_adev, init_level);
5213 		if (full_reset) {
5214 			/* post card */
5215 			amdgpu_reset_set_dpc_status(tmp_adev, false);
5216 			amdgpu_ras_clear_err_state(tmp_adev);
5217 			r = amdgpu_device_asic_init(tmp_adev);
5218 			if (r) {
5219 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5220 			} else {
5221 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5222 
5223 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5224 				if (r)
5225 					goto out;
5226 
5227 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5228 
5229 				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5230 					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5231 
5232 				if (vram_lost) {
5233 					dev_info(
5234 						tmp_adev->dev,
5235 						"VRAM is lost due to GPU reset!\n");
5236 					amdgpu_inc_vram_lost(tmp_adev);
5237 				}
5238 
5239 				r = amdgpu_device_fw_loading(tmp_adev);
5240 				if (r)
5241 					return r;
5242 
5243 				r = amdgpu_xcp_restore_partition_mode(
5244 					tmp_adev->xcp_mgr);
5245 				if (r)
5246 					goto out;
5247 
5248 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5249 				if (r)
5250 					goto out;
5251 
5252 				amdgpu_ttm_enable_buffer_funcs(tmp_adev);
5253 
5254 				r = amdgpu_device_ip_resume_phase3(tmp_adev);
5255 				if (r)
5256 					goto out;
5257 
5258 				if (vram_lost)
5259 					amdgpu_device_fill_reset_magic(tmp_adev);
5260 
5261 				/*
5262 				 * Add this ASIC as tracked as reset was already
5263 				 * complete successfully.
5264 				 */
5265 				amdgpu_register_gpu_instance(tmp_adev);
5266 
5267 				if (!reset_context->hive &&
5268 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5269 					amdgpu_xgmi_add_device(tmp_adev);
5270 
5271 				r = amdgpu_device_ip_late_init(tmp_adev);
5272 				if (r)
5273 					goto out;
5274 
5275 				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
5276 				if (r)
5277 					goto out;
5278 
5279 				drm_client_dev_resume(adev_to_drm(tmp_adev));
5280 
5281 				/*
5282 				 * The GPU enters bad state once faulty pages
5283 				 * by ECC has reached the threshold, and ras
5284 				 * recovery is scheduled next. So add one check
5285 				 * here to break recovery if it indeed exceeds
5286 				 * bad page threshold, and remind user to
5287 				 * retire this GPU or setting one bigger
5288 				 * bad_page_threshold value to fix this once
5289 				 * probing driver again.
5290 				 */
5291 				if (!amdgpu_ras_is_rma(tmp_adev)) {
5292 					/* must succeed. */
5293 					amdgpu_ras_resume(tmp_adev);
5294 				} else {
5295 					r = -EINVAL;
5296 					goto out;
5297 				}
5298 
5299 				/* Update PSP FW topology after reset */
5300 				if (reset_context->hive &&
5301 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5302 					r = amdgpu_xgmi_update_topology(
5303 						reset_context->hive, tmp_adev);
5304 			}
5305 		}
5306 
5307 out:
5308 		if (!r) {
5309 			/* IP init is complete now, set level as default */
5310 			amdgpu_set_init_level(tmp_adev,
5311 					      AMDGPU_INIT_LEVEL_DEFAULT);
5312 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5313 			r = amdgpu_ib_ring_tests(tmp_adev);
5314 			if (r) {
5315 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5316 				r = -EAGAIN;
5317 				goto end;
5318 			}
5319 		}
5320 
5321 		if (r)
5322 			tmp_adev->asic_reset_res = r;
5323 	}
5324 
5325 end:
5326 	return r;
5327 }
5328 
5329 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5330 			 struct amdgpu_reset_context *reset_context)
5331 {
5332 	struct amdgpu_device *tmp_adev = NULL;
5333 	bool need_full_reset, skip_hw_reset;
5334 	int r = 0;
5335 
5336 	/* Try reset handler method first */
5337 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5338 				    reset_list);
5339 
5340 	reset_context->reset_device_list = device_list_handle;
5341 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5342 	/* If reset handler not implemented, continue; otherwise return */
5343 	if (r == -EOPNOTSUPP)
5344 		r = 0;
5345 	else
5346 		return r;
5347 
5348 	/* Reset handler not implemented, use the default method */
5349 	need_full_reset =
5350 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5351 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5352 
5353 	/*
5354 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5355 	 * to allow proper links negotiation in FW (within 1 sec)
5356 	 */
5357 	if (!skip_hw_reset && need_full_reset) {
5358 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5359 			/* For XGMI run all resets in parallel to speed up the process */
5360 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5361 				if (!queue_work(system_dfl_wq,
5362 						&tmp_adev->xgmi_reset_work))
5363 					r = -EALREADY;
5364 			} else
5365 				r = amdgpu_asic_reset(tmp_adev);
5366 
5367 			if (r) {
5368 				dev_err(tmp_adev->dev,
5369 					"ASIC reset failed with error, %d for drm dev, %s",
5370 					r, adev_to_drm(tmp_adev)->unique);
5371 				goto out;
5372 			}
5373 		}
5374 
5375 		/* For XGMI wait for all resets to complete before proceed */
5376 		if (!r) {
5377 			list_for_each_entry(tmp_adev, device_list_handle,
5378 					    reset_list) {
5379 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5380 					flush_work(&tmp_adev->xgmi_reset_work);
5381 					r = tmp_adev->asic_reset_res;
5382 					if (r)
5383 						break;
5384 				}
5385 			}
5386 		}
5387 	}
5388 
5389 	if (!r && amdgpu_ras_intr_triggered()) {
5390 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5391 			amdgpu_ras_reset_error_count(tmp_adev,
5392 						     AMDGPU_RAS_BLOCK__MMHUB);
5393 		}
5394 
5395 		amdgpu_ras_intr_cleared();
5396 	}
5397 
5398 	r = amdgpu_device_reinit_after_reset(reset_context);
5399 	if (r == -EAGAIN)
5400 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5401 	else
5402 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5403 
5404 out:
5405 	return r;
5406 }
5407 
5408 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5409 {
5410 
5411 	switch (amdgpu_asic_reset_method(adev)) {
5412 	case AMD_RESET_METHOD_MODE1:
5413 	case AMD_RESET_METHOD_LINK:
5414 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5415 		break;
5416 	case AMD_RESET_METHOD_MODE2:
5417 		adev->mp1_state = PP_MP1_STATE_RESET;
5418 		break;
5419 	default:
5420 		adev->mp1_state = PP_MP1_STATE_NONE;
5421 		break;
5422 	}
5423 }
5424 
5425 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5426 {
5427 	amdgpu_vf_error_trans_all(adev);
5428 	adev->mp1_state = PP_MP1_STATE_NONE;
5429 }
5430 
5431 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5432 {
5433 	struct pci_dev *p = NULL;
5434 
5435 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5436 			adev->pdev->bus->number, 1);
5437 	if (p) {
5438 		pm_runtime_enable(&(p->dev));
5439 		pm_runtime_resume(&(p->dev));
5440 	}
5441 
5442 	pci_dev_put(p);
5443 }
5444 
5445 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5446 {
5447 	enum amd_reset_method reset_method;
5448 	struct pci_dev *p = NULL;
5449 	u64 expires;
5450 
5451 	/*
5452 	 * For now, only BACO and mode1 reset are confirmed
5453 	 * to suffer the audio issue without proper suspended.
5454 	 */
5455 	reset_method = amdgpu_asic_reset_method(adev);
5456 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5457 	     (reset_method != AMD_RESET_METHOD_MODE1))
5458 		return -EINVAL;
5459 
5460 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5461 			adev->pdev->bus->number, 1);
5462 	if (!p)
5463 		return -ENODEV;
5464 
5465 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5466 	if (!expires)
5467 		/*
5468 		 * If we cannot get the audio device autosuspend delay,
5469 		 * a fixed 4S interval will be used. Considering 3S is
5470 		 * the audio controller default autosuspend delay setting.
5471 		 * 4S used here is guaranteed to cover that.
5472 		 */
5473 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5474 
5475 	while (!pm_runtime_status_suspended(&(p->dev))) {
5476 		if (!pm_runtime_suspend(&(p->dev)))
5477 			break;
5478 
5479 		if (expires < ktime_get_mono_fast_ns()) {
5480 			dev_warn(adev->dev, "failed to suspend display audio\n");
5481 			pci_dev_put(p);
5482 			/* TODO: abort the succeeding gpu reset? */
5483 			return -ETIMEDOUT;
5484 		}
5485 	}
5486 
5487 	pm_runtime_disable(&(p->dev));
5488 
5489 	pci_dev_put(p);
5490 	return 0;
5491 }
5492 
5493 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5494 {
5495 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5496 
5497 #if defined(CONFIG_DEBUG_FS)
5498 	if (!amdgpu_sriov_vf(adev))
5499 		cancel_work(&adev->reset_work);
5500 #endif
5501 	amdgpu_userq_mgr_cancel_reset_work(adev);
5502 
5503 	if (adev->kfd.dev)
5504 		cancel_work(&adev->kfd.reset_work);
5505 
5506 	if (amdgpu_sriov_vf(adev))
5507 		cancel_work(&adev->virt.flr_work);
5508 
5509 	if (con && adev->ras_enabled)
5510 		cancel_work(&con->recovery_work);
5511 
5512 }
5513 
5514 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5515 {
5516 	struct amdgpu_device *tmp_adev;
5517 	int ret = 0;
5518 
5519 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5520 		ret |= amdgpu_device_bus_status_check(tmp_adev);
5521 	}
5522 
5523 	return ret;
5524 }
5525 
5526 static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
5527 					  struct list_head *device_list,
5528 					  struct amdgpu_hive_info *hive)
5529 {
5530 	struct amdgpu_device *tmp_adev = NULL;
5531 
5532 	/*
5533 	 * Build list of devices to reset.
5534 	 * In case we are in XGMI hive mode, resort the device list
5535 	 * to put adev in the 1st position.
5536 	 */
5537 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5538 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5539 			list_add_tail(&tmp_adev->reset_list, device_list);
5540 			if (adev->shutdown)
5541 				tmp_adev->shutdown = true;
5542 		}
5543 		if (!list_is_first(&adev->reset_list, device_list))
5544 			list_rotate_to_front(&adev->reset_list, device_list);
5545 	} else {
5546 		list_add_tail(&adev->reset_list, device_list);
5547 	}
5548 }
5549 
5550 static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
5551 						  struct list_head *device_list)
5552 {
5553 	struct amdgpu_device *tmp_adev = NULL;
5554 
5555 	if (list_empty(device_list))
5556 		return;
5557 	tmp_adev =
5558 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5559 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5560 }
5561 
5562 static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
5563 						  struct list_head *device_list)
5564 {
5565 	struct amdgpu_device *tmp_adev = NULL;
5566 
5567 	if (list_empty(device_list))
5568 		return;
5569 	tmp_adev =
5570 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5571 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5572 }
5573 
5574 static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
5575 					  struct amdgpu_job *job,
5576 					  struct amdgpu_reset_context *reset_context,
5577 					  struct list_head *device_list,
5578 					  struct amdgpu_hive_info *hive,
5579 					  bool need_emergency_restart)
5580 {
5581 	struct amdgpu_device *tmp_adev = NULL;
5582 	int i;
5583 
5584 	/* block all schedulers and reset given job's ring */
5585 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5586 		amdgpu_device_set_mp1_state(tmp_adev);
5587 
5588 		/*
5589 		 * Try to put the audio codec into suspend state
5590 		 * before gpu reset started.
5591 		 *
5592 		 * Due to the power domain of the graphics device
5593 		 * is shared with AZ power domain. Without this,
5594 		 * we may change the audio hardware from behind
5595 		 * the audio driver's back. That will trigger
5596 		 * some audio codec errors.
5597 		 */
5598 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5599 			tmp_adev->pcie_reset_ctx.audio_suspended = true;
5600 
5601 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5602 
5603 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5604 
5605 		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5606 
5607 		/*
5608 		 * Mark these ASICs to be reset as untracked first
5609 		 * And add them back after reset completed
5610 		 */
5611 		amdgpu_unregister_gpu_instance(tmp_adev);
5612 
5613 		drm_client_dev_suspend(adev_to_drm(tmp_adev));
5614 
5615 		/* disable ras on ALL IPs */
5616 		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
5617 		    amdgpu_device_ip_need_full_reset(tmp_adev))
5618 			amdgpu_ras_suspend(tmp_adev);
5619 
5620 		amdgpu_userq_pre_reset(tmp_adev);
5621 
5622 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5623 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5624 
5625 			if (!amdgpu_ring_sched_ready(ring))
5626 				continue;
5627 
5628 			drm_sched_wqueue_stop(&ring->sched);
5629 
5630 			if (need_emergency_restart)
5631 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5632 		}
5633 		atomic_inc(&tmp_adev->gpu_reset_counter);
5634 	}
5635 }
5636 
5637 static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
5638 			      struct list_head *device_list,
5639 			      struct amdgpu_reset_context *reset_context)
5640 {
5641 	struct amdgpu_device *tmp_adev = NULL;
5642 	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5643 	int r = 0;
5644 
5645 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5646 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5647 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5648 		/*TODO Should we stop ?*/
5649 		if (r) {
5650 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5651 				  r, adev_to_drm(tmp_adev)->unique);
5652 			tmp_adev->asic_reset_res = r;
5653 		}
5654 	}
5655 
5656 	/* Actual ASIC resets if needed.*/
5657 	/* Host driver will handle XGMI hive reset for SRIOV */
5658 	if (amdgpu_sriov_vf(adev)) {
5659 
5660 		/* Bail out of reset early */
5661 		if (amdgpu_ras_is_rma(adev))
5662 			return -ENODEV;
5663 
5664 		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5665 			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5666 			amdgpu_ras_set_fed(adev, true);
5667 			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5668 		}
5669 
5670 		r = amdgpu_device_reset_sriov(adev, reset_context);
5671 		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5672 			amdgpu_virt_release_full_gpu(adev, true);
5673 			goto retry;
5674 		}
5675 		if (r)
5676 			adev->asic_reset_res = r;
5677 	} else {
5678 		r = amdgpu_do_asic_reset(device_list, reset_context);
5679 		if (r && r == -EAGAIN)
5680 			goto retry;
5681 	}
5682 
5683 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5684 		/*
5685 		 * Drop any pending non scheduler resets queued before reset is done.
5686 		 * Any reset scheduled after this point would be valid. Scheduler resets
5687 		 * were already dropped during drm_sched_stop and no new ones can come
5688 		 * in before drm_sched_start.
5689 		 */
5690 		amdgpu_device_stop_pending_resets(tmp_adev);
5691 	}
5692 
5693 	return r;
5694 }
5695 
5696 static int amdgpu_device_sched_resume(struct list_head *device_list,
5697 			      struct amdgpu_reset_context *reset_context,
5698 			      bool   job_signaled)
5699 {
5700 	struct amdgpu_device *tmp_adev = NULL;
5701 	int i, r = 0;
5702 
5703 	/* Post ASIC reset for all devs .*/
5704 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5705 
5706 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5707 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5708 
5709 			if (!amdgpu_ring_sched_ready(ring))
5710 				continue;
5711 
5712 			drm_sched_wqueue_start(&ring->sched);
5713 		}
5714 
5715 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5716 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5717 
5718 		if (tmp_adev->asic_reset_res) {
5719 			/* bad news, how to tell it to userspace ?
5720 			 * for ras error, we should report GPU bad status instead of
5721 			 * reset failure
5722 			 */
5723 			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5724 			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5725 				dev_info(
5726 					tmp_adev->dev,
5727 					"GPU reset(%d) failed with error %d\n",
5728 					atomic_read(
5729 						&tmp_adev->gpu_reset_counter),
5730 					tmp_adev->asic_reset_res);
5731 			amdgpu_vf_error_put(tmp_adev,
5732 					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
5733 					    tmp_adev->asic_reset_res);
5734 			if (!r)
5735 				r = tmp_adev->asic_reset_res;
5736 			tmp_adev->asic_reset_res = 0;
5737 		} else {
5738 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
5739 				 atomic_read(&tmp_adev->gpu_reset_counter));
5740 			if (amdgpu_acpi_smart_shift_update(tmp_adev,
5741 							   AMDGPU_SS_DEV_D0))
5742 				dev_warn(tmp_adev->dev,
5743 					 "smart shift update failed\n");
5744 		}
5745 	}
5746 
5747 	return r;
5748 }
5749 
5750 static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
5751 			      struct list_head *device_list,
5752 			      bool   need_emergency_restart)
5753 {
5754 	struct amdgpu_device *tmp_adev = NULL;
5755 
5756 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5757 		/* unlock kfd: SRIOV would do it separately */
5758 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5759 			amdgpu_amdkfd_post_reset(tmp_adev);
5760 
5761 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5762 		 * need to bring up kfd here if it's not be initialized before
5763 		 */
5764 		if (!adev->kfd.init_complete)
5765 			amdgpu_amdkfd_device_init(adev);
5766 
5767 		if (tmp_adev->pcie_reset_ctx.audio_suspended)
5768 			amdgpu_device_resume_display_audio(tmp_adev);
5769 
5770 		amdgpu_device_unset_mp1_state(tmp_adev);
5771 
5772 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5773 
5774 	}
5775 }
5776 
5777 
5778 /**
5779  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5780  *
5781  * @adev: amdgpu_device pointer
5782  * @job: which job trigger hang
5783  * @reset_context: amdgpu reset context pointer
5784  *
5785  * Attempt to reset the GPU if it has hung (all asics).
5786  * Attempt to do soft-reset or full-reset and reinitialize Asic
5787  * Returns 0 for success or an error on failure.
5788  */
5789 
5790 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5791 			      struct amdgpu_job *job,
5792 			      struct amdgpu_reset_context *reset_context)
5793 {
5794 	struct list_head device_list;
5795 	bool job_signaled = false;
5796 	struct amdgpu_hive_info *hive = NULL;
5797 	int r = 0;
5798 	bool need_emergency_restart = false;
5799 	/* save the pasid here as the job may be freed before the end of the reset */
5800 	int pasid = job ? job->pasid : -EINVAL;
5801 
5802 	/*
5803 	 * If it reaches here because of hang/timeout and a RAS error is
5804 	 * detected at the same time, let RAS recovery take care of it.
5805 	 */
5806 	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
5807 	    !amdgpu_sriov_vf(adev) &&
5808 	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
5809 		dev_dbg(adev->dev,
5810 			"Gpu recovery from source: %d yielding to RAS error recovery handling",
5811 			reset_context->src);
5812 		return 0;
5813 	}
5814 
5815 	/*
5816 	 * Special case: RAS triggered and full reset isn't supported
5817 	 */
5818 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5819 
5820 	/*
5821 	 * Flush RAM to disk so that after reboot
5822 	 * the user can read log and see why the system rebooted.
5823 	 */
5824 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5825 		amdgpu_ras_get_context(adev)->reboot) {
5826 		dev_warn(adev->dev, "Emergency reboot.");
5827 
5828 		ksys_sync_helper();
5829 		emergency_restart();
5830 	}
5831 
5832 	dev_info(adev->dev, "GPU %s begin!. Source:  %d\n",
5833 		 need_emergency_restart ? "jobs stop" : "reset",
5834 		 reset_context->src);
5835 
5836 	if (!amdgpu_sriov_vf(adev))
5837 		hive = amdgpu_get_xgmi_hive(adev);
5838 	if (hive)
5839 		mutex_lock(&hive->hive_lock);
5840 
5841 	reset_context->job = job;
5842 	reset_context->hive = hive;
5843 	INIT_LIST_HEAD(&device_list);
5844 
5845 	amdgpu_device_recovery_prepare(adev, &device_list, hive);
5846 
5847 	if (!amdgpu_sriov_vf(adev)) {
5848 		r = amdgpu_device_health_check(&device_list);
5849 		if (r)
5850 			goto end_reset;
5851 	}
5852 
5853 	/* Cannot be called after locking reset domain */
5854 	amdgpu_ras_pre_reset(adev, &device_list);
5855 
5856 	/* We need to lock reset domain only once both for XGMI and single device */
5857 	amdgpu_device_recovery_get_reset_lock(adev, &device_list);
5858 
5859 	/* unmap all the mappings of doorbell and framebuffer to prevent user space from
5860 	 * accessing them
5861 	 */
5862 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
5863 	amdgpu_amdkfd_clear_kfd_mapping(adev);
5864 
5865 	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
5866 				      hive, need_emergency_restart);
5867 	if (need_emergency_restart)
5868 		goto skip_sched_resume;
5869 	/*
5870 	 * Must check guilty signal here since after this point all old
5871 	 * HW fences are force signaled.
5872 	 *
5873 	 * job->base holds a reference to parent fence
5874 	 */
5875 	if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) {
5876 		job_signaled = true;
5877 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5878 		goto skip_hw_reset;
5879 	}
5880 
5881 	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
5882 	if (r)
5883 		goto reset_unlock;
5884 skip_hw_reset:
5885 	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
5886 	if (r)
5887 		goto reset_unlock;
5888 skip_sched_resume:
5889 	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
5890 reset_unlock:
5891 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
5892 	amdgpu_ras_post_reset(adev, &device_list);
5893 end_reset:
5894 	if (hive) {
5895 		mutex_unlock(&hive->hive_lock);
5896 		amdgpu_put_xgmi_hive(hive);
5897 	}
5898 
5899 	if (r)
5900 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5901 
5902 	atomic_set(&adev->reset_domain->reset_res, r);
5903 
5904 	if (!r) {
5905 		struct amdgpu_task_info *ti = NULL;
5906 
5907 		/*
5908 		 * The job may already be freed at this point via the sched tdr workqueue so
5909 		 * use the cached pasid.
5910 		 */
5911 		if (pasid >= 0)
5912 			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
5913 
5914 		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
5915 				     ti ? &ti->task : NULL);
5916 
5917 		amdgpu_vm_put_task_info(ti);
5918 	}
5919 
5920 	return r;
5921 }
5922 
5923 /**
5924  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5925  *
5926  * @adev: amdgpu_device pointer
5927  * @speed: pointer to the speed of the link
5928  * @width: pointer to the width of the link
5929  *
5930  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5931  * first physical partner to an AMD dGPU.
5932  * This will exclude any virtual switches and links.
5933  */
5934 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5935 					    enum pci_bus_speed *speed,
5936 					    enum pcie_link_width *width)
5937 {
5938 	if (!speed || !width)
5939 		return;
5940 
5941 	*speed = PCI_SPEED_UNKNOWN;
5942 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5943 
5944 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5945 		struct pci_dev *parent = amdgpu_device_find_parent(adev);
5946 
5947 		if (parent) {
5948 			*speed = pcie_get_speed_cap(parent);
5949 			*width = pcie_get_width_cap(parent);
5950 		}
5951 	} else {
5952 		/* use the current speeds rather than max if switching is not supported */
5953 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5954 	}
5955 }
5956 
5957 /**
5958  * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
5959  *
5960  * @adev: amdgpu_device pointer
5961  * @speed: pointer to the speed of the link
5962  * @width: pointer to the width of the link
5963  *
5964  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5965  * AMD dGPU which may be a virtual upstream bridge.
5966  */
5967 static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
5968 					enum pci_bus_speed *speed,
5969 					enum pcie_link_width *width)
5970 {
5971 	struct pci_dev *parent = adev->pdev;
5972 
5973 	if (!speed || !width)
5974 		return;
5975 
5976 	/* use the device itself */
5977 	*speed = pcie_get_speed_cap(adev->pdev);
5978 	*width = pcie_get_width_cap(adev->pdev);
5979 
5980 	/* use the link outside the device */
5981 	parent = amdgpu_device_find_parent(adev);
5982 	if (parent) {
5983 		*speed = pcie_get_speed_cap(parent);
5984 		*width = pcie_get_width_cap(parent);
5985 	}
5986 }
5987 
5988 /**
5989  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5990  *
5991  * @adev: amdgpu_device pointer
5992  *
5993  * Fetches and stores in the driver the PCIE capabilities (gen speed
5994  * and lanes) of the slot the device is in. Handles APUs and
5995  * virtualized environments where PCIE config space may not be available.
5996  */
5997 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5998 {
5999 	enum pci_bus_speed speed_cap, platform_speed_cap;
6000 	enum pcie_link_width platform_link_width, link_width;
6001 
6002 	if (amdgpu_pcie_gen_cap)
6003 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6004 
6005 	if (amdgpu_pcie_lane_cap)
6006 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6007 
6008 	/* covers APUs as well */
6009 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6010 		if (adev->pm.pcie_gen_mask == 0)
6011 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6012 		if (adev->pm.pcie_mlw_mask == 0)
6013 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6014 		return;
6015 	}
6016 
6017 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6018 		return;
6019 
6020 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6021 					&platform_link_width);
6022 	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
6023 
6024 	if (adev->pm.pcie_gen_mask == 0) {
6025 		/* asic caps */
6026 		if (speed_cap == PCI_SPEED_UNKNOWN) {
6027 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6028 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6029 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6030 		} else {
6031 			if (speed_cap == PCIE_SPEED_32_0GT)
6032 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6033 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6034 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6035 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6036 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6037 			else if (speed_cap == PCIE_SPEED_16_0GT)
6038 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6039 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6040 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6041 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6042 			else if (speed_cap == PCIE_SPEED_8_0GT)
6043 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6044 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6045 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6046 			else if (speed_cap == PCIE_SPEED_5_0GT)
6047 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6048 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6049 			else
6050 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6051 		}
6052 		/* platform caps */
6053 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6054 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6055 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6056 		} else {
6057 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6058 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6059 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6060 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6061 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6062 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6063 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6064 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6065 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6066 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6067 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6068 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6069 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6070 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6071 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6072 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6073 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6074 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6075 			else
6076 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6077 
6078 		}
6079 	}
6080 	if (adev->pm.pcie_mlw_mask == 0) {
6081 		/* asic caps */
6082 		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6083 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6084 		} else {
6085 			switch (link_width) {
6086 			case PCIE_LNK_X32:
6087 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6088 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6089 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6090 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6091 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6092 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6093 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6094 				break;
6095 			case PCIE_LNK_X16:
6096 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6097 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6098 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6099 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6100 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6101 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6102 				break;
6103 			case PCIE_LNK_X12:
6104 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6105 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6106 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6107 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6108 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6109 				break;
6110 			case PCIE_LNK_X8:
6111 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6112 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6113 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6114 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6115 				break;
6116 			case PCIE_LNK_X4:
6117 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6118 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6119 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6120 				break;
6121 			case PCIE_LNK_X2:
6122 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6123 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6124 				break;
6125 			case PCIE_LNK_X1:
6126 				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6127 				break;
6128 			default:
6129 				break;
6130 			}
6131 		}
6132 		/* platform caps */
6133 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6134 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6135 		} else {
6136 			switch (platform_link_width) {
6137 			case PCIE_LNK_X32:
6138 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6139 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6140 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6141 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6142 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6143 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6144 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6145 				break;
6146 			case PCIE_LNK_X16:
6147 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6148 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6149 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6150 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6151 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6152 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6153 				break;
6154 			case PCIE_LNK_X12:
6155 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6156 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6157 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6158 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6159 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6160 				break;
6161 			case PCIE_LNK_X8:
6162 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6163 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6164 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6165 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6166 				break;
6167 			case PCIE_LNK_X4:
6168 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6169 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6170 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6171 				break;
6172 			case PCIE_LNK_X2:
6173 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6174 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6175 				break;
6176 			case PCIE_LNK_X1:
6177 				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6178 				break;
6179 			default:
6180 				break;
6181 			}
6182 		}
6183 	}
6184 }
6185 
6186 /**
6187  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6188  *
6189  * @adev: amdgpu_device pointer
6190  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6191  *
6192  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6193  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6194  * @peer_adev.
6195  */
6196 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6197 				      struct amdgpu_device *peer_adev)
6198 {
6199 #ifdef CONFIG_HSA_AMD_P2P
6200 	bool p2p_access =
6201 		!adev->gmc.xgmi.connected_to_cpu &&
6202 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6203 	if (!p2p_access)
6204 		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6205 			pci_name(peer_adev->pdev));
6206 
6207 	bool is_large_bar = adev->gmc.visible_vram_size &&
6208 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6209 	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6210 
6211 	if (!p2p_addressable) {
6212 		uint64_t address_mask = peer_adev->dev->dma_mask ?
6213 			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6214 		resource_size_t aper_limit =
6215 			adev->gmc.aper_base + adev->gmc.aper_size - 1;
6216 
6217 		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6218 				     aper_limit & address_mask);
6219 	}
6220 	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6221 #else
6222 	return false;
6223 #endif
6224 }
6225 
6226 int amdgpu_device_baco_enter(struct amdgpu_device *adev)
6227 {
6228 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6229 
6230 	if (!amdgpu_device_supports_baco(adev))
6231 		return -ENOTSUPP;
6232 
6233 	if (ras && adev->ras_enabled &&
6234 	    adev->nbio.funcs->enable_doorbell_interrupt)
6235 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6236 
6237 	return amdgpu_dpm_baco_enter(adev);
6238 }
6239 
6240 int amdgpu_device_baco_exit(struct amdgpu_device *adev)
6241 {
6242 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6243 	int ret = 0;
6244 
6245 	if (!amdgpu_device_supports_baco(adev))
6246 		return -ENOTSUPP;
6247 
6248 	ret = amdgpu_dpm_baco_exit(adev);
6249 	if (ret)
6250 		return ret;
6251 
6252 	if (ras && adev->ras_enabled &&
6253 	    adev->nbio.funcs->enable_doorbell_interrupt)
6254 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6255 
6256 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6257 	    adev->nbio.funcs->clear_doorbell_interrupt)
6258 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6259 
6260 	return 0;
6261 }
6262 
6263 /**
6264  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6265  * @pdev: PCI device struct
6266  * @state: PCI channel state
6267  *
6268  * Description: Called when a PCI error is detected.
6269  *
6270  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6271  */
6272 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6273 {
6274 	struct drm_device *dev = pci_get_drvdata(pdev);
6275 	struct amdgpu_device *adev = drm_to_adev(dev);
6276 	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
6277 		amdgpu_get_xgmi_hive(adev);
6278 	struct amdgpu_reset_context reset_context;
6279 	struct list_head device_list;
6280 
6281 	dev_info(adev->dev, "PCI error: detected callback!!\n");
6282 
6283 	adev->pci_channel_state = state;
6284 
6285 	switch (state) {
6286 	case pci_channel_io_normal:
6287 		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
6288 		return PCI_ERS_RESULT_CAN_RECOVER;
6289 	case pci_channel_io_frozen:
6290 		/* Fatal error, prepare for slot reset */
6291 		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
6292 		if (hive) {
6293 			/* Hive devices should be able to support FW based
6294 			 * link reset on other devices, if not return.
6295 			 */
6296 			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
6297 				dev_warn(adev->dev,
6298 					 "No support for XGMI hive yet...\n");
6299 				return PCI_ERS_RESULT_DISCONNECT;
6300 			}
6301 			/* Set dpc status only if device is part of hive
6302 			 * Non-hive devices should be able to recover after
6303 			 * link reset.
6304 			 */
6305 			amdgpu_reset_set_dpc_status(adev, true);
6306 
6307 			mutex_lock(&hive->hive_lock);
6308 		} else {
6309 			if (amdgpu_device_bus_status_check(adev))
6310 				amdgpu_reset_set_dpc_status(adev, true);
6311 		}
6312 		memset(&reset_context, 0, sizeof(reset_context));
6313 		INIT_LIST_HEAD(&device_list);
6314 
6315 		amdgpu_device_recovery_prepare(adev, &device_list, hive);
6316 		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
6317 		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
6318 					      hive, false);
6319 		if (hive)
6320 			mutex_unlock(&hive->hive_lock);
6321 		return PCI_ERS_RESULT_NEED_RESET;
6322 	case pci_channel_io_perm_failure:
6323 		/* Permanent error, prepare for device removal */
6324 		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
6325 		return PCI_ERS_RESULT_DISCONNECT;
6326 	}
6327 
6328 	return PCI_ERS_RESULT_NEED_RESET;
6329 }
6330 
6331 /**
6332  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6333  * @pdev: pointer to PCI device
6334  */
6335 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6336 {
6337 	struct drm_device *dev = pci_get_drvdata(pdev);
6338 	struct amdgpu_device *adev = drm_to_adev(dev);
6339 
6340 	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6341 
6342 	/* TODO - dump whatever for debugging purposes */
6343 
6344 	/* This called only if amdgpu_pci_error_detected returns
6345 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6346 	 * works, no need to reset slot.
6347 	 */
6348 
6349 	return PCI_ERS_RESULT_RECOVERED;
6350 }
6351 
6352 /**
6353  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6354  * @pdev: PCI device struct
6355  *
6356  * Description: This routine is called by the pci error recovery
6357  * code after the PCI slot has been reset, just before we
6358  * should resume normal operations.
6359  */
6360 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6361 {
6362 	struct drm_device *dev = pci_get_drvdata(pdev);
6363 	struct amdgpu_device *adev = drm_to_adev(dev);
6364 	struct amdgpu_reset_context reset_context;
6365 	struct amdgpu_device *tmp_adev;
6366 	struct amdgpu_hive_info *hive;
6367 	struct list_head device_list;
6368 	struct pci_dev *link_dev;
6369 	int r = 0, i, timeout;
6370 	u32 memsize;
6371 	u16 status;
6372 
6373 	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
6374 
6375 	memset(&reset_context, 0, sizeof(reset_context));
6376 	INIT_LIST_HEAD(&device_list);
6377 	hive = amdgpu_get_xgmi_hive(adev);
6378 	if (hive) {
6379 		mutex_lock(&hive->hive_lock);
6380 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6381 			list_add_tail(&tmp_adev->reset_list, &device_list);
6382 	} else {
6383 		list_add_tail(&adev->reset_list, &device_list);
6384 	}
6385 
6386 	if (adev->pcie_reset_ctx.swus)
6387 		link_dev = adev->pcie_reset_ctx.swus;
6388 	else
6389 		link_dev = adev->pdev;
6390 	/* wait for asic to come out of reset, timeout = 10s */
6391 	timeout = 10000;
6392 	do {
6393 		usleep_range(10000, 10500);
6394 		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
6395 		timeout -= 10;
6396 	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
6397 		 (status != PCI_VENDOR_ID_AMD));
6398 
6399 	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
6400 		r = -ETIME;
6401 		goto out;
6402 	}
6403 
6404 	amdgpu_device_load_switch_state(adev);
6405 	/* Restore PCI confspace */
6406 	amdgpu_device_load_pci_state(pdev);
6407 
6408 	/* confirm  ASIC came out of reset */
6409 	for (i = 0; i < adev->usec_timeout; i++) {
6410 		memsize = amdgpu_asic_get_config_memsize(adev);
6411 
6412 		if (memsize != 0xffffffff)
6413 			break;
6414 		udelay(1);
6415 	}
6416 	if (memsize == 0xffffffff) {
6417 		r = -ETIME;
6418 		goto out;
6419 	}
6420 
6421 	reset_context.method = AMD_RESET_METHOD_NONE;
6422 	reset_context.reset_req_dev = adev;
6423 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6424 	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
6425 
6426 	if (hive) {
6427 		reset_context.hive = hive;
6428 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6429 			tmp_adev->pcie_reset_ctx.in_link_reset = true;
6430 	} else {
6431 		adev->pcie_reset_ctx.in_link_reset = true;
6432 		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6433 	}
6434 
6435 	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
6436 out:
6437 	if (!r) {
6438 		if (amdgpu_device_cache_pci_state(adev->pdev))
6439 			pci_restore_state(adev->pdev);
6440 		dev_info(adev->dev, "PCIe error recovery succeeded\n");
6441 	} else {
6442 		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
6443 		if (hive) {
6444 			list_for_each_entry(tmp_adev, &device_list, reset_list)
6445 				amdgpu_device_unset_mp1_state(tmp_adev);
6446 		}
6447 		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6448 	}
6449 
6450 	if (hive) {
6451 		mutex_unlock(&hive->hive_lock);
6452 		amdgpu_put_xgmi_hive(hive);
6453 	}
6454 
6455 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6456 }
6457 
6458 /**
6459  * amdgpu_pci_resume() - resume normal ops after PCI reset
6460  * @pdev: pointer to PCI device
6461  *
6462  * Called when the error recovery driver tells us that its
6463  * OK to resume normal operation.
6464  */
6465 void amdgpu_pci_resume(struct pci_dev *pdev)
6466 {
6467 	struct drm_device *dev = pci_get_drvdata(pdev);
6468 	struct amdgpu_device *adev = drm_to_adev(dev);
6469 	struct list_head device_list;
6470 	struct amdgpu_hive_info *hive = NULL;
6471 	struct amdgpu_device *tmp_adev = NULL;
6472 
6473 	dev_info(adev->dev, "PCI error: resume callback!!\n");
6474 
6475 	/* Only continue execution for the case of pci_channel_io_frozen */
6476 	if (adev->pci_channel_state != pci_channel_io_frozen)
6477 		return;
6478 
6479 	INIT_LIST_HEAD(&device_list);
6480 
6481 	hive = amdgpu_get_xgmi_hive(adev);
6482 	if (hive) {
6483 		mutex_lock(&hive->hive_lock);
6484 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6485 			tmp_adev->pcie_reset_ctx.in_link_reset = false;
6486 			list_add_tail(&tmp_adev->reset_list, &device_list);
6487 		}
6488 	} else {
6489 		adev->pcie_reset_ctx.in_link_reset = false;
6490 		list_add_tail(&adev->reset_list, &device_list);
6491 	}
6492 	amdgpu_device_sched_resume(&device_list, NULL, NULL);
6493 	amdgpu_device_gpu_resume(adev, &device_list, false);
6494 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6495 
6496 	if (hive) {
6497 		mutex_unlock(&hive->hive_lock);
6498 		amdgpu_put_xgmi_hive(hive);
6499 	}
6500 }
6501 
6502 static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
6503 {
6504 	struct pci_dev *swus, *swds;
6505 	int r;
6506 
6507 	swds = pci_upstream_bridge(adev->pdev);
6508 	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
6509 	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
6510 		return;
6511 	swus = pci_upstream_bridge(swds);
6512 	if (!swus ||
6513 	    (swus->vendor != PCI_VENDOR_ID_ATI &&
6514 	     swus->vendor != PCI_VENDOR_ID_AMD) ||
6515 	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
6516 		return;
6517 
6518 	/* If already saved, return */
6519 	if (adev->pcie_reset_ctx.swus)
6520 		return;
6521 	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
6522 	r = pci_save_state(swds);
6523 	if (r)
6524 		return;
6525 	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
6526 
6527 	r = pci_save_state(swus);
6528 	if (r)
6529 		return;
6530 	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
6531 
6532 	adev->pcie_reset_ctx.swus = swus;
6533 }
6534 
6535 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
6536 {
6537 	struct pci_dev *pdev;
6538 	int r;
6539 
6540 	if (!adev->pcie_reset_ctx.swds_pcistate ||
6541 	    !adev->pcie_reset_ctx.swus_pcistate)
6542 		return;
6543 
6544 	pdev = adev->pcie_reset_ctx.swus;
6545 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
6546 	if (!r) {
6547 		pci_restore_state(pdev);
6548 	} else {
6549 		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
6550 		return;
6551 	}
6552 
6553 	pdev = pci_upstream_bridge(adev->pdev);
6554 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
6555 	if (!r)
6556 		pci_restore_state(pdev);
6557 	else
6558 		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
6559 }
6560 
6561 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6562 {
6563 	struct drm_device *dev = pci_get_drvdata(pdev);
6564 	struct amdgpu_device *adev = drm_to_adev(dev);
6565 	int r;
6566 
6567 	if (amdgpu_sriov_vf(adev))
6568 		return false;
6569 
6570 	r = pci_save_state(pdev);
6571 	if (!r) {
6572 		kfree(adev->pci_state);
6573 
6574 		adev->pci_state = pci_store_saved_state(pdev);
6575 
6576 		if (!adev->pci_state) {
6577 			dev_err(adev->dev, "Failed to store PCI saved state");
6578 			return false;
6579 		}
6580 	} else {
6581 		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
6582 		return false;
6583 	}
6584 
6585 	amdgpu_device_cache_switch_state(adev);
6586 
6587 	return true;
6588 }
6589 
6590 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6591 {
6592 	struct drm_device *dev = pci_get_drvdata(pdev);
6593 	struct amdgpu_device *adev = drm_to_adev(dev);
6594 	int r;
6595 
6596 	if (!adev->pci_state)
6597 		return false;
6598 
6599 	r = pci_load_saved_state(pdev, adev->pci_state);
6600 
6601 	if (!r) {
6602 		pci_restore_state(pdev);
6603 	} else {
6604 		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
6605 		return false;
6606 	}
6607 
6608 	return true;
6609 }
6610 
6611 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6612 		struct amdgpu_ring *ring)
6613 {
6614 #ifdef CONFIG_X86_64
6615 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6616 		return;
6617 #endif
6618 	if (adev->gmc.xgmi.connected_to_cpu)
6619 		return;
6620 
6621 	if (ring && ring->funcs->emit_hdp_flush) {
6622 		amdgpu_ring_emit_hdp_flush(ring);
6623 		return;
6624 	}
6625 
6626 	if (!ring && amdgpu_sriov_runtime(adev)) {
6627 		if (!amdgpu_kiq_hdp_flush(adev))
6628 			return;
6629 	}
6630 
6631 	amdgpu_hdp_flush(adev, ring);
6632 }
6633 
6634 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6635 		struct amdgpu_ring *ring)
6636 {
6637 #ifdef CONFIG_X86_64
6638 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6639 		return;
6640 #endif
6641 	if (adev->gmc.xgmi.connected_to_cpu)
6642 		return;
6643 
6644 	amdgpu_hdp_invalidate(adev, ring);
6645 }
6646 
6647 int amdgpu_in_reset(struct amdgpu_device *adev)
6648 {
6649 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6650 }
6651 
6652 /**
6653  * amdgpu_device_halt() - bring hardware to some kind of halt state
6654  *
6655  * @adev: amdgpu_device pointer
6656  *
6657  * Bring hardware to some kind of halt state so that no one can touch it
6658  * any more. It will help to maintain error context when error occurred.
6659  * Compare to a simple hang, the system will keep stable at least for SSH
6660  * access. Then it should be trivial to inspect the hardware state and
6661  * see what's going on. Implemented as following:
6662  *
6663  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6664  *    clears all CPU mappings to device, disallows remappings through page faults
6665  * 2. amdgpu_irq_disable_all() disables all interrupts
6666  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6667  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6668  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6669  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6670  *    flush any in flight DMA operations
6671  */
6672 void amdgpu_device_halt(struct amdgpu_device *adev)
6673 {
6674 	struct pci_dev *pdev = adev->pdev;
6675 	struct drm_device *ddev = adev_to_drm(adev);
6676 
6677 	amdgpu_xcp_dev_unplug(adev);
6678 	drm_dev_unplug(ddev);
6679 
6680 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
6681 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
6682 
6683 	amdgpu_irq_disable_all(adev);
6684 
6685 	amdgpu_fence_driver_hw_fini(adev);
6686 
6687 	adev->no_hw_access = true;
6688 
6689 	amdgpu_device_unmap_mmio(adev);
6690 
6691 	pci_disable_device(pdev);
6692 	pci_wait_for_pending_transaction(pdev);
6693 }
6694 
6695 /**
6696  * amdgpu_device_get_gang - return a reference to the current gang
6697  * @adev: amdgpu_device pointer
6698  *
6699  * Returns: A new reference to the current gang leader.
6700  */
6701 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6702 {
6703 	struct dma_fence *fence;
6704 
6705 	rcu_read_lock();
6706 	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6707 	rcu_read_unlock();
6708 	return fence;
6709 }
6710 
6711 /**
6712  * amdgpu_device_switch_gang - switch to a new gang
6713  * @adev: amdgpu_device pointer
6714  * @gang: the gang to switch to
6715  *
6716  * Try to switch to a new gang.
6717  * Returns: NULL if we switched to the new gang or a reference to the current
6718  * gang leader.
6719  */
6720 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6721 					    struct dma_fence *gang)
6722 {
6723 	struct dma_fence *old = NULL;
6724 
6725 	dma_fence_get(gang);
6726 	do {
6727 		dma_fence_put(old);
6728 		old = amdgpu_device_get_gang(adev);
6729 		if (old == gang)
6730 			break;
6731 
6732 		if (!dma_fence_is_signaled(old)) {
6733 			dma_fence_put(gang);
6734 			return old;
6735 		}
6736 
6737 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6738 			 old, gang) != old);
6739 
6740 	/*
6741 	 * Drop it once for the exchanged reference in adev and once for the
6742 	 * thread local reference acquired in amdgpu_device_get_gang().
6743 	 */
6744 	dma_fence_put(old);
6745 	dma_fence_put(old);
6746 	return NULL;
6747 }
6748 
6749 /**
6750  * amdgpu_device_enforce_isolation - enforce HW isolation
6751  * @adev: the amdgpu device pointer
6752  * @ring: the HW ring the job is supposed to run on
6753  * @job: the job which is about to be pushed to the HW ring
6754  *
6755  * Makes sure that only one client at a time can use the GFX block.
6756  * Returns: The dependency to wait on before the job can be pushed to the HW.
6757  * The function is called multiple times until NULL is returned.
6758  */
6759 struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
6760 						  struct amdgpu_ring *ring,
6761 						  struct amdgpu_job *job)
6762 {
6763 	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
6764 	struct drm_sched_fence *f = job->base.s_fence;
6765 	struct dma_fence *dep;
6766 	void *owner;
6767 	int r;
6768 
6769 	/*
6770 	 * For now enforce isolation only for the GFX block since we only need
6771 	 * the cleaner shader on those rings.
6772 	 */
6773 	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
6774 	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6775 		return NULL;
6776 
6777 	/*
6778 	 * All submissions where enforce isolation is false are handled as if
6779 	 * they come from a single client. Use ~0l as the owner to distinct it
6780 	 * from kernel submissions where the owner is NULL.
6781 	 */
6782 	owner = job->enforce_isolation ? f->owner : (void *)~0l;
6783 
6784 	mutex_lock(&adev->enforce_isolation_mutex);
6785 
6786 	/*
6787 	 * The "spearhead" submission is the first one which changes the
6788 	 * ownership to its client. We always need to wait for it to be
6789 	 * pushed to the HW before proceeding with anything.
6790 	 */
6791 	if (&f->scheduled != isolation->spearhead &&
6792 	    !dma_fence_is_signaled(isolation->spearhead)) {
6793 		dep = isolation->spearhead;
6794 		goto out_grab_ref;
6795 	}
6796 
6797 	if (isolation->owner != owner) {
6798 
6799 		/*
6800 		 * Wait for any gang to be assembled before switching to a
6801 		 * different owner or otherwise we could deadlock the
6802 		 * submissions.
6803 		 */
6804 		if (!job->gang_submit) {
6805 			dep = amdgpu_device_get_gang(adev);
6806 			if (!dma_fence_is_signaled(dep))
6807 				goto out_return_dep;
6808 			dma_fence_put(dep);
6809 		}
6810 
6811 		dma_fence_put(isolation->spearhead);
6812 		isolation->spearhead = dma_fence_get(&f->scheduled);
6813 		amdgpu_sync_move(&isolation->active, &isolation->prev);
6814 		trace_amdgpu_isolation(isolation->owner, owner);
6815 		isolation->owner = owner;
6816 	}
6817 
6818 	/*
6819 	 * Specifying the ring here helps to pipeline submissions even when
6820 	 * isolation is enabled. If that is not desired for testing NULL can be
6821 	 * used instead of the ring to enforce a CPU round trip while switching
6822 	 * between clients.
6823 	 */
6824 	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
6825 	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
6826 	if (r)
6827 		dev_warn(adev->dev, "OOM tracking isolation\n");
6828 
6829 out_grab_ref:
6830 	dma_fence_get(dep);
6831 out_return_dep:
6832 	mutex_unlock(&adev->enforce_isolation_mutex);
6833 	return dep;
6834 }
6835 
6836 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6837 {
6838 	switch (adev->asic_type) {
6839 #ifdef CONFIG_DRM_AMDGPU_SI
6840 	case CHIP_HAINAN:
6841 #endif
6842 	case CHIP_TOPAZ:
6843 		/* chips with no display hardware */
6844 		return false;
6845 #ifdef CONFIG_DRM_AMDGPU_SI
6846 	case CHIP_TAHITI:
6847 	case CHIP_PITCAIRN:
6848 	case CHIP_VERDE:
6849 	case CHIP_OLAND:
6850 #endif
6851 #ifdef CONFIG_DRM_AMDGPU_CIK
6852 	case CHIP_BONAIRE:
6853 	case CHIP_HAWAII:
6854 	case CHIP_KAVERI:
6855 	case CHIP_KABINI:
6856 	case CHIP_MULLINS:
6857 #endif
6858 	case CHIP_TONGA:
6859 	case CHIP_FIJI:
6860 	case CHIP_POLARIS10:
6861 	case CHIP_POLARIS11:
6862 	case CHIP_POLARIS12:
6863 	case CHIP_VEGAM:
6864 	case CHIP_CARRIZO:
6865 	case CHIP_STONEY:
6866 		/* chips with display hardware */
6867 		return true;
6868 	default:
6869 		/* IP discovery */
6870 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6871 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6872 			return false;
6873 		return true;
6874 	}
6875 }
6876 
6877 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
6878 {
6879 	ssize_t size = 0;
6880 
6881 	if (!ring || !ring->adev)
6882 		return size;
6883 
6884 	if (amdgpu_device_should_recover_gpu(ring->adev))
6885 		size |= AMDGPU_RESET_TYPE_FULL;
6886 
6887 	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
6888 	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
6889 		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
6890 
6891 	return size;
6892 }
6893 
6894 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
6895 {
6896 	ssize_t size = 0;
6897 
6898 	if (supported_reset == 0) {
6899 		size += sysfs_emit_at(buf, size, "unsupported");
6900 		size += sysfs_emit_at(buf, size, "\n");
6901 		return size;
6902 
6903 	}
6904 
6905 	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
6906 		size += sysfs_emit_at(buf, size, "soft ");
6907 
6908 	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
6909 		size += sysfs_emit_at(buf, size, "queue ");
6910 
6911 	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
6912 		size += sysfs_emit_at(buf, size, "pipe ");
6913 
6914 	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
6915 		size += sysfs_emit_at(buf, size, "full ");
6916 
6917 	size += sysfs_emit_at(buf, size, "\n");
6918 	return size;
6919 }
6920 
6921 void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
6922 			   enum amdgpu_uid_type type, uint8_t inst,
6923 			   uint64_t uid)
6924 {
6925 	if (!uid_info)
6926 		return;
6927 
6928 	if (type >= AMDGPU_UID_TYPE_MAX) {
6929 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6930 			     type);
6931 		return;
6932 	}
6933 
6934 	if (inst >= AMDGPU_UID_INST_MAX) {
6935 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6936 			     inst);
6937 		return;
6938 	}
6939 
6940 	if (uid_info->uid[type][inst] != 0) {
6941 		dev_warn_once(
6942 			uid_info->adev->dev,
6943 			"Overwriting existing UID %llu for type %d instance %d\n",
6944 			uid_info->uid[type][inst], type, inst);
6945 	}
6946 
6947 	uid_info->uid[type][inst] = uid;
6948 }
6949 
6950 u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
6951 			  enum amdgpu_uid_type type, uint8_t inst)
6952 {
6953 	if (!uid_info)
6954 		return 0;
6955 
6956 	if (type >= AMDGPU_UID_TYPE_MAX) {
6957 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6958 			     type);
6959 		return 0;
6960 	}
6961 
6962 	if (inst >= AMDGPU_UID_INST_MAX) {
6963 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6964 			     inst);
6965 		return 0;
6966 	}
6967 
6968 	return uid_info->uid[type][inst];
6969 }
6970