xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision 6b606216e03fa2b53cc179d8383b683a140fe6e1)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/aperture.h>
30 #include <linux/power_supply.h>
31 #include <linux/kthread.h>
32 #include <linux/module.h>
33 #include <linux/console.h>
34 #include <linux/slab.h>
35 #include <linux/iommu.h>
36 #include <linux/pci.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39 #include <linux/nospec.h>
40 
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_client_event.h>
43 #include <drm/drm_crtc_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/device.h>
47 #include <linux/vgaarb.h>
48 #include <linux/vga_switcheroo.h>
49 #include <linux/efi.h>
50 #include "amdgpu.h"
51 #include "amdgpu_trace.h"
52 #include "amdgpu_i2c.h"
53 #include "atom.h"
54 #include "amdgpu_atombios.h"
55 #include "amdgpu_atomfirmware.h"
56 #include "amd_pcie.h"
57 #ifdef CONFIG_DRM_AMDGPU_SI
58 #include "si.h"
59 #endif
60 #ifdef CONFIG_DRM_AMDGPU_CIK
61 #include "cik.h"
62 #endif
63 #include "vi.h"
64 #include "soc15.h"
65 #include "nv.h"
66 #include "bif/bif_4_1_d.h"
67 #include <linux/firmware.h>
68 #include "amdgpu_vf_error.h"
69 
70 #include "amdgpu_amdkfd.h"
71 #include "amdgpu_pm.h"
72 
73 #include "amdgpu_xgmi.h"
74 #include "amdgpu_ras.h"
75 #include "amdgpu_ras_mgr.h"
76 #include "amdgpu_pmu.h"
77 #include "amdgpu_fru_eeprom.h"
78 #include "amdgpu_reset.h"
79 #include "amdgpu_virt.h"
80 #include "amdgpu_dev_coredump.h"
81 
82 #include <linux/suspend.h>
83 #include <drm/task_barrier.h>
84 #include <linux/pm_runtime.h>
85 
86 #include <drm/drm_drv.h>
87 
88 #if IS_ENABLED(CONFIG_X86)
89 #include <asm/intel-family.h>
90 #include <asm/cpu_device_id.h>
91 #endif
92 
93 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
97 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
98 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
99 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
100 MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
101 
/* Resume-related time value; presumably milliseconds, going by the _MS suffix. */
#define AMDGPU_RESUME_MS			2000
/* Upper bound on a retry count. */
#define AMDGPU_MAX_RETRY_LIMIT			2
/*
 * True when error code @r is -EBUSY, -ETIMEDOUT or -EINVAL.
 * Note: @r may be evaluated up to three times; pass a side-effect-free value.
 */
#define AMDGPU_RETRY_SRIOV_RESET(r) \
	((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

/* Fallback PCIE index/data register offsets: byte offsets divided by four. */
#define AMDGPU_PCIE_INDEX_FALLBACK		(0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK		(0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK		(0x3C >> 2)

/* Bit flags returned by amdgpu_device_get_vbios_flags(). */
#define AMDGPU_VBIOS_SKIP			(1U << 0)
#define AMDGPU_VBIOS_OPTIONAL			(1U << 1)
111 
112 static const struct drm_driver amdgpu_kms_driver;
113 
/*
 * Human-readable ASIC names, looked up by numeric index.
 * NOTE(review): the order presumably has to track the ASIC type enum used
 * as the index - confirm before inserting or reordering entries.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI", "PITCAIRN", "VERDE", "OLAND", "HAINAN",
	"BONAIRE", "KAVERI", "KABINI", "HAWAII", "MULLINS",
	"TOPAZ", "TONGA", "FIJI", "CARRIZO", "STONEY",
	"POLARIS10", "POLARIS11", "POLARIS12", "VEGAM",
	"VEGA10", "VEGA12", "VEGA20", "RAVEN", "ARCTURUS",
	"RENOIR", "ALDEBARAN",
	"NAVI10", "CYAN_SKILLFISH", "NAVI14", "NAVI12",
	"SIENNA_CICHLID", "NAVY_FLOUNDER", "VANGOGH",
	"DIMGREY_CAVEFISH", "BEIGE_GOBY", "YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
154 
155 #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM  - 1, 0)
156 /*
157  * Default init level where all blocks are expected to be initialized. This is
158  * the level of initialization expected by default and also after a full reset
159  * of the device.
160  */
161 struct amdgpu_init_level amdgpu_init_default = {
162 	.level = AMDGPU_INIT_LEVEL_DEFAULT,
163 	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
164 };
165 
166 struct amdgpu_init_level amdgpu_init_recovery = {
167 	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
168 	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
169 };
170 
171 /*
172  * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
173  * is used for cases like reset on initialization where the entire hive needs to
174  * be reset before first use.
175  */
176 struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
177 	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
178 	.hwini_ip_block_mask =
179 		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
180 		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
181 		BIT(AMD_IP_BLOCK_TYPE_PSP)
182 };
183 
184 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
185 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
186 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
187 
188 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
189 
190 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
191 					     enum amd_ip_block_type block)
192 {
193 	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
194 }
195 
196 void amdgpu_set_init_level(struct amdgpu_device *adev,
197 			   enum amdgpu_init_lvl_id lvl)
198 {
199 	switch (lvl) {
200 	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
201 		adev->init_lvl = &amdgpu_init_minimal_xgmi;
202 		break;
203 	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
204 		adev->init_lvl = &amdgpu_init_recovery;
205 		break;
206 	case AMDGPU_INIT_LEVEL_DEFAULT:
207 		fallthrough;
208 	default:
209 		adev->init_lvl = &amdgpu_init_default;
210 		break;
211 	}
212 }
213 
214 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
215 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
216 				     void *data);
217 
218 /**
219  * DOC: pcie_replay_count
220  *
221  * The amdgpu driver provides a sysfs API for reporting the total number
222  * of PCIe replays (NAKs).
223  * The file pcie_replay_count is used for this and returns the total
224  * number of replays as a sum of the NAKs generated and NAKs received.
225  */
226 
227 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
228 		struct device_attribute *attr, char *buf)
229 {
230 	struct drm_device *ddev = dev_get_drvdata(dev);
231 	struct amdgpu_device *adev = drm_to_adev(ddev);
232 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
233 
234 	return sysfs_emit(buf, "%llu\n", cnt);
235 }
236 
237 static DEVICE_ATTR(pcie_replay_count, 0444,
238 		amdgpu_device_get_pcie_replay_count, NULL);
239 
240 static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
241 {
242 	int ret = 0;
243 
244 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
245 		ret = sysfs_create_file(&adev->dev->kobj,
246 					&dev_attr_pcie_replay_count.attr);
247 
248 	return ret;
249 }
250 
251 static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
252 {
253 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
254 		sysfs_remove_file(&adev->dev->kobj,
255 				  &dev_attr_pcie_replay_count.attr);
256 }
257 
258 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
259 					  const struct bin_attribute *attr, char *buf,
260 					  loff_t ppos, size_t count)
261 {
262 	struct device *dev = kobj_to_dev(kobj);
263 	struct drm_device *ddev = dev_get_drvdata(dev);
264 	struct amdgpu_device *adev = drm_to_adev(ddev);
265 	ssize_t bytes_read;
266 
267 	switch (ppos) {
268 	case AMDGPU_SYS_REG_STATE_XGMI:
269 		bytes_read = amdgpu_asic_get_reg_state(
270 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
271 		break;
272 	case AMDGPU_SYS_REG_STATE_WAFL:
273 		bytes_read = amdgpu_asic_get_reg_state(
274 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
275 		break;
276 	case AMDGPU_SYS_REG_STATE_PCIE:
277 		bytes_read = amdgpu_asic_get_reg_state(
278 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
279 		break;
280 	case AMDGPU_SYS_REG_STATE_USR:
281 		bytes_read = amdgpu_asic_get_reg_state(
282 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
283 		break;
284 	case AMDGPU_SYS_REG_STATE_USR_1:
285 		bytes_read = amdgpu_asic_get_reg_state(
286 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
287 		break;
288 	default:
289 		return -EINVAL;
290 	}
291 
292 	return bytes_read;
293 }
294 
295 static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
296 		      AMDGPU_SYS_REG_STATE_END);
297 
298 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
299 {
300 	int ret;
301 
302 	if (!amdgpu_asic_get_reg_state_supported(adev))
303 		return 0;
304 
305 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
306 
307 	return ret;
308 }
309 
310 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
311 {
312 	if (!amdgpu_asic_get_reg_state_supported(adev))
313 		return;
314 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
315 }
316 
317 /**
318  * DOC: board_info
319  *
320  * The amdgpu driver provides a sysfs API for giving board related information.
321  * It provides the form factor information in the format
322  *
323  *   type : form factor
324  *
325  * Possible form factor values
326  *
327  * - "cem"		- PCIE CEM card
328  * - "oam"		- Open Compute Accelerator Module
329  * - "unknown"	- Not known
330  *
331  */
332 
333 static ssize_t amdgpu_device_get_board_info(struct device *dev,
334 					    struct device_attribute *attr,
335 					    char *buf)
336 {
337 	struct drm_device *ddev = dev_get_drvdata(dev);
338 	struct amdgpu_device *adev = drm_to_adev(ddev);
339 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
340 	const char *pkg;
341 
342 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
343 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
344 
345 	switch (pkg_type) {
346 	case AMDGPU_PKG_TYPE_CEM:
347 		pkg = "cem";
348 		break;
349 	case AMDGPU_PKG_TYPE_OAM:
350 		pkg = "oam";
351 		break;
352 	default:
353 		pkg = "unknown";
354 		break;
355 	}
356 
357 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
358 }
359 
360 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
361 
362 static struct attribute *amdgpu_board_attrs[] = {
363 	&dev_attr_board_info.attr,
364 	NULL,
365 };
366 
367 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
368 					     struct attribute *attr, int n)
369 {
370 	struct device *dev = kobj_to_dev(kobj);
371 	struct drm_device *ddev = dev_get_drvdata(dev);
372 	struct amdgpu_device *adev = drm_to_adev(ddev);
373 
374 	if (adev->flags & AMD_IS_APU)
375 		return 0;
376 
377 	return attr->mode;
378 }
379 
380 static const struct attribute_group amdgpu_board_attrs_group = {
381 	.attrs = amdgpu_board_attrs,
382 	.is_visible = amdgpu_board_attrs_is_visible
383 };
384 
385 /**
386  * DOC: uma/carveout_options
387  *
388  * This is a read-only file that lists all available UMA allocation
389  * options and their corresponding indices. Example output::
390  *
391  *     $ cat uma/carveout_options
392  *     0: Minimum (512 MB)
393  *     1:  (1 GB)
394  *     2:  (2 GB)
395  *     3:  (4 GB)
396  *     4:  (6 GB)
397  *     5:  (8 GB)
398  *     6:  (12 GB)
399  *     7: Medium (16 GB)
400  *     8:  (24 GB)
401  *     9: High (32 GB)
402  */
403 static ssize_t carveout_options_show(struct device *dev,
404 				     struct device_attribute *attr,
405 				     char *buf)
406 {
407 	struct drm_device *ddev = dev_get_drvdata(dev);
408 	struct amdgpu_device *adev = drm_to_adev(ddev);
409 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
410 	uint32_t memory_carved;
411 	ssize_t size = 0;
412 
413 	if (!uma_info || !uma_info->num_entries)
414 		return -ENODEV;
415 
416 	for (int i = 0; i < uma_info->num_entries; i++) {
417 		memory_carved = uma_info->entries[i].memory_carved_mb;
418 		if (memory_carved >= SZ_1G/SZ_1M) {
419 			size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n",
420 					      i,
421 					      uma_info->entries[i].name,
422 					      memory_carved >> 10);
423 		} else {
424 			size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n",
425 					      i,
426 					      uma_info->entries[i].name,
427 					      memory_carved);
428 		}
429 	}
430 
431 	return size;
432 }
433 static DEVICE_ATTR_RO(carveout_options);
434 
435 /**
436  * DOC: uma/carveout
437  *
438  * This file is both readable and writable. When read, it shows the
439  * index of the current setting. Writing a valid index to this file
440  * allows users to change the UMA carveout size to the selected option
441  * on the next boot.
442  *
443  * The available options and their corresponding indices can be read
444  * from the uma/carveout_options file.
445  */
446 static ssize_t carveout_show(struct device *dev,
447 			     struct device_attribute *attr,
448 			     char *buf)
449 {
450 	struct drm_device *ddev = dev_get_drvdata(dev);
451 	struct amdgpu_device *adev = drm_to_adev(ddev);
452 
453 	return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index);
454 }
455 
456 static ssize_t carveout_store(struct device *dev,
457 			      struct device_attribute *attr,
458 			      const char *buf, size_t count)
459 {
460 	struct drm_device *ddev = dev_get_drvdata(dev);
461 	struct amdgpu_device *adev = drm_to_adev(ddev);
462 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
463 	struct amdgpu_uma_carveout_option *opt;
464 	unsigned long val;
465 	uint8_t flags;
466 	int r;
467 
468 	r = kstrtoul(buf, 10, &val);
469 	if (r)
470 		return r;
471 
472 	if (val >= uma_info->num_entries)
473 		return -EINVAL;
474 
475 	val = array_index_nospec(val, uma_info->num_entries);
476 	opt = &uma_info->entries[val];
477 
478 	if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) &&
479 	    !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) {
480 		drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val);
481 		return -EINVAL;
482 	}
483 
484 	flags = opt->flags;
485 	flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1);
486 
487 	guard(mutex)(&uma_info->update_lock);
488 
489 	r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags);
490 	if (r)
491 		return r;
492 
493 	uma_info->uma_option_index = val;
494 
495 	return count;
496 }
497 static DEVICE_ATTR_RW(carveout);
498 
499 static struct attribute *amdgpu_uma_attrs[] = {
500 	&dev_attr_carveout.attr,
501 	&dev_attr_carveout_options.attr,
502 	NULL
503 };
504 
505 const struct attribute_group amdgpu_uma_attr_group = {
506 	.name = "uma",
507 	.attrs = amdgpu_uma_attrs
508 };
509 
510 static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev)
511 {
512 	int rc;
513 
514 	if (!(adev->flags & AMD_IS_APU))
515 		return;
516 
517 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
518 		return;
519 
520 	rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info);
521 	if (rc) {
522 		drm_dbg(adev_to_drm(adev),
523 			"Failed to parse UMA carveout info from VBIOS: %d\n", rc);
524 		goto out_info;
525 	}
526 
527 	mutex_init(&adev->uma_info.update_lock);
528 
529 	rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group);
530 	if (rc) {
531 		drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc);
532 		goto out_attr;
533 	}
534 
535 	return;
536 
537 out_attr:
538 	mutex_destroy(&adev->uma_info.update_lock);
539 out_info:
540 	return;
541 }
542 
543 static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev)
544 {
545 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
546 
547 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
548 		return;
549 
550 	mutex_destroy(&uma_info->update_lock);
551 	uma_info->num_entries = 0;
552 }
553 
554 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
555 
556 /**
557  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
558  *
559  * @adev: amdgpu device pointer
560  *
561  * Returns true if the device is a dGPU with ATPX power control,
562  * otherwise return false.
563  */
564 bool amdgpu_device_supports_px(struct amdgpu_device *adev)
565 {
566 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
567 		return true;
568 	return false;
569 }
570 
571 /**
572  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
573  *
574  * @adev: amdgpu device pointer
575  *
576  * Returns true if the device is a dGPU with ACPI power control,
577  * otherwise return false.
578  */
579 bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
580 {
581 	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
582 		return false;
583 
584 	if (adev->has_pr3 ||
585 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
586 		return true;
587 	return false;
588 }
589 
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @adev: amdgpu device pointer
 *
 * Thin wrapper that forwards to the ASIC-specific BACO query.
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported)
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
	int support = amdgpu_asic_supports_baco(adev);

	return support;
}
604 
605 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
606 {
607 	int bamaco_support;
608 
609 	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
610 	bamaco_support = amdgpu_device_supports_baco(adev);
611 
612 	switch (amdgpu_runtime_pm) {
613 	case 2:
614 		if (bamaco_support & MACO_SUPPORT) {
615 			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
616 			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
617 		} else if (bamaco_support == BACO_SUPPORT) {
618 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
619 			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
620 		}
621 		break;
622 	case 1:
623 		if (bamaco_support & BACO_SUPPORT) {
624 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
625 			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
626 		}
627 		break;
628 	case -1:
629 	case -2:
630 		if (amdgpu_device_supports_px(adev)) {
631 			/* enable PX as runtime mode */
632 			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
633 			dev_info(adev->dev, "Using ATPX for runtime pm\n");
634 		} else if (amdgpu_device_supports_boco(adev)) {
635 			/* enable boco as runtime mode */
636 			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
637 			dev_info(adev->dev, "Using BOCO for runtime pm\n");
638 		} else {
639 			if (!bamaco_support)
640 				goto no_runtime_pm;
641 
642 			switch (adev->asic_type) {
643 			case CHIP_VEGA20:
644 			case CHIP_ARCTURUS:
645 				/* BACO are not supported on vega20 and arctrus */
646 				break;
647 			case CHIP_VEGA10:
648 				/* enable BACO as runpm mode if noretry=0 */
649 				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
650 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
651 				break;
652 			default:
653 				/* enable BACO as runpm mode on CI+ */
654 				if (!amdgpu_passthrough(adev))
655 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
656 				break;
657 			}
658 
659 			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
660 				if (bamaco_support & MACO_SUPPORT) {
661 					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
662 					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
663 				} else {
664 					dev_info(adev->dev, "Using BACO for runtime pm\n");
665 				}
666 			}
667 		}
668 		break;
669 	case 0:
670 		dev_info(adev->dev, "runtime pm is manually disabled\n");
671 		break;
672 	default:
673 		break;
674 	}
675 
676 no_runtime_pm:
677 	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
678 		dev_info(adev->dev, "Runtime PM not available\n");
679 }
680 /**
681  * amdgpu_device_supports_smart_shift - Is the device dGPU with
682  * smart shift support
683  *
684  * @adev: amdgpu device pointer
685  *
686  * Returns true if the device is a dGPU with Smart Shift support,
687  * otherwise returns false.
688  */
689 bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
690 {
691 	return (amdgpu_device_supports_boco(adev) &&
692 		amdgpu_acpi_is_power_shift_control_supported());
693 }
694 
695 /*
696  * VRAM access helper functions
697  */
698 
699 /**
700  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
701  *
702  * @adev: amdgpu_device pointer
703  * @pos: offset of the buffer in vram
704  * @buf: virtual address of the buffer in system memory
705  * @size: read/write size, sizeof(@buf) must > @size
706  * @write: true - write to vram, otherwise - read from vram
707  */
708 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
709 			     void *buf, size_t size, bool write)
710 {
711 	unsigned long flags;
712 	uint32_t hi = ~0, tmp = 0;
713 	uint32_t *data = buf;
714 	uint64_t last;
715 	int idx;
716 
717 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
718 		return;
719 
720 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
721 
722 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
723 	for (last = pos + size; pos < last; pos += 4) {
724 		tmp = pos >> 31;
725 
726 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
727 		if (tmp != hi) {
728 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
729 			hi = tmp;
730 		}
731 		if (write)
732 			WREG32_NO_KIQ(mmMM_DATA, *data++);
733 		else
734 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
735 	}
736 
737 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
738 	drm_dev_exit(idx);
739 }
740 
741 /**
742  * amdgpu_device_aper_access - access vram by vram aperture
743  *
744  * @adev: amdgpu_device pointer
745  * @pos: offset of the buffer in vram
746  * @buf: virtual address of the buffer in system memory
747  * @size: read/write size, sizeof(@buf) must > @size
748  * @write: true - write to vram, otherwise - read from vram
749  *
750  * The return value means how many bytes have been transferred.
751  */
752 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
753 				 void *buf, size_t size, bool write)
754 {
755 #ifdef CONFIG_64BIT
756 	void __iomem *addr;
757 	size_t count = 0;
758 	uint64_t last;
759 
760 	if (!adev->mman.aper_base_kaddr)
761 		return 0;
762 
763 	last = min(pos + size, adev->gmc.visible_vram_size);
764 	if (last > pos) {
765 		addr = adev->mman.aper_base_kaddr + pos;
766 		count = last - pos;
767 
768 		if (write) {
769 			memcpy_toio(addr, buf, count);
770 			/* Make sure HDP write cache flush happens without any reordering
771 			 * after the system memory contents are sent over PCIe device
772 			 */
773 			mb();
774 			amdgpu_device_flush_hdp(adev, NULL);
775 		} else {
776 			amdgpu_device_invalidate_hdp(adev, NULL);
777 			/* Make sure HDP read cache is invalidated before issuing a read
778 			 * to the PCIe device
779 			 */
780 			mb();
781 			memcpy_fromio(buf, addr, count);
782 		}
783 
784 	}
785 
786 	return count;
787 #else
788 	return 0;
789 #endif
790 }
791 
792 /**
793  * amdgpu_device_vram_access - read/write a buffer in vram
794  *
795  * @adev: amdgpu_device pointer
796  * @pos: offset of the buffer in vram
797  * @buf: virtual address of the buffer in system memory
798  * @size: read/write size, sizeof(@buf) must > @size
799  * @write: true - write to vram, otherwise - read from vram
800  */
801 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
802 			       void *buf, size_t size, bool write)
803 {
804 	size_t count;
805 
806 	/* try to using vram apreature to access vram first */
807 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
808 	size -= count;
809 	if (size) {
810 		/* using MM to access rest vram */
811 		pos += count;
812 		buf += count;
813 		amdgpu_device_mm_access(adev, pos, buf, size, write);
814 	}
815 }
816 
817 /*
818  * register access helper functions.
819  */
820 
821 /* Check if hw access should be skipped because of hotplug or device error */
822 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
823 {
824 	if (adev->no_hw_access)
825 		return true;
826 
827 #ifdef CONFIG_LOCKDEP
828 	/*
829 	 * This is a bit complicated to understand, so worth a comment. What we assert
830 	 * here is that the GPU reset is not running on another thread in parallel.
831 	 *
832 	 * For this we trylock the read side of the reset semaphore, if that succeeds
833 	 * we know that the reset is not running in parallel.
834 	 *
835 	 * If the trylock fails we assert that we are either already holding the read
836 	 * side of the lock or are the reset thread itself and hold the write side of
837 	 * the lock.
838 	 */
839 	if (in_task()) {
840 		if (down_read_trylock(&adev->reset_domain->sem))
841 			up_read(&adev->reset_domain->sem);
842 		else
843 			lockdep_assert_held(&adev->reset_domain->sem);
844 	}
845 #endif
846 	return false;
847 }
848 
849 /**
850  * amdgpu_device_get_rev_id - query device rev_id
851  *
852  * @adev: amdgpu_device pointer
853  *
854  * Return device rev_id
855  */
856 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
857 {
858 	return adev->nbio.funcs->get_rev_id(adev);
859 }
860 
861 static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
862 {
863 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
864 		return AMDGPU_VBIOS_SKIP;
865 
866 	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
867 		return AMDGPU_VBIOS_OPTIONAL;
868 
869 	return 0;
870 }
871 
872 /**
873  * amdgpu_device_asic_init - Wrapper for atom asic_init
874  *
875  * @adev: amdgpu_device pointer
876  *
877  * Does any asic specific work and then calls atom asic init.
878  */
879 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
880 {
881 	uint32_t flags;
882 	bool optional;
883 	int ret;
884 
885 	amdgpu_asic_pre_asic_init(adev);
886 	flags = amdgpu_device_get_vbios_flags(adev);
887 	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
888 
889 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
890 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
891 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
892 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
893 		amdgpu_psp_wait_for_bootloader(adev);
894 		if (optional && !adev->bios)
895 			return 0;
896 
897 		ret = amdgpu_atomfirmware_asic_init(adev, true);
898 		return ret;
899 	} else {
900 		if (optional && !adev->bios)
901 			return 0;
902 
903 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
904 	}
905 
906 	return 0;
907 }
908 
909 /**
910  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
911  *
912  * @adev: amdgpu_device pointer
913  *
914  * Allocates a scratch page of VRAM for use by various things in the
915  * driver.
916  */
917 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
918 {
919 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
920 				       AMDGPU_GEM_DOMAIN_VRAM |
921 				       AMDGPU_GEM_DOMAIN_GTT,
922 				       &adev->mem_scratch.robj,
923 				       &adev->mem_scratch.gpu_addr,
924 				       (void **)&adev->mem_scratch.ptr);
925 }
926 
927 /**
928  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
929  *
930  * @adev: amdgpu_device pointer
931  *
932  * Frees the VRAM scratch page.
933  */
934 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
935 {
936 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
937 }
938 
939 /**
940  * amdgpu_device_program_register_sequence - program an array of registers.
941  *
942  * @adev: amdgpu_device pointer
943  * @registers: pointer to the register array
944  * @array_size: size of the register array
945  *
946  * Programs an array or registers with and or masks.
947  * This is a helper for setting golden registers.
948  */
949 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
950 					     const u32 *registers,
951 					     const u32 array_size)
952 {
953 	u32 tmp, reg, and_mask, or_mask;
954 	int i;
955 
956 	if (array_size % 3)
957 		return;
958 
959 	for (i = 0; i < array_size; i += 3) {
960 		reg = registers[i + 0];
961 		and_mask = registers[i + 1];
962 		or_mask = registers[i + 2];
963 
964 		if (and_mask == 0xffffffff) {
965 			tmp = or_mask;
966 		} else {
967 			tmp = RREG32(reg);
968 			tmp &= ~and_mask;
969 			if (adev->family >= AMDGPU_FAMILY_AI)
970 				tmp |= (or_mask & and_mask);
971 			else
972 				tmp |= or_mask;
973 		}
974 		WREG32(reg, tmp);
975 	}
976 }
977 
978 /**
979  * amdgpu_device_pci_config_reset - reset the GPU
980  *
981  * @adev: amdgpu_device pointer
982  *
983  * Resets the GPU using the pci config reset sequence.
984  * Only applicable to asics prior to vega10.
985  */
986 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
987 {
988 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
989 }
990 
991 /**
992  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
993  *
994  * @adev: amdgpu_device pointer
995  *
996  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
997  */
998 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
999 {
1000 	return pci_reset_function(adev->pdev);
1001 }
1002 
1003 /*
1004  * amdgpu_device_wb_*()
1005  * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
1007  */
1008 
1009 /**
1010  * amdgpu_device_wb_fini - Disable Writeback and free memory
1011  *
1012  * @adev: amdgpu_device pointer
1013  *
1014  * Disables Writeback and frees the Writeback memory (all asics).
1015  * Used at driver shutdown.
1016  */
1017 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1018 {
1019 	if (adev->wb.wb_obj) {
1020 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1021 				      &adev->wb.gpu_addr,
1022 				      (void **)&adev->wb.wb);
1023 		adev->wb.wb_obj = NULL;
1024 	}
1025 }
1026 
1027 /**
1028  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1029  *
1030  * @adev: amdgpu_device pointer
1031  *
1032  * Initializes writeback and allocates writeback memory (all asics).
1033  * Used at driver startup.
1034  * Returns 0 on success or an -error on failure.
1035  */
1036 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1037 {
1038 	int r;
1039 
1040 	if (adev->wb.wb_obj == NULL) {
1041 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1042 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1043 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1044 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1045 					    (void **)&adev->wb.wb);
1046 		if (r) {
1047 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1048 			return r;
1049 		}
1050 
1051 		adev->wb.num_wb = AMDGPU_MAX_WB;
1052 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1053 
1054 		/* clear wb memory */
1055 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 /**
1062  * amdgpu_device_wb_get - Allocate a wb entry
1063  *
1064  * @adev: amdgpu_device pointer
1065  * @wb: wb index
1066  *
1067  * Allocate a wb slot for use by the driver (all asics).
1068  * Returns 0 on success or -EINVAL on failure.
1069  */
1070 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1071 {
1072 	unsigned long flags, offset;
1073 
1074 	spin_lock_irqsave(&adev->wb.lock, flags);
1075 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1076 	if (offset < adev->wb.num_wb) {
1077 		__set_bit(offset, adev->wb.used);
1078 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1079 		*wb = offset << 3; /* convert to dw offset */
1080 		return 0;
1081 	} else {
1082 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1083 		return -EINVAL;
1084 	}
1085 }
1086 
1087 /**
1088  * amdgpu_device_wb_free - Free a wb entry
1089  *
1090  * @adev: amdgpu_device pointer
1091  * @wb: wb index
1092  *
1093  * Free a wb slot allocated for use by the driver (all asics)
1094  */
1095 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1096 {
1097 	unsigned long flags;
1098 
1099 	wb >>= 3;
1100 	spin_lock_irqsave(&adev->wb.lock, flags);
1101 	if (wb < adev->wb.num_wb)
1102 		__clear_bit(wb, adev->wb.used);
1103 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1104 }
1105 
1106 /**
1107  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1108  *
1109  * @adev: amdgpu_device pointer
1110  *
1111  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1112  * to fail, but if any of the BARs is not accessible after the size we abort
1113  * driver loading by returning -ENODEV.
1114  */
1115 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1116 {
1117 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1118 	struct pci_bus *root;
1119 	struct resource *res;
1120 	int max_size, r;
1121 	unsigned int i;
1122 	u16 cmd;
1123 
1124 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1125 		return 0;
1126 
1127 	/* Bypass for VF */
1128 	if (amdgpu_sriov_vf(adev))
1129 		return 0;
1130 
1131 	if (!amdgpu_rebar)
1132 		return 0;
1133 
1134 	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1135 	if ((amdgpu_runtime_pm != 0) &&
1136 	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1137 	    adev->pdev->device == 0x731f &&
1138 	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1139 		return 0;
1140 
1141 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1142 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1143 		dev_warn(
1144 			adev->dev,
1145 			"System can't access extended configuration space, please check!!\n");
1146 
1147 	/* skip if the bios has already enabled large BAR */
1148 	if (adev->gmc.real_vram_size &&
1149 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1150 		return 0;
1151 
1152 	/* Check if the root BUS has 64bit memory resources */
1153 	root = adev->pdev->bus;
1154 	while (root->parent)
1155 		root = root->parent;
1156 
1157 	pci_bus_for_each_resource(root, res, i) {
1158 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1159 		    res->start > 0x100000000ull)
1160 			break;
1161 	}
1162 
1163 	/* Trying to resize is pointless without a root hub window above 4GB */
1164 	if (!res)
1165 		return 0;
1166 
1167 	/* Limit the BAR size to what is available */
1168 	max_size = pci_rebar_get_max_size(adev->pdev, 0);
1169 	if (max_size < 0)
1170 		return 0;
1171 	rbar_size = min(max_size, rbar_size);
1172 
1173 	/* Disable memory decoding while we change the BAR addresses and size */
1174 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1175 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1176 			      cmd & ~PCI_COMMAND_MEMORY);
1177 
1178 	/* Tear down doorbell as resizing will release BARs */
1179 	amdgpu_doorbell_fini(adev);
1180 
1181 	r = pci_resize_resource(adev->pdev, 0, rbar_size,
1182 				(adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
1183 								  : 1 << 2);
1184 	if (r == -ENOSPC)
1185 		dev_info(adev->dev,
1186 			 "Not enough PCI address space for a large BAR.");
1187 	else if (r && r != -ENOTSUPP)
1188 		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1189 
1190 	/* When the doorbell or fb BAR isn't available we have no chance of
1191 	 * using the device.
1192 	 */
1193 	r = amdgpu_doorbell_init(adev);
1194 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1195 		return -ENODEV;
1196 
1197 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1198 
1199 	return 0;
1200 }
1201 
/*
 * GPU helper functions.
 */
1205 /**
1206  * amdgpu_device_need_post - check if the hw need post or not
1207  *
1208  * @adev: amdgpu_device pointer
1209  *
1210  * Check if the asic has been initialized (all asics) at driver startup
1211  * or post is needed if  hw reset is performed.
1212  * Returns true if need or false if not.
1213  */
1214 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1215 {
1216 	uint32_t reg, flags;
1217 
1218 	if (amdgpu_sriov_vf(adev))
1219 		return false;
1220 
1221 	flags = amdgpu_device_get_vbios_flags(adev);
1222 	if (flags & AMDGPU_VBIOS_SKIP)
1223 		return false;
1224 	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1225 		return false;
1226 
1227 	if (amdgpu_passthrough(adev)) {
1228 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1229 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1230 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1231 		 * vpost executed for smc version below 22.15
1232 		 */
1233 		if (adev->asic_type == CHIP_FIJI) {
1234 			int err;
1235 			uint32_t fw_ver;
1236 
1237 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1238 			/* force vPost if error occurred */
1239 			if (err)
1240 				return true;
1241 
1242 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1243 			release_firmware(adev->pm.fw);
1244 			if (fw_ver < 0x00160e00)
1245 				return true;
1246 		}
1247 	}
1248 
1249 	/* Don't post if we need to reset whole hive on init */
1250 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1251 		return false;
1252 
1253 	if (adev->has_hw_reset) {
1254 		adev->has_hw_reset = false;
1255 		return true;
1256 	}
1257 
1258 	/* bios scratch used on CIK+ */
1259 	if (adev->asic_type >= CHIP_BONAIRE)
1260 		return amdgpu_atombios_scratch_need_asic_init(adev);
1261 
1262 	/* check MEM_SIZE for older asics */
1263 	reg = amdgpu_asic_get_config_memsize(adev);
1264 
1265 	if ((reg != 0) && (reg != 0xffffffff))
1266 		return false;
1267 
1268 	return true;
1269 }
1270 
1271 /*
1272  * Check whether seamless boot is supported.
1273  *
1274  * So far we only support seamless boot on DCE 3.0 or later.
1275  * If users report that it works on older ASICS as well, we may
1276  * loosen this.
1277  */
1278 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1279 {
1280 	switch (amdgpu_seamless) {
1281 	case -1:
1282 		break;
1283 	case 1:
1284 		return true;
1285 	case 0:
1286 		return false;
1287 	default:
1288 		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1289 			amdgpu_seamless);
1290 		return false;
1291 	}
1292 
1293 	if (!(adev->flags & AMD_IS_APU))
1294 		return false;
1295 
1296 	if (adev->mman.keep_stolen_vga_memory)
1297 		return false;
1298 
1299 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1300 }
1301 
1302 /*
1303  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1304  * don't support dynamic speed switching. Until we have confirmation from Intel
1305  * that a specific host supports it, it's safer that we keep it disabled for all.
1306  *
1307  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1308  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1309  */
1310 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1311 {
1312 #if IS_ENABLED(CONFIG_X86)
1313 	struct cpuinfo_x86 *c = &cpu_data(0);
1314 
1315 	/* eGPU change speeds based on USB4 fabric conditions */
1316 	if (dev_is_removable(adev->dev))
1317 		return true;
1318 
1319 	if (c->x86_vendor == X86_VENDOR_INTEL)
1320 		return false;
1321 #endif
1322 	return true;
1323 }
1324 
1325 static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1326 {
1327 	/* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
1328 	 * It's unclear if this is a platform-specific or GPU-specific issue.
1329 	 * Disable ASPM on SI for the time being.
1330 	 */
1331 	if (adev->family == AMDGPU_FAMILY_SI)
1332 		return true;
1333 
1334 #if IS_ENABLED(CONFIG_X86)
1335 	struct cpuinfo_x86 *c = &cpu_data(0);
1336 
1337 	if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
1338 		  amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
1339 		return false;
1340 
1341 	if (c->x86 == 6 &&
1342 		adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
1343 		switch (c->x86_model) {
1344 		case VFM_MODEL(INTEL_ALDERLAKE):
1345 		case VFM_MODEL(INTEL_ALDERLAKE_L):
1346 		case VFM_MODEL(INTEL_RAPTORLAKE):
1347 		case VFM_MODEL(INTEL_RAPTORLAKE_P):
1348 		case VFM_MODEL(INTEL_RAPTORLAKE_S):
1349 			return true;
1350 		default:
1351 			return false;
1352 		}
1353 	} else {
1354 		return false;
1355 	}
1356 #else
1357 	return false;
1358 #endif
1359 }
1360 
1361 /**
1362  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1363  *
1364  * @adev: amdgpu_device pointer
1365  *
1366  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1367  * be set for this device.
1368  *
1369  * Returns true if it should be used or false if not.
1370  */
1371 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1372 {
1373 	switch (amdgpu_aspm) {
1374 	case -1:
1375 		break;
1376 	case 0:
1377 		return false;
1378 	case 1:
1379 		return true;
1380 	default:
1381 		return false;
1382 	}
1383 	if (adev->flags & AMD_IS_APU)
1384 		return false;
1385 	if (amdgpu_device_aspm_support_quirk(adev))
1386 		return false;
1387 	return pcie_aspm_enabled(adev->pdev);
1388 }
1389 
1390 /* if we get transitioned to only one device, take VGA back */
1391 /**
1392  * amdgpu_device_vga_set_decode - enable/disable vga decode
1393  *
1394  * @pdev: PCI device pointer
1395  * @state: enable/disable vga decode
1396  *
1397  * Enable/disable vga decode (all asics).
1398  * Returns VGA resource flags.
1399  */
1400 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1401 		bool state)
1402 {
1403 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1404 
1405 	amdgpu_asic_set_vga_state(adev, state);
1406 	if (state)
1407 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1408 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1409 	else
1410 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1411 }
1412 
1413 /**
1414  * amdgpu_device_check_block_size - validate the vm block size
1415  *
1416  * @adev: amdgpu_device pointer
1417  *
1418  * Validates the vm block size specified via module parameter.
1419  * The vm block size defines number of bits in page table versus page directory,
1420  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1421  * page table and the remaining bits are in the page directory.
1422  */
1423 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1424 {
1425 	/* defines number of bits in page table versus page directory,
1426 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1427 	 * page table and the remaining bits are in the page directory
1428 	 */
1429 	if (amdgpu_vm_block_size == -1)
1430 		return;
1431 
1432 	if (amdgpu_vm_block_size < 9) {
1433 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1434 			 amdgpu_vm_block_size);
1435 		amdgpu_vm_block_size = -1;
1436 	}
1437 }
1438 
1439 /**
1440  * amdgpu_device_check_vm_size - validate the vm size
1441  *
1442  * @adev: amdgpu_device pointer
1443  *
1444  * Validates the vm size in GB specified via module parameter.
1445  * The VM size is the size of the GPU virtual memory space in GB.
1446  */
1447 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1448 {
1449 	/* no need to check the default value */
1450 	if (amdgpu_vm_size == -1)
1451 		return;
1452 
1453 	if (amdgpu_vm_size < 1) {
1454 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1455 			 amdgpu_vm_size);
1456 		amdgpu_vm_size = -1;
1457 	}
1458 }
1459 
1460 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1461 {
1462 	struct sysinfo si;
1463 	bool is_os_64 = (sizeof(void *) == 8);
1464 	uint64_t total_memory;
1465 	uint64_t dram_size_seven_GB = 0x1B8000000;
1466 	uint64_t dram_size_three_GB = 0xB8000000;
1467 
1468 	if (amdgpu_smu_memory_pool_size == 0)
1469 		return;
1470 
1471 	if (!is_os_64) {
1472 		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
1473 		goto def_value;
1474 	}
1475 	si_meminfo(&si);
1476 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1477 
1478 	if ((amdgpu_smu_memory_pool_size == 1) ||
1479 		(amdgpu_smu_memory_pool_size == 2)) {
1480 		if (total_memory < dram_size_three_GB)
1481 			goto def_value1;
1482 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1483 		(amdgpu_smu_memory_pool_size == 8)) {
1484 		if (total_memory < dram_size_seven_GB)
1485 			goto def_value1;
1486 	} else {
1487 		dev_warn(adev->dev, "Smu memory pool size not supported\n");
1488 		goto def_value;
1489 	}
1490 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1491 
1492 	return;
1493 
1494 def_value1:
1495 	dev_warn(adev->dev, "No enough system memory\n");
1496 def_value:
1497 	adev->pm.smu_prv_buffer_size = 0;
1498 }
1499 
1500 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1501 {
1502 	if (!(adev->flags & AMD_IS_APU) ||
1503 	    adev->asic_type < CHIP_RAVEN)
1504 		return 0;
1505 
1506 	switch (adev->asic_type) {
1507 	case CHIP_RAVEN:
1508 		if (adev->pdev->device == 0x15dd)
1509 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1510 		if (adev->pdev->device == 0x15d8)
1511 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1512 		break;
1513 	case CHIP_RENOIR:
1514 		if ((adev->pdev->device == 0x1636) ||
1515 		    (adev->pdev->device == 0x164c))
1516 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1517 		else
1518 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1519 		break;
1520 	case CHIP_VANGOGH:
1521 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1522 		break;
1523 	case CHIP_YELLOW_CARP:
1524 		break;
1525 	case CHIP_CYAN_SKILLFISH:
1526 		if ((adev->pdev->device == 0x13FE) ||
1527 		    (adev->pdev->device == 0x143F))
1528 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1529 		break;
1530 	default:
1531 		break;
1532 	}
1533 
1534 	return 0;
1535 }
1536 
1537 /**
1538  * amdgpu_device_check_arguments - validate module params
1539  *
1540  * @adev: amdgpu_device pointer
1541  *
1542  * Validates certain module parameters and updates
1543  * the associated values used by the driver (all asics).
1544  */
1545 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1546 {
1547 	int i;
1548 
1549 	if (amdgpu_sched_jobs < 4) {
1550 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1551 			 amdgpu_sched_jobs);
1552 		amdgpu_sched_jobs = 4;
1553 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1554 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1555 			 amdgpu_sched_jobs);
1556 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1557 	}
1558 
1559 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1560 		/* gart size must be greater or equal to 32M */
1561 		dev_warn(adev->dev, "gart size (%d) too small\n",
1562 			 amdgpu_gart_size);
1563 		amdgpu_gart_size = -1;
1564 	}
1565 
1566 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1567 		/* gtt size must be greater or equal to 32M */
1568 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1569 				 amdgpu_gtt_size);
1570 		amdgpu_gtt_size = -1;
1571 	}
1572 
1573 	/* valid range is between 4 and 9 inclusive */
1574 	if (amdgpu_vm_fragment_size != -1 &&
1575 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1576 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1577 		amdgpu_vm_fragment_size = -1;
1578 	}
1579 
1580 	if (amdgpu_sched_hw_submission < 2) {
1581 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1582 			 amdgpu_sched_hw_submission);
1583 		amdgpu_sched_hw_submission = 2;
1584 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1585 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1586 			 amdgpu_sched_hw_submission);
1587 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1588 	}
1589 
1590 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1591 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1592 		amdgpu_reset_method = -1;
1593 	}
1594 
1595 	amdgpu_device_check_smu_prv_buffer_size(adev);
1596 
1597 	amdgpu_device_check_vm_size(adev);
1598 
1599 	amdgpu_device_check_block_size(adev);
1600 
1601 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1602 
1603 	for (i = 0; i < MAX_XCP; i++) {
1604 		switch (amdgpu_enforce_isolation) {
1605 		case -1:
1606 		case 0:
1607 		default:
1608 			/* disable */
1609 			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
1610 			break;
1611 		case 1:
1612 			/* enable */
1613 			adev->enforce_isolation[i] =
1614 				AMDGPU_ENFORCE_ISOLATION_ENABLE;
1615 			break;
1616 		case 2:
1617 			/* enable legacy mode */
1618 			adev->enforce_isolation[i] =
1619 				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
1620 			break;
1621 		case 3:
1622 			/* enable only process isolation without submitting cleaner shader */
1623 			adev->enforce_isolation[i] =
1624 				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
1625 			break;
1626 		}
1627 	}
1628 
1629 	return 0;
1630 }
1631 
1632 /**
1633  * amdgpu_switcheroo_set_state - set switcheroo state
1634  *
1635  * @pdev: pci dev pointer
1636  * @state: vga_switcheroo state
1637  *
1638  * Callback for the switcheroo driver.  Suspends or resumes
1639  * the asics before or after it is powered up using ACPI methods.
1640  */
1641 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1642 					enum vga_switcheroo_state state)
1643 {
1644 	struct drm_device *dev = pci_get_drvdata(pdev);
1645 	int r;
1646 
1647 	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
1648 	    state == VGA_SWITCHEROO_OFF)
1649 		return;
1650 
1651 	if (state == VGA_SWITCHEROO_ON) {
1652 		pr_info("switched on\n");
1653 		/* don't suspend or resume card normally */
1654 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1655 
1656 		pci_set_power_state(pdev, PCI_D0);
1657 		amdgpu_device_load_pci_state(pdev);
1658 		r = pci_enable_device(pdev);
1659 		if (r)
1660 			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
1661 				 r);
1662 		amdgpu_device_resume(dev, true);
1663 
1664 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1665 	} else {
1666 		dev_info(&pdev->dev, "switched off\n");
1667 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1668 		amdgpu_device_prepare(dev);
1669 		amdgpu_device_suspend(dev, true);
1670 		amdgpu_device_cache_pci_state(pdev);
1671 		/* Shut down the device */
1672 		pci_disable_device(pdev);
1673 		pci_set_power_state(pdev, PCI_D3cold);
1674 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1675 	}
1676 }
1677 
1678 /**
1679  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1680  *
1681  * @pdev: pci dev pointer
1682  *
1683  * Callback for the switcheroo driver.  Check of the switcheroo
1684  * state can be changed.
1685  * Returns true if the state can be changed, false if not.
1686  */
1687 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1688 {
1689 	struct drm_device *dev = pci_get_drvdata(pdev);
1690 
1691        /*
1692 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1693 	* locking inversion with the driver load path. And the access here is
1694 	* completely racy anyway. So don't bother with locking for now.
1695 	*/
1696 	return atomic_read(&dev->open_count) == 0;
1697 }
1698 
1699 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1700 	.set_gpu_state = amdgpu_switcheroo_set_state,
1701 	.reprobe = NULL,
1702 	.can_switch = amdgpu_switcheroo_can_switch,
1703 };
1704 
1705 /**
1706  * amdgpu_device_enable_virtual_display - enable virtual display feature
1707  *
1708  * @adev: amdgpu_device pointer
1709  *
1710  * Enabled the virtual display feature if the user has enabled it via
1711  * the module parameter virtual_display.  This feature provides a virtual
1712  * display hardware on headless boards or in virtualized environments.
1713  * This function parses and validates the configuration string specified by
1714  * the user and configures the virtual display configuration (number of
1715  * virtual connectors, crtcs, etc.) specified.
1716  */
1717 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1718 {
1719 	adev->enable_virtual_display = false;
1720 
1721 	if (amdgpu_virtual_display) {
1722 		const char *pci_address_name = pci_name(adev->pdev);
1723 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1724 
1725 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1726 		pciaddstr_tmp = pciaddstr;
1727 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1728 			pciaddname = strsep(&pciaddname_tmp, ",");
1729 			if (!strcmp("all", pciaddname)
1730 			    || !strcmp(pci_address_name, pciaddname)) {
1731 				long num_crtc;
1732 				int res = -1;
1733 
1734 				adev->enable_virtual_display = true;
1735 
1736 				if (pciaddname_tmp)
1737 					res = kstrtol(pciaddname_tmp, 10,
1738 						      &num_crtc);
1739 
1740 				if (!res) {
1741 					if (num_crtc < 1)
1742 						num_crtc = 1;
1743 					if (num_crtc > 6)
1744 						num_crtc = 6;
1745 					adev->mode_info.num_crtc = num_crtc;
1746 				} else {
1747 					adev->mode_info.num_crtc = 1;
1748 				}
1749 				break;
1750 			}
1751 		}
1752 
1753 		dev_info(
1754 			adev->dev,
1755 			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1756 			amdgpu_virtual_display, pci_address_name,
1757 			adev->enable_virtual_display, adev->mode_info.num_crtc);
1758 
1759 		kfree(pciaddstr);
1760 	}
1761 }
1762 
1763 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1764 {
1765 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1766 		adev->mode_info.num_crtc = 1;
1767 		adev->enable_virtual_display = true;
1768 		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
1769 			 adev->enable_virtual_display,
1770 			 adev->mode_info.num_crtc);
1771 	}
1772 }
1773 
1774 /**
1775  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1776  *
1777  * @adev: amdgpu_device pointer
1778  *
1779  * Parses the asic configuration parameters specified in the gpu info
1780  * firmware and makes them available to the driver for use in configuring
1781  * the asic.
1782  * Returns 0 on success, -EINVAL on failure.
1783  */
1784 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1785 {
1786 	const char *chip_name;
1787 	int err;
1788 	const struct gpu_info_firmware_header_v1_0 *hdr;
1789 
1790 	adev->firmware.gpu_info_fw = NULL;
1791 
1792 	switch (adev->asic_type) {
1793 	default:
1794 		return 0;
1795 	case CHIP_VEGA10:
1796 		chip_name = "vega10";
1797 		break;
1798 	case CHIP_VEGA12:
1799 		chip_name = "vega12";
1800 		break;
1801 	case CHIP_RAVEN:
1802 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1803 			chip_name = "raven2";
1804 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1805 			chip_name = "picasso";
1806 		else
1807 			chip_name = "raven";
1808 		break;
1809 	case CHIP_ARCTURUS:
1810 		chip_name = "arcturus";
1811 		break;
1812 	case CHIP_NAVI12:
1813 		if (adev->discovery.bin)
1814 			return 0;
1815 		chip_name = "navi12";
1816 		break;
1817 	case CHIP_CYAN_SKILLFISH:
1818 		if (adev->discovery.bin)
1819 			return 0;
1820 		chip_name = "cyan_skillfish";
1821 		break;
1822 	}
1823 
1824 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
1825 				   AMDGPU_UCODE_OPTIONAL,
1826 				   "amdgpu/%s_gpu_info.bin", chip_name);
1827 	if (err) {
1828 		dev_err(adev->dev,
1829 			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
1830 			chip_name);
1831 		goto out;
1832 	}
1833 
1834 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1835 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1836 
1837 	switch (hdr->version_major) {
1838 	case 1:
1839 	{
1840 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1841 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1842 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1843 
1844 		/*
1845 		 * Should be dropped when DAL no longer needs it.
1846 		 */
1847 		if (adev->asic_type == CHIP_NAVI12)
1848 			goto parse_soc_bounding_box;
1849 
1850 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1851 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1852 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1853 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1854 		adev->gfx.config.max_texture_channel_caches =
1855 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1856 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1857 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1858 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1859 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1860 		adev->gfx.config.double_offchip_lds_buf =
1861 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1862 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1863 		adev->gfx.cu_info.max_waves_per_simd =
1864 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1865 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1866 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1867 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1868 		if (hdr->version_minor >= 1) {
1869 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1870 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1871 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1872 			adev->gfx.config.num_sc_per_sh =
1873 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1874 			adev->gfx.config.num_packer_per_sc =
1875 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1876 		}
1877 
1878 parse_soc_bounding_box:
1879 		/*
1880 		 * soc bounding box info is not integrated in disocovery table,
1881 		 * we always need to parse it from gpu info firmware if needed.
1882 		 */
1883 		if (hdr->version_minor == 2) {
1884 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1885 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1886 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1887 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1888 		}
1889 		break;
1890 	}
1891 	default:
1892 		dev_err(adev->dev,
1893 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1894 		err = -EINVAL;
1895 		goto out;
1896 	}
1897 out:
1898 	return err;
1899 }
1900 
1901 static void amdgpu_uid_init(struct amdgpu_device *adev)
1902 {
1903 	/* Initialize the UID for the device */
1904 	adev->uid_info = kzalloc_obj(struct amdgpu_uid);
1905 	if (!adev->uid_info) {
1906 		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
1907 		return;
1908 	}
1909 	adev->uid_info->adev = adev;
1910 }
1911 
1912 static void amdgpu_uid_fini(struct amdgpu_device *adev)
1913 {
1914 	/* Free the UID memory */
1915 	kfree(adev->uid_info);
1916 	adev->uid_info = NULL;
1917 }
1918 
1919 /**
1920  * amdgpu_device_ip_early_init - run early init for hardware IPs
1921  *
1922  * @adev: amdgpu_device pointer
1923  *
1924  * Early initialization pass for hardware IPs.  The hardware IPs that make
1925  * up each asic are discovered each IP's early_init callback is run.  This
1926  * is the first stage in initializing the asic.
1927  * Returns 0 on success, negative error code on failure.
1928  */
1929 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1930 {
1931 	struct amdgpu_ip_block *ip_block;
1932 	struct pci_dev *parent;
1933 	bool total, skip_bios;
1934 	uint32_t bios_flags;
1935 	int i, r;
1936 
1937 	amdgpu_device_enable_virtual_display(adev);
1938 
1939 	if (amdgpu_sriov_vf(adev)) {
1940 		r = amdgpu_virt_request_full_gpu(adev, true);
1941 		if (r)
1942 			return r;
1943 
1944 		r = amdgpu_virt_init_critical_region(adev);
1945 		if (r)
1946 			return r;
1947 	}
1948 
1949 	switch (adev->asic_type) {
1950 #ifdef CONFIG_DRM_AMDGPU_SI
1951 	case CHIP_VERDE:
1952 	case CHIP_TAHITI:
1953 	case CHIP_PITCAIRN:
1954 	case CHIP_OLAND:
1955 	case CHIP_HAINAN:
1956 		adev->family = AMDGPU_FAMILY_SI;
1957 		r = si_set_ip_blocks(adev);
1958 		if (r)
1959 			return r;
1960 		break;
1961 #endif
1962 #ifdef CONFIG_DRM_AMDGPU_CIK
1963 	case CHIP_BONAIRE:
1964 	case CHIP_HAWAII:
1965 	case CHIP_KAVERI:
1966 	case CHIP_KABINI:
1967 	case CHIP_MULLINS:
1968 		if (adev->flags & AMD_IS_APU)
1969 			adev->family = AMDGPU_FAMILY_KV;
1970 		else
1971 			adev->family = AMDGPU_FAMILY_CI;
1972 
1973 		r = cik_set_ip_blocks(adev);
1974 		if (r)
1975 			return r;
1976 		break;
1977 #endif
1978 	case CHIP_TOPAZ:
1979 	case CHIP_TONGA:
1980 	case CHIP_FIJI:
1981 	case CHIP_POLARIS10:
1982 	case CHIP_POLARIS11:
1983 	case CHIP_POLARIS12:
1984 	case CHIP_VEGAM:
1985 	case CHIP_CARRIZO:
1986 	case CHIP_STONEY:
1987 		if (adev->flags & AMD_IS_APU)
1988 			adev->family = AMDGPU_FAMILY_CZ;
1989 		else
1990 			adev->family = AMDGPU_FAMILY_VI;
1991 
1992 		r = vi_set_ip_blocks(adev);
1993 		if (r)
1994 			return r;
1995 		break;
1996 	default:
1997 		r = amdgpu_discovery_set_ip_blocks(adev);
1998 		if (r) {
1999 			adev->num_ip_blocks = 0;
2000 			return r;
2001 		}
2002 		break;
2003 	}
2004 
2005 	/* Check for IP version 9.4.3 with A0 hardware */
2006 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2007 	    !amdgpu_device_get_rev_id(adev)) {
2008 		dev_err(adev->dev, "Unsupported A0 hardware\n");
2009 		return -ENODEV;	/* device unsupported - no device error */
2010 	}
2011 
2012 	if (amdgpu_has_atpx() &&
2013 	    (amdgpu_is_atpx_hybrid() ||
2014 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2015 	    ((adev->flags & AMD_IS_APU) == 0) &&
2016 	    !dev_is_removable(&adev->pdev->dev))
2017 		adev->flags |= AMD_IS_PX;
2018 
2019 	if (!(adev->flags & AMD_IS_APU)) {
2020 		parent = pcie_find_root_port(adev->pdev);
2021 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2022 	}
2023 
2024 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2025 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2026 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2027 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2028 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2029 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2030 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2031 
2032 	adev->virt.is_xgmi_node_migrate_enabled = false;
2033 	if (amdgpu_sriov_vf(adev)) {
2034 		adev->virt.is_xgmi_node_migrate_enabled =
2035 			amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2036 	}
2037 
2038 	total = true;
2039 	for (i = 0; i < adev->num_ip_blocks; i++) {
2040 		ip_block = &adev->ip_blocks[i];
2041 
2042 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2043 			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2044 				 adev->ip_blocks[i].version->funcs->name);
2045 			adev->ip_blocks[i].status.valid = false;
2046 		} else if (ip_block->version->funcs->early_init) {
2047 			r = ip_block->version->funcs->early_init(ip_block);
2048 			if (r == -ENOENT) {
2049 				adev->ip_blocks[i].status.valid = false;
2050 			} else if (r) {
2051 				dev_err(adev->dev,
2052 					"early_init of IP block <%s> failed %d\n",
2053 					adev->ip_blocks[i].version->funcs->name,
2054 					r);
2055 				total = false;
2056 			} else {
2057 				adev->ip_blocks[i].status.valid = true;
2058 			}
2059 		} else {
2060 			adev->ip_blocks[i].status.valid = true;
2061 		}
2062 		/* get the vbios after the asic_funcs are set up */
2063 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2064 			r = amdgpu_device_parse_gpu_info_fw(adev);
2065 			if (r)
2066 				return r;
2067 
2068 			bios_flags = amdgpu_device_get_vbios_flags(adev);
2069 			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2070 			/* Read BIOS */
2071 			if (!skip_bios) {
2072 				bool optional =
2073 					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2074 				if (!amdgpu_get_bios(adev) && !optional)
2075 					return -EINVAL;
2076 
2077 				if (optional && !adev->bios)
2078 					dev_info(
2079 						adev->dev,
2080 						"VBIOS image optional, proceeding without VBIOS image");
2081 
2082 				if (adev->bios) {
2083 					r = amdgpu_atombios_init(adev);
2084 					if (r) {
2085 						dev_err(adev->dev,
2086 							"amdgpu_atombios_init failed\n");
2087 						amdgpu_vf_error_put(
2088 							adev,
2089 							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2090 							0, 0);
2091 						return r;
2092 					}
2093 				}
2094 			}
2095 
2096 			/*get pf2vf msg info at it's earliest time*/
2097 			if (amdgpu_sriov_vf(adev))
2098 				amdgpu_virt_init_data_exchange(adev);
2099 
2100 		}
2101 	}
2102 	if (!total)
2103 		return -ENODEV;
2104 
2105 	if (adev->gmc.xgmi.supported)
2106 		amdgpu_xgmi_early_init(adev);
2107 
2108 	if (amdgpu_is_multi_aid(adev))
2109 		amdgpu_uid_init(adev);
2110 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2111 	if (ip_block->status.valid != false)
2112 		amdgpu_amdkfd_device_probe(adev);
2113 
2114 	adev->cg_flags &= amdgpu_cg_mask;
2115 	adev->pg_flags &= amdgpu_pg_mask;
2116 
2117 	return 0;
2118 }
2119 
2120 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2121 {
2122 	int i, r;
2123 
2124 	for (i = 0; i < adev->num_ip_blocks; i++) {
2125 		if (!adev->ip_blocks[i].status.sw)
2126 			continue;
2127 		if (adev->ip_blocks[i].status.hw)
2128 			continue;
2129 		if (!amdgpu_ip_member_of_hwini(
2130 			    adev, adev->ip_blocks[i].version->type))
2131 			continue;
2132 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2133 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2134 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2135 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2136 			if (r) {
2137 				dev_err(adev->dev,
2138 					"hw_init of IP block <%s> failed %d\n",
2139 					adev->ip_blocks[i].version->funcs->name,
2140 					r);
2141 				return r;
2142 			}
2143 			adev->ip_blocks[i].status.hw = true;
2144 		}
2145 	}
2146 
2147 	return 0;
2148 }
2149 
2150 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2151 {
2152 	int i, r;
2153 
2154 	for (i = 0; i < adev->num_ip_blocks; i++) {
2155 		if (!adev->ip_blocks[i].status.sw)
2156 			continue;
2157 		if (adev->ip_blocks[i].status.hw)
2158 			continue;
2159 		if (!amdgpu_ip_member_of_hwini(
2160 			    adev, adev->ip_blocks[i].version->type))
2161 			continue;
2162 		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2163 		if (r) {
2164 			dev_err(adev->dev,
2165 				"hw_init of IP block <%s> failed %d\n",
2166 				adev->ip_blocks[i].version->funcs->name, r);
2167 			return r;
2168 		}
2169 		adev->ip_blocks[i].status.hw = true;
2170 	}
2171 
2172 	return 0;
2173 }
2174 
2175 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2176 {
2177 	int r = 0;
2178 	int i;
2179 	uint32_t smu_version;
2180 
2181 	if (adev->asic_type >= CHIP_VEGA10) {
2182 		for (i = 0; i < adev->num_ip_blocks; i++) {
2183 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2184 				continue;
2185 
2186 			if (!amdgpu_ip_member_of_hwini(adev,
2187 						       AMD_IP_BLOCK_TYPE_PSP))
2188 				break;
2189 
2190 			if (!adev->ip_blocks[i].status.sw)
2191 				continue;
2192 
2193 			/* no need to do the fw loading again if already done*/
2194 			if (adev->ip_blocks[i].status.hw == true)
2195 				break;
2196 
2197 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2198 				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2199 				if (r)
2200 					return r;
2201 			} else {
2202 				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2203 				if (r) {
2204 					dev_err(adev->dev,
2205 						"hw_init of IP block <%s> failed %d\n",
2206 						adev->ip_blocks[i]
2207 							.version->funcs->name,
2208 						r);
2209 					return r;
2210 				}
2211 				adev->ip_blocks[i].status.hw = true;
2212 			}
2213 			break;
2214 		}
2215 	}
2216 
2217 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2218 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2219 
2220 	return r;
2221 }
2222 
2223 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2224 {
2225 	struct drm_sched_init_args args = {
2226 		.ops = &amdgpu_sched_ops,
2227 		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
2228 		.timeout_wq = adev->reset_domain->wq,
2229 		.dev = adev->dev,
2230 	};
2231 	long timeout;
2232 	int r, i;
2233 
2234 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2235 		struct amdgpu_ring *ring = adev->rings[i];
2236 
2237 		/* No need to setup the GPU scheduler for rings that don't need it */
2238 		if (!ring || ring->no_scheduler)
2239 			continue;
2240 
2241 		switch (ring->funcs->type) {
2242 		case AMDGPU_RING_TYPE_GFX:
2243 			timeout = adev->gfx_timeout;
2244 			break;
2245 		case AMDGPU_RING_TYPE_COMPUTE:
2246 			timeout = adev->compute_timeout;
2247 			break;
2248 		case AMDGPU_RING_TYPE_SDMA:
2249 			timeout = adev->sdma_timeout;
2250 			break;
2251 		default:
2252 			timeout = adev->video_timeout;
2253 			break;
2254 		}
2255 
2256 		args.timeout = timeout;
2257 		args.credit_limit = ring->num_hw_submission;
2258 		args.score = ring->sched_score;
2259 		args.name = ring->name;
2260 
2261 		r = drm_sched_init(&ring->sched, &args);
2262 		if (r) {
2263 			dev_err(adev->dev,
2264 				"Failed to create scheduler on ring %s.\n",
2265 				ring->name);
2266 			return r;
2267 		}
2268 		r = amdgpu_uvd_entity_init(adev, ring);
2269 		if (r) {
2270 			dev_err(adev->dev,
2271 				"Failed to create UVD scheduling entity on ring %s.\n",
2272 				ring->name);
2273 			return r;
2274 		}
2275 		r = amdgpu_vce_entity_init(adev, ring);
2276 		if (r) {
2277 			dev_err(adev->dev,
2278 				"Failed to create VCE scheduling entity on ring %s.\n",
2279 				ring->name);
2280 			return r;
2281 		}
2282 	}
2283 
2284 	if (adev->xcp_mgr)
2285 		amdgpu_xcp_update_partition_sched_list(adev);
2286 
2287 	return 0;
2288 }
2289 
2290 
2291 /**
2292  * amdgpu_device_ip_init - run init for hardware IPs
2293  *
2294  * @adev: amdgpu_device pointer
2295  *
2296  * Main initialization pass for hardware IPs.  The list of all the hardware
2297  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2298  * are run.  sw_init initializes the software state associated with each IP
2299  * and hw_init initializes the hardware associated with each IP.
2300  * Returns 0 on success, negative error code on failure.
2301  */
2302 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2303 {
2304 	bool init_badpage;
2305 	int i, r;
2306 
2307 	r = amdgpu_ras_init(adev);
2308 	if (r)
2309 		return r;
2310 
2311 	for (i = 0; i < adev->num_ip_blocks; i++) {
2312 		if (!adev->ip_blocks[i].status.valid)
2313 			continue;
2314 		if (adev->ip_blocks[i].version->funcs->sw_init) {
2315 			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2316 			if (r) {
2317 				dev_err(adev->dev,
2318 					"sw_init of IP block <%s> failed %d\n",
2319 					adev->ip_blocks[i].version->funcs->name,
2320 					r);
2321 				goto init_failed;
2322 			}
2323 		}
2324 		adev->ip_blocks[i].status.sw = true;
2325 
2326 		if (!amdgpu_ip_member_of_hwini(
2327 			    adev, adev->ip_blocks[i].version->type))
2328 			continue;
2329 
2330 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2331 			/* need to do common hw init early so everything is set up for gmc */
2332 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2333 			if (r) {
2334 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2335 					r);
2336 				goto init_failed;
2337 			}
2338 			adev->ip_blocks[i].status.hw = true;
2339 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2340 			/* need to do gmc hw init early so we can allocate gpu mem */
2341 			/* Try to reserve bad pages early */
2342 			if (amdgpu_sriov_vf(adev))
2343 				amdgpu_virt_exchange_data(adev);
2344 
2345 			r = amdgpu_device_mem_scratch_init(adev);
2346 			if (r) {
2347 				dev_err(adev->dev,
2348 					"amdgpu_mem_scratch_init failed %d\n",
2349 					r);
2350 				goto init_failed;
2351 			}
2352 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2353 			if (r) {
2354 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2355 					r);
2356 				goto init_failed;
2357 			}
2358 			r = amdgpu_device_wb_init(adev);
2359 			if (r) {
2360 				dev_err(adev->dev,
2361 					"amdgpu_device_wb_init failed %d\n", r);
2362 				goto init_failed;
2363 			}
2364 			adev->ip_blocks[i].status.hw = true;
2365 
2366 			/* right after GMC hw init, we create CSA */
2367 			if (adev->gfx.mcbp) {
2368 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2369 							       AMDGPU_GEM_DOMAIN_VRAM |
2370 							       AMDGPU_GEM_DOMAIN_GTT,
2371 							       AMDGPU_CSA_SIZE);
2372 				if (r) {
2373 					dev_err(adev->dev,
2374 						"allocate CSA failed %d\n", r);
2375 					goto init_failed;
2376 				}
2377 			}
2378 
2379 			r = amdgpu_seq64_init(adev);
2380 			if (r) {
2381 				dev_err(adev->dev, "allocate seq64 failed %d\n",
2382 					r);
2383 				goto init_failed;
2384 			}
2385 		}
2386 	}
2387 
2388 	if (amdgpu_sriov_vf(adev))
2389 		amdgpu_virt_init_data_exchange(adev);
2390 
2391 	r = amdgpu_ib_pool_init(adev);
2392 	if (r) {
2393 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2394 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2395 		goto init_failed;
2396 	}
2397 
2398 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2399 	if (r)
2400 		goto init_failed;
2401 
2402 	r = amdgpu_device_ip_hw_init_phase1(adev);
2403 	if (r)
2404 		goto init_failed;
2405 
2406 	r = amdgpu_device_fw_loading(adev);
2407 	if (r)
2408 		goto init_failed;
2409 
2410 	r = amdgpu_device_ip_hw_init_phase2(adev);
2411 	if (r)
2412 		goto init_failed;
2413 
2414 	/*
2415 	 * retired pages will be loaded from eeprom and reserved here,
2416 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2417 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2418 	 * for I2C communication which only true at this point.
2419 	 *
2420 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2421 	 * failure from bad gpu situation and stop amdgpu init process
2422 	 * accordingly. For other failed cases, it will still release all
2423 	 * the resource and print error message, rather than returning one
2424 	 * negative value to upper level.
2425 	 *
2426 	 * Note: theoretically, this should be called before all vram allocations
2427 	 * to protect retired page from abusing
2428 	 */
2429 	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2430 	r = amdgpu_ras_recovery_init(adev, init_badpage);
2431 	if (r)
2432 		goto init_failed;
2433 
2434 	/**
2435 	 * In case of XGMI grab extra reference for reset domain for this device
2436 	 */
2437 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2438 		if (amdgpu_xgmi_add_device(adev) == 0) {
2439 			if (!amdgpu_sriov_vf(adev)) {
2440 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2441 
2442 				if (WARN_ON(!hive)) {
2443 					r = -ENOENT;
2444 					goto init_failed;
2445 				}
2446 
2447 				if (!hive->reset_domain ||
2448 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2449 					r = -ENOENT;
2450 					amdgpu_put_xgmi_hive(hive);
2451 					goto init_failed;
2452 				}
2453 
2454 				/* Drop the early temporary reset domain we created for device */
2455 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2456 				adev->reset_domain = hive->reset_domain;
2457 				amdgpu_put_xgmi_hive(hive);
2458 			}
2459 		}
2460 	}
2461 
2462 	r = amdgpu_device_init_schedulers(adev);
2463 	if (r)
2464 		goto init_failed;
2465 
2466 	amdgpu_ttm_set_buffer_funcs_status(adev, true);
2467 
2468 	/* Don't init kfd if whole hive need to be reset during init */
2469 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
2470 		amdgpu_amdkfd_device_init(adev);
2471 	}
2472 
2473 	amdgpu_fru_get_product_info(adev);
2474 
2475 	r = amdgpu_cper_init(adev);
2476 
2477 init_failed:
2478 
2479 	return r;
2480 }
2481 
2482 /**
2483  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2484  *
2485  * @adev: amdgpu_device pointer
2486  *
2487  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2488  * this function before a GPU reset.  If the value is retained after a
2489  * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2490  */
2491 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2492 {
2493 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2494 }
2495 
2496 /**
2497  * amdgpu_device_check_vram_lost - check if vram is valid
2498  *
2499  * @adev: amdgpu_device pointer
2500  *
2501  * Checks the reset magic value written to the gart pointer in VRAM.
2502  * The driver calls this after a GPU reset to see if the contents of
2503  * VRAM is lost or now.
2504  * returns true if vram is lost, false if not.
2505  */
2506 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2507 {
2508 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2509 			AMDGPU_RESET_MAGIC_NUM))
2510 		return true;
2511 
2512 	if (!amdgpu_in_reset(adev))
2513 		return false;
2514 
2515 	/*
2516 	 * For all ASICs with baco/mode1 reset, the VRAM is
2517 	 * always assumed to be lost.
2518 	 */
2519 	switch (amdgpu_asic_reset_method(adev)) {
2520 	case AMD_RESET_METHOD_LEGACY:
2521 	case AMD_RESET_METHOD_LINK:
2522 	case AMD_RESET_METHOD_BACO:
2523 	case AMD_RESET_METHOD_MODE1:
2524 		return true;
2525 	default:
2526 		return false;
2527 	}
2528 }
2529 
2530 /**
2531  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2532  *
2533  * @adev: amdgpu_device pointer
2534  * @state: clockgating state (gate or ungate)
2535  *
2536  * The list of all the hardware IPs that make up the asic is walked and the
2537  * set_clockgating_state callbacks are run.
2538  * Late initialization pass enabling clockgating for hardware IPs.
2539  * Fini or suspend, pass disabling clockgating for hardware IPs.
2540  * Returns 0 on success, negative error code on failure.
2541  */
2542 
2543 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2544 			       enum amd_clockgating_state state)
2545 {
2546 	int i, j, r;
2547 
2548 	if (amdgpu_emu_mode == 1)
2549 		return 0;
2550 
2551 	for (j = 0; j < adev->num_ip_blocks; j++) {
2552 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2553 		if (!adev->ip_blocks[i].status.late_initialized)
2554 			continue;
2555 		if (!adev->ip_blocks[i].version)
2556 			continue;
2557 		/* skip CG for GFX, SDMA on S0ix */
2558 		if (adev->in_s0ix &&
2559 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2560 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2561 			continue;
2562 		/* skip CG for VCE/UVD, it's handled specially */
2563 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2564 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2565 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2566 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2567 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2568 			/* enable clockgating to save power */
2569 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
2570 										     state);
2571 			if (r) {
2572 				dev_err(adev->dev,
2573 					"set_clockgating_state(gate) of IP block <%s> failed %d\n",
2574 					adev->ip_blocks[i].version->funcs->name,
2575 					r);
2576 				return r;
2577 			}
2578 		}
2579 	}
2580 
2581 	return 0;
2582 }
2583 
2584 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2585 			       enum amd_powergating_state state)
2586 {
2587 	int i, j, r;
2588 
2589 	if (amdgpu_emu_mode == 1)
2590 		return 0;
2591 
2592 	for (j = 0; j < adev->num_ip_blocks; j++) {
2593 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2594 		if (!adev->ip_blocks[i].status.late_initialized)
2595 			continue;
2596 		if (!adev->ip_blocks[i].version)
2597 			continue;
2598 		/* skip PG for GFX, SDMA on S0ix */
2599 		if (adev->in_s0ix &&
2600 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2601 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2602 			continue;
2603 		/* skip CG for VCE/UVD, it's handled specially */
2604 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2605 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2606 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2607 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2608 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2609 			/* enable powergating to save power */
2610 			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
2611 											state);
2612 			if (r) {
2613 				dev_err(adev->dev,
2614 					"set_powergating_state(gate) of IP block <%s> failed %d\n",
2615 					adev->ip_blocks[i].version->funcs->name,
2616 					r);
2617 				return r;
2618 			}
2619 		}
2620 	}
2621 	return 0;
2622 }
2623 
2624 static int amdgpu_device_enable_mgpu_fan_boost(void)
2625 {
2626 	struct amdgpu_gpu_instance *gpu_ins;
2627 	struct amdgpu_device *adev;
2628 	int i, ret = 0;
2629 
2630 	mutex_lock(&mgpu_info.mutex);
2631 
2632 	/*
2633 	 * MGPU fan boost feature should be enabled
2634 	 * only when there are two or more dGPUs in
2635 	 * the system
2636 	 */
2637 	if (mgpu_info.num_dgpu < 2)
2638 		goto out;
2639 
2640 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2641 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2642 		adev = gpu_ins->adev;
2643 		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
2644 		    !gpu_ins->mgpu_fan_enabled) {
2645 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2646 			if (ret)
2647 				break;
2648 
2649 			gpu_ins->mgpu_fan_enabled = 1;
2650 		}
2651 	}
2652 
2653 out:
2654 	mutex_unlock(&mgpu_info.mutex);
2655 
2656 	return ret;
2657 }
2658 
2659 /**
2660  * amdgpu_device_ip_late_init - run late init for hardware IPs
2661  *
2662  * @adev: amdgpu_device pointer
2663  *
2664  * Late initialization pass for hardware IPs.  The list of all the hardware
2665  * IPs that make up the asic is walked and the late_init callbacks are run.
2666  * late_init covers any special initialization that an IP requires
2667  * after all of the have been initialized or something that needs to happen
2668  * late in the init process.
2669  * Returns 0 on success, negative error code on failure.
2670  */
2671 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2672 {
2673 	struct amdgpu_gpu_instance *gpu_instance;
2674 	int i = 0, r;
2675 
2676 	for (i = 0; i < adev->num_ip_blocks; i++) {
2677 		if (!adev->ip_blocks[i].status.hw)
2678 			continue;
2679 		if (adev->ip_blocks[i].version->funcs->late_init) {
2680 			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
2681 			if (r) {
2682 				dev_err(adev->dev,
2683 					"late_init of IP block <%s> failed %d\n",
2684 					adev->ip_blocks[i].version->funcs->name,
2685 					r);
2686 				return r;
2687 			}
2688 		}
2689 		adev->ip_blocks[i].status.late_initialized = true;
2690 	}
2691 
2692 	r = amdgpu_ras_late_init(adev);
2693 	if (r) {
2694 		dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
2695 		return r;
2696 	}
2697 
2698 	if (!amdgpu_reset_in_recovery(adev))
2699 		amdgpu_ras_set_error_query_ready(adev, true);
2700 
2701 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2702 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2703 
2704 	amdgpu_device_fill_reset_magic(adev);
2705 
2706 	r = amdgpu_device_enable_mgpu_fan_boost();
2707 	if (r)
2708 		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
2709 
2710 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2711 	if (amdgpu_passthrough(adev) &&
2712 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2713 	     adev->asic_type == CHIP_ALDEBARAN))
2714 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2715 
2716 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2717 		mutex_lock(&mgpu_info.mutex);
2718 
2719 		/*
2720 		 * Reset device p-state to low as this was booted with high.
2721 		 *
2722 		 * This should be performed only after all devices from the same
2723 		 * hive get initialized.
2724 		 *
2725 		 * However, it's unknown how many device in the hive in advance.
2726 		 * As this is counted one by one during devices initializations.
2727 		 *
2728 		 * So, we wait for all XGMI interlinked devices initialized.
2729 		 * This may bring some delays as those devices may come from
2730 		 * different hives. But that should be OK.
2731 		 */
2732 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2733 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2734 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2735 				if (gpu_instance->adev->flags & AMD_IS_APU)
2736 					continue;
2737 
2738 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2739 						AMDGPU_XGMI_PSTATE_MIN);
2740 				if (r) {
2741 					dev_err(adev->dev,
2742 						"pstate setting failed (%d).\n",
2743 						r);
2744 					break;
2745 				}
2746 			}
2747 		}
2748 
2749 		mutex_unlock(&mgpu_info.mutex);
2750 	}
2751 
2752 	return 0;
2753 }
2754 
2755 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
2756 {
2757 	struct amdgpu_device *adev = ip_block->adev;
2758 	int r;
2759 
2760 	if (!ip_block->version->funcs->hw_fini) {
2761 		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
2762 			ip_block->version->funcs->name);
2763 	} else {
2764 		r = ip_block->version->funcs->hw_fini(ip_block);
2765 		/* XXX handle errors */
2766 		if (r) {
2767 			dev_dbg(adev->dev,
2768 				"hw_fini of IP block <%s> failed %d\n",
2769 				ip_block->version->funcs->name, r);
2770 		}
2771 	}
2772 
2773 	ip_block->status.hw = false;
2774 }
2775 
2776 /**
2777  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2778  *
2779  * @adev: amdgpu_device pointer
2780  *
2781  * For ASICs need to disable SMC first
2782  */
2783 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2784 {
2785 	int i;
2786 
2787 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
2788 		return;
2789 
2790 	for (i = 0; i < adev->num_ip_blocks; i++) {
2791 		if (!adev->ip_blocks[i].status.hw)
2792 			continue;
2793 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2794 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2795 			break;
2796 		}
2797 	}
2798 }
2799 
2800 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2801 {
2802 	int i, r;
2803 
2804 	for (i = 0; i < adev->num_ip_blocks; i++) {
2805 		if (!adev->ip_blocks[i].version)
2806 			continue;
2807 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2808 			continue;
2809 
2810 		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
2811 		if (r) {
2812 			dev_dbg(adev->dev,
2813 				"early_fini of IP block <%s> failed %d\n",
2814 				adev->ip_blocks[i].version->funcs->name, r);
2815 		}
2816 	}
2817 
2818 	amdgpu_amdkfd_suspend(adev, true);
2819 	amdgpu_amdkfd_teardown_processes(adev);
2820 	amdgpu_userq_suspend(adev);
2821 
2822 	/* Workaround for ASICs need to disable SMC first */
2823 	amdgpu_device_smu_fini_early(adev);
2824 
2825 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2826 		if (!adev->ip_blocks[i].status.hw)
2827 			continue;
2828 
2829 		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2830 	}
2831 
2832 	if (amdgpu_sriov_vf(adev)) {
2833 		if (amdgpu_virt_release_full_gpu(adev, false))
2834 			dev_err(adev->dev,
2835 				"failed to release exclusive mode on fini\n");
2836 	}
2837 
2838 	/*
2839 	 * Driver reload on the APU can fail due to firmware validation because
2840 	 * the PSP is always running, as it is shared across the whole SoC.
2841 	 * This same issue does not occur on dGPU because it has a mechanism
2842 	 * that checks whether the PSP is running. A solution for those issues
2843 	 * in the APU is to trigger a GPU reset, but this should be done during
2844 	 * the unload phase to avoid adding boot latency and screen flicker.
2845 	 */
2846 	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
2847 		r = amdgpu_asic_reset(adev);
2848 		if (r)
2849 			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
2850 	}
2851 
2852 	return 0;
2853 }
2854 
2855 /**
2856  * amdgpu_device_ip_fini - run fini for hardware IPs
2857  *
2858  * @adev: amdgpu_device pointer
2859  *
2860  * Main teardown pass for hardware IPs.  The list of all the hardware
2861  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2862  * are run.  hw_fini tears down the hardware associated with each IP
2863  * and sw_fini tears down any software state associated with each IP.
2864  * Returns 0 on success, negative error code on failure.
2865  */
2866 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2867 {
2868 	int i, r;
2869 
2870 	amdgpu_cper_fini(adev);
2871 
2872 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2873 		amdgpu_virt_release_ras_err_handler_data(adev);
2874 
2875 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2876 		amdgpu_xgmi_remove_device(adev);
2877 
2878 	amdgpu_amdkfd_device_fini_sw(adev);
2879 
2880 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2881 		if (!adev->ip_blocks[i].status.sw)
2882 			continue;
2883 
2884 		if (!adev->ip_blocks[i].version)
2885 			continue;
2886 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2887 			amdgpu_ucode_free_bo(adev);
2888 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2889 			amdgpu_device_wb_fini(adev);
2890 			amdgpu_device_mem_scratch_fini(adev);
2891 			amdgpu_ib_pool_fini(adev);
2892 			amdgpu_seq64_fini(adev);
2893 			amdgpu_doorbell_fini(adev);
2894 		}
2895 		if (adev->ip_blocks[i].version->funcs->sw_fini) {
2896 			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
2897 			/* XXX handle errors */
2898 			if (r) {
2899 				dev_dbg(adev->dev,
2900 					"sw_fini of IP block <%s> failed %d\n",
2901 					adev->ip_blocks[i].version->funcs->name,
2902 					r);
2903 			}
2904 		}
2905 		adev->ip_blocks[i].status.sw = false;
2906 		adev->ip_blocks[i].status.valid = false;
2907 	}
2908 
2909 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2910 		if (!adev->ip_blocks[i].status.late_initialized)
2911 			continue;
2912 		if (!adev->ip_blocks[i].version)
2913 			continue;
2914 		if (adev->ip_blocks[i].version->funcs->late_fini)
2915 			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
2916 		adev->ip_blocks[i].status.late_initialized = false;
2917 	}
2918 
2919 	amdgpu_ras_fini(adev);
2920 	amdgpu_uid_fini(adev);
2921 
2922 	return 0;
2923 }
2924 
2925 /**
2926  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2927  *
2928  * @work: work_struct.
2929  */
2930 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2931 {
2932 	struct amdgpu_device *adev =
2933 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2934 	int r;
2935 
2936 	r = amdgpu_ib_ring_tests(adev);
2937 	if (r)
2938 		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
2939 }
2940 
2941 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2942 {
2943 	struct amdgpu_device *adev =
2944 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2945 
2946 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2947 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2948 
2949 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
2950 		adev->gfx.gfx_off_state = true;
2951 }
2952 
2953 /**
2954  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2955  *
2956  * @adev: amdgpu_device pointer
2957  *
2958  * Main suspend function for hardware IPs.  The list of all the hardware
2959  * IPs that make up the asic is walked, clockgating is disabled and the
2960  * suspend callbacks are run.  suspend puts the hardware and software state
2961  * in each IP into a state suitable for suspend.
2962  * Returns 0 on success, negative error code on failure.
2963  */
2964 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2965 {
2966 	int i, r, rec;
2967 
2968 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2969 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2970 
2971 	/*
2972 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
2973 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2974 	 * scenario. Add the missing df cstate disablement here.
2975 	 */
2976 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2977 		dev_warn(adev->dev, "Failed to disallow df cstate");
2978 
2979 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2980 		if (!adev->ip_blocks[i].status.valid)
2981 			continue;
2982 
2983 		/* displays are handled separately */
2984 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2985 			continue;
2986 
2987 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
2988 		if (r)
2989 			goto unwind;
2990 	}
2991 
2992 	return 0;
2993 unwind:
2994 	rec = amdgpu_device_ip_resume_phase3(adev);
2995 	if (rec)
2996 		dev_err(adev->dev,
2997 			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
2998 			rec);
2999 
3000 	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
3001 
3002 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3003 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3004 
3005 	return r;
3006 }
3007 
3008 /**
3009  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3010  *
3011  * @adev: amdgpu_device pointer
3012  *
3013  * Main suspend function for hardware IPs.  The list of all the hardware
3014  * IPs that make up the asic is walked, clockgating is disabled and the
3015  * suspend callbacks are run.  suspend puts the hardware and software state
3016  * in each IP into a state suitable for suspend.
3017  * Returns 0 on success, negative error code on failure.
3018  */
3019 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3020 {
3021 	int i, r, rec;
3022 
3023 	if (adev->in_s0ix)
3024 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3025 
3026 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3027 		if (!adev->ip_blocks[i].status.valid)
3028 			continue;
3029 		/* displays are handled in phase1 */
3030 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3031 			continue;
3032 		/* PSP lost connection when err_event_athub occurs */
3033 		if (amdgpu_ras_intr_triggered() &&
3034 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3035 			adev->ip_blocks[i].status.hw = false;
3036 			continue;
3037 		}
3038 
3039 		/* skip unnecessary suspend if we do not initialize them yet */
3040 		if (!amdgpu_ip_member_of_hwini(
3041 			    adev, adev->ip_blocks[i].version->type))
3042 			continue;
3043 
3044 		/* Since we skip suspend for S0i3, we need to cancel the delayed
3045 		 * idle work here as the suspend callback never gets called.
3046 		 */
3047 		if (adev->in_s0ix &&
3048 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3049 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3050 			cancel_delayed_work_sync(&adev->gfx.idle_work);
3051 		/* skip suspend of gfx/mes and psp for S0ix
3052 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3053 		 * like at runtime. PSP is also part of the always on hardware
3054 		 * so no need to suspend it.
3055 		 */
3056 		if (adev->in_s0ix &&
3057 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3058 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3059 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3060 			continue;
3061 
3062 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3063 		if (adev->in_s0ix &&
3064 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3065 		     IP_VERSION(5, 0, 0)) &&
3066 		    (adev->ip_blocks[i].version->type ==
3067 		     AMD_IP_BLOCK_TYPE_SDMA))
3068 			continue;
3069 
3070 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3071 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3072 		 * from this location and RLC Autoload automatically also gets loaded
3073 		 * from here based on PMFW -> PSP message during re-init sequence.
3074 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3075 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3076 		 */
3077 		if (amdgpu_in_reset(adev) &&
3078 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3079 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3080 			continue;
3081 
3082 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3083 		if (r)
3084 			goto unwind;
3085 
3086 		/* handle putting the SMC in the appropriate state */
3087 		if (!amdgpu_sriov_vf(adev)) {
3088 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3089 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3090 				if (r) {
3091 					dev_err(adev->dev,
3092 						"SMC failed to set mp1 state %d, %d\n",
3093 						adev->mp1_state, r);
3094 					goto unwind;
3095 				}
3096 			}
3097 		}
3098 	}
3099 
3100 	return 0;
3101 unwind:
3102 	/* suspend phase 2 = resume phase 1 + resume phase 2 */
3103 	rec = amdgpu_device_ip_resume_phase1(adev);
3104 	if (rec) {
3105 		dev_err(adev->dev,
3106 			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3107 			rec);
3108 		return r;
3109 	}
3110 
3111 	rec = amdgpu_device_fw_loading(adev);
3112 	if (rec) {
3113 		dev_err(adev->dev,
3114 			"amdgpu_device_fw_loading failed during unwind: %d\n",
3115 			rec);
3116 		return r;
3117 	}
3118 
3119 	rec = amdgpu_device_ip_resume_phase2(adev);
3120 	if (rec) {
3121 		dev_err(adev->dev,
3122 			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3123 			rec);
3124 		return r;
3125 	}
3126 
3127 	return r;
3128 }
3129 
3130 /**
3131  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3132  *
3133  * @adev: amdgpu_device pointer
3134  *
3135  * Main suspend function for hardware IPs.  The list of all the hardware
3136  * IPs that make up the asic is walked, clockgating is disabled and the
3137  * suspend callbacks are run.  suspend puts the hardware and software state
3138  * in each IP into a state suitable for suspend.
3139  * Returns 0 on success, negative error code on failure.
3140  */
3141 static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3142 {
3143 	int r;
3144 
3145 	if (amdgpu_sriov_vf(adev)) {
3146 		amdgpu_virt_fini_data_exchange(adev);
3147 		amdgpu_virt_request_full_gpu(adev, false);
3148 	}
3149 
3150 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3151 
3152 	r = amdgpu_device_ip_suspend_phase1(adev);
3153 	if (r)
3154 		return r;
3155 	r = amdgpu_device_ip_suspend_phase2(adev);
3156 
3157 	if (amdgpu_sriov_vf(adev))
3158 		amdgpu_virt_release_full_gpu(adev, false);
3159 
3160 	return r;
3161 }
3162 
3163 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3164 {
3165 	int i, r;
3166 
3167 	static enum amd_ip_block_type ip_order[] = {
3168 		AMD_IP_BLOCK_TYPE_COMMON,
3169 		AMD_IP_BLOCK_TYPE_GMC,
3170 		AMD_IP_BLOCK_TYPE_PSP,
3171 		AMD_IP_BLOCK_TYPE_IH,
3172 	};
3173 
3174 	for (i = 0; i < adev->num_ip_blocks; i++) {
3175 		int j;
3176 		struct amdgpu_ip_block *block;
3177 
3178 		block = &adev->ip_blocks[i];
3179 		block->status.hw = false;
3180 
3181 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3182 
3183 			if (block->version->type != ip_order[j] ||
3184 				!block->status.valid)
3185 				continue;
3186 
3187 			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3188 			if (r) {
3189 				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3190 					 block->version->funcs->name);
3191 				return r;
3192 			}
3193 			block->status.hw = true;
3194 		}
3195 	}
3196 
3197 	return 0;
3198 }
3199 
3200 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3201 {
3202 	struct amdgpu_ip_block *block;
3203 	int i, r = 0;
3204 
3205 	static enum amd_ip_block_type ip_order[] = {
3206 		AMD_IP_BLOCK_TYPE_SMC,
3207 		AMD_IP_BLOCK_TYPE_DCE,
3208 		AMD_IP_BLOCK_TYPE_GFX,
3209 		AMD_IP_BLOCK_TYPE_SDMA,
3210 		AMD_IP_BLOCK_TYPE_MES,
3211 		AMD_IP_BLOCK_TYPE_UVD,
3212 		AMD_IP_BLOCK_TYPE_VCE,
3213 		AMD_IP_BLOCK_TYPE_VCN,
3214 		AMD_IP_BLOCK_TYPE_JPEG
3215 	};
3216 
3217 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3218 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
3219 
3220 		if (!block)
3221 			continue;
3222 
3223 		if (block->status.valid && !block->status.hw) {
3224 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3225 				r = amdgpu_ip_block_resume(block);
3226 			} else {
3227 				r = block->version->funcs->hw_init(block);
3228 			}
3229 
3230 			if (r) {
3231 				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3232 					 block->version->funcs->name);
3233 				break;
3234 			}
3235 			block->status.hw = true;
3236 		}
3237 	}
3238 
3239 	return r;
3240 }
3241 
3242 /**
3243  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3244  *
3245  * @adev: amdgpu_device pointer
3246  *
3247  * First resume function for hardware IPs.  The list of all the hardware
3248  * IPs that make up the asic is walked and the resume callbacks are run for
3249  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3250  * after a suspend and updates the software state as necessary.  This
3251  * function is also used for restoring the GPU after a GPU reset.
3252  * Returns 0 on success, negative error code on failure.
3253  */
3254 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3255 {
3256 	int i, r;
3257 
3258 	for (i = 0; i < adev->num_ip_blocks; i++) {
3259 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3260 			continue;
3261 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3262 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3263 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3264 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3265 
3266 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3267 			if (r)
3268 				return r;
3269 		}
3270 	}
3271 
3272 	return 0;
3273 }
3274 
3275 /**
3276  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3277  *
3278  * @adev: amdgpu_device pointer
3279  *
3280  * Second resume function for hardware IPs.  The list of all the hardware
3281  * IPs that make up the asic is walked and the resume callbacks are run for
3282  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3283  * functional state after a suspend and updates the software state as
3284  * necessary.  This function is also used for restoring the GPU after a GPU
3285  * reset.
3286  * Returns 0 on success, negative error code on failure.
3287  */
3288 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3289 {
3290 	int i, r;
3291 
3292 	for (i = 0; i < adev->num_ip_blocks; i++) {
3293 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3294 			continue;
3295 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3296 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3297 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3298 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3299 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3300 			continue;
3301 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3302 		if (r)
3303 			return r;
3304 	}
3305 
3306 	return 0;
3307 }
3308 
3309 /**
3310  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3311  *
3312  * @adev: amdgpu_device pointer
3313  *
3314  * Third resume function for hardware IPs.  The list of all the hardware
3315  * IPs that make up the asic is walked and the resume callbacks are run for
3316  * all DCE.  resume puts the hardware into a functional state after a suspend
3317  * and updates the software state as necessary.  This function is also used
3318  * for restoring the GPU after a GPU reset.
3319  *
3320  * Returns 0 on success, negative error code on failure.
3321  */
3322 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3323 {
3324 	int i, r;
3325 
3326 	for (i = 0; i < adev->num_ip_blocks; i++) {
3327 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3328 			continue;
3329 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3330 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3331 			if (r)
3332 				return r;
3333 		}
3334 	}
3335 
3336 	return 0;
3337 }
3338 
3339 /**
3340  * amdgpu_device_ip_resume - run resume for hardware IPs
3341  *
3342  * @adev: amdgpu_device pointer
3343  *
3344  * Main resume function for hardware IPs.  The hardware IPs
3345  * are split into two resume functions because they are
3346  * also used in recovering from a GPU reset and some additional
3347  * steps need to be take between them.  In this case (S3/S4) they are
3348  * run sequentially.
3349  * Returns 0 on success, negative error code on failure.
3350  */
3351 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3352 {
3353 	int r;
3354 
3355 	r = amdgpu_device_ip_resume_phase1(adev);
3356 	if (r)
3357 		return r;
3358 
3359 	r = amdgpu_device_fw_loading(adev);
3360 	if (r)
3361 		return r;
3362 
3363 	r = amdgpu_device_ip_resume_phase2(adev);
3364 
3365 	amdgpu_ttm_set_buffer_funcs_status(adev, true);
3366 
3367 	if (r)
3368 		return r;
3369 
3370 	amdgpu_fence_driver_hw_init(adev);
3371 
3372 	r = amdgpu_device_ip_resume_phase3(adev);
3373 
3374 	return r;
3375 }
3376 
3377 /**
3378  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3379  *
3380  * @adev: amdgpu_device pointer
3381  *
3382  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3383  */
3384 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3385 {
3386 	if (amdgpu_sriov_vf(adev)) {
3387 		if (adev->is_atom_fw) {
3388 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3389 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3390 		} else {
3391 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3392 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3393 		}
3394 
3395 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3396 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3397 	}
3398 }
3399 
/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @pdev : pci device context
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * The answer depends on the asic type, on the amdgpu_dc module parameter and
 * on which of CONFIG_DRM_AMDGPU_SI, CONFIG_DRM_AMD_DC and
 * CONFIG_DRM_AMD_DC_SI the kernel was built with.
 * returns true if DC has support, false if not.
 */
bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
				       enum amd_asic_type asic_type)
{
	switch (asic_type) {
	/* CHIP_HAINAN is only listed when SI support is compiled in */
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
		/* these additionally need CONFIG_DRM_AMD_DC_SI */
		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
	default:
		/* any non-zero amdgpu_dc value counts as "DC wanted" */
		return amdgpu_dc != 0;
#else
	default:
		/*
		 * DC is not compiled in: never supported.  Log once if the
		 * user explicitly asked for it (amdgpu_dc > 0).
		 */
		if (amdgpu_dc > 0)
			dev_info_once(
				&pdev->dev,
				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
		return false;
#endif
	}
}
3437 
3438 /**
3439  * amdgpu_device_has_dc_support - check if dc is supported
3440  *
3441  * @adev: amdgpu_device pointer
3442  *
3443  * Returns true for supported, false for not supported
3444  */
3445 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3446 {
3447 	if (adev->enable_virtual_display ||
3448 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3449 		return false;
3450 
3451 	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
3452 }
3453 
3454 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3455 {
3456 	struct amdgpu_device *adev =
3457 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3458 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3459 
3460 	/* It's a bug to not have a hive within this function */
3461 	if (WARN_ON(!hive))
3462 		return;
3463 
3464 	/*
3465 	 * Use task barrier to synchronize all xgmi reset works across the
3466 	 * hive. task_barrier_enter and task_barrier_exit will block
3467 	 * until all the threads running the xgmi reset works reach
3468 	 * those points. task_barrier_full will do both blocks.
3469 	 */
3470 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3471 
3472 		task_barrier_enter(&hive->tb);
3473 		adev->asic_reset_res = amdgpu_device_baco_enter(adev);
3474 
3475 		if (adev->asic_reset_res)
3476 			goto fail;
3477 
3478 		task_barrier_exit(&hive->tb);
3479 		adev->asic_reset_res = amdgpu_device_baco_exit(adev);
3480 
3481 		if (adev->asic_reset_res)
3482 			goto fail;
3483 
3484 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3485 	} else {
3486 
3487 		task_barrier_full(&hive->tb);
3488 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3489 	}
3490 
3491 fail:
3492 	if (adev->asic_reset_res)
3493 		dev_warn(adev->dev,
3494 			 "ASIC reset failed with error, %d for drm dev, %s",
3495 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3496 	amdgpu_put_xgmi_hive(hive);
3497 }
3498 
3499 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3500 {
3501 	char *input = amdgpu_lockup_timeout;
3502 	char *timeout_setting = NULL;
3503 	int index = 0;
3504 	long timeout;
3505 	int ret = 0;
3506 
3507 	/* By default timeout for all queues is 2 sec */
3508 	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3509 		adev->video_timeout = msecs_to_jiffies(2000);
3510 
3511 	if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
3512 		return 0;
3513 
3514 	while ((timeout_setting = strsep(&input, ",")) &&
3515 	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3516 		ret = kstrtol(timeout_setting, 0, &timeout);
3517 		if (ret)
3518 			return ret;
3519 
3520 		if (timeout == 0) {
3521 			index++;
3522 			continue;
3523 		} else if (timeout < 0) {
3524 			timeout = MAX_SCHEDULE_TIMEOUT;
3525 			dev_warn(adev->dev, "lockup timeout disabled");
3526 			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3527 		} else {
3528 			timeout = msecs_to_jiffies(timeout);
3529 		}
3530 
3531 		switch (index++) {
3532 		case 0:
3533 			adev->gfx_timeout = timeout;
3534 			break;
3535 		case 1:
3536 			adev->compute_timeout = timeout;
3537 			break;
3538 		case 2:
3539 			adev->sdma_timeout = timeout;
3540 			break;
3541 		case 3:
3542 			adev->video_timeout = timeout;
3543 			break;
3544 		default:
3545 			break;
3546 		}
3547 	}
3548 
3549 	/* When only one value specified apply it to all queues. */
3550 	if (index == 1)
3551 		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3552 			adev->video_timeout = timeout;
3553 
3554 	return ret;
3555 }
3556 
3557 /**
3558  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3559  *
3560  * @adev: amdgpu_device pointer
3561  *
3562  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3563  */
3564 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3565 {
3566 	struct iommu_domain *domain;
3567 
3568 	domain = iommu_get_domain_for_dev(adev->dev);
3569 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3570 		adev->ram_is_direct_mapped = true;
3571 }
3572 
#if defined(CONFIG_HSA_AMD_P2P)
/**
 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true if the device has an IOMMU domain of type IOMMU_DOMAIN_DMA
 * or IOMMU_DOMAIN_DMA_FQ, false otherwise (including when there is no
 * domain at all).
 */
static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(adev->dev);

	if (!domain)
		return false;

	return domain->type == IOMMU_DOMAIN_DMA ||
	       domain->type == IOMMU_DOMAIN_DMA_FQ;
}
#endif
3593 
3594 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3595 {
3596 	if (amdgpu_mcbp == 1)
3597 		adev->gfx.mcbp = true;
3598 	else if (amdgpu_mcbp == 0)
3599 		adev->gfx.mcbp = false;
3600 
3601 	if (amdgpu_sriov_vf(adev))
3602 		adev->gfx.mcbp = true;
3603 
3604 	if (adev->gfx.mcbp)
3605 		dev_info(adev->dev, "MCBP is enabled\n");
3606 }
3607 
3608 static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
3609 {
3610 	int r;
3611 
3612 	r = amdgpu_atombios_sysfs_init(adev);
3613 	if (r)
3614 		drm_err(&adev->ddev,
3615 			"registering atombios sysfs failed (%d).\n", r);
3616 
3617 	r = amdgpu_pm_sysfs_init(adev);
3618 	if (r)
3619 		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
3620 
3621 	r = amdgpu_ucode_sysfs_init(adev);
3622 	if (r) {
3623 		adev->ucode_sysfs_en = false;
3624 		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
3625 	} else
3626 		adev->ucode_sysfs_en = true;
3627 
3628 	r = amdgpu_device_attr_sysfs_init(adev);
3629 	if (r)
3630 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3631 
3632 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
3633 	if (r)
3634 		dev_err(adev->dev,
3635 			"Could not create amdgpu board attributes\n");
3636 
3637 	amdgpu_fru_sysfs_init(adev);
3638 	amdgpu_reg_state_sysfs_init(adev);
3639 	amdgpu_xcp_sysfs_init(adev);
3640 	amdgpu_uma_sysfs_init(adev);
3641 
3642 	return r;
3643 }
3644 
/*
 * Remove the sysfs interfaces of the device.
 *
 * The pm and firmware (ucode) entries are only removed when their
 * respective flags say they were set up; the remaining fini helpers are
 * called unconditionally.
 */
static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
{
	/* pm sysfs is only torn down if it was marked initialized */
	if (adev->pm.sysfs_initialized)
		amdgpu_pm_sysfs_fini(adev);
	/* ucode_sysfs_en is only true when firmware sysfs creation succeeded */
	if (adev->ucode_sysfs_en)
		amdgpu_ucode_sysfs_fini(adev);
	amdgpu_device_attr_sysfs_fini(adev);
	amdgpu_fru_sysfs_fini(adev);

	amdgpu_reg_state_sysfs_fini(adev);
	amdgpu_xcp_sysfs_fini(adev);
	amdgpu_uma_sysfs_fini(adev);
}
3658 
3659 /**
3660  * amdgpu_device_init - initialize the driver
3661  *
3662  * @adev: amdgpu_device pointer
3663  * @flags: driver flags
3664  *
3665  * Initializes the driver info and hw (all asics).
3666  * Returns 0 for success or an error on failure.
3667  * Called at driver startup.
3668  */
3669 int amdgpu_device_init(struct amdgpu_device *adev,
3670 		       uint32_t flags)
3671 {
3672 	struct pci_dev *pdev = adev->pdev;
3673 	int r, i;
3674 	bool px = false;
3675 	u32 max_MBps;
3676 	int tmp;
3677 
3678 	adev->shutdown = false;
3679 	adev->flags = flags;
3680 
3681 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3682 		adev->asic_type = amdgpu_force_asic_type;
3683 	else
3684 		adev->asic_type = flags & AMD_ASIC_MASK;
3685 
3686 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3687 	if (amdgpu_emu_mode == 1)
3688 		adev->usec_timeout *= 10;
3689 	adev->gmc.gart_size = 512 * 1024 * 1024;
3690 	adev->accel_working = false;
3691 	adev->num_rings = 0;
3692 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3693 	adev->mman.buffer_funcs = NULL;
3694 	adev->mman.buffer_funcs_ring = NULL;
3695 	adev->vm_manager.vm_pte_funcs = NULL;
3696 	adev->vm_manager.vm_pte_num_scheds = 0;
3697 	adev->gmc.gmc_funcs = NULL;
3698 	adev->harvest_ip_mask = 0x0;
3699 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3700 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3701 
3702 	amdgpu_reg_access_init(adev);
3703 
3704 	dev_info(
3705 		adev->dev,
3706 		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3707 		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3708 		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3709 
3710 	/* mutex initialization are all done here so we
3711 	 * can recall function without having locking issues
3712 	 */
3713 	mutex_init(&adev->firmware.mutex);
3714 	mutex_init(&adev->pm.mutex);
3715 	mutex_init(&adev->gfx.gpu_clock_mutex);
3716 	mutex_init(&adev->srbm_mutex);
3717 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3718 	mutex_init(&adev->gfx.gfx_off_mutex);
3719 	mutex_init(&adev->gfx.partition_mutex);
3720 	mutex_init(&adev->grbm_idx_mutex);
3721 	mutex_init(&adev->mn_lock);
3722 	mutex_init(&adev->virt.vf_errors.lock);
3723 	hash_init(adev->mn_hash);
3724 	mutex_init(&adev->psp.mutex);
3725 	mutex_init(&adev->notifier_lock);
3726 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3727 	mutex_init(&adev->benchmark_mutex);
3728 	mutex_init(&adev->gfx.reset_sem_mutex);
3729 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
3730 	mutex_init(&adev->enforce_isolation_mutex);
3731 	for (i = 0; i < MAX_XCP; ++i) {
3732 		adev->isolation[i].spearhead = dma_fence_get_stub();
3733 		amdgpu_sync_create(&adev->isolation[i].active);
3734 		amdgpu_sync_create(&adev->isolation[i].prev);
3735 	}
3736 	mutex_init(&adev->gfx.userq_sch_mutex);
3737 	mutex_init(&adev->gfx.workload_profile_mutex);
3738 	mutex_init(&adev->vcn.workload_profile_mutex);
3739 
3740 	amdgpu_device_init_apu_flags(adev);
3741 
3742 	r = amdgpu_device_check_arguments(adev);
3743 	if (r)
3744 		return r;
3745 
3746 	spin_lock_init(&adev->mmio_idx_lock);
3747 	spin_lock_init(&adev->mm_stats.lock);
3748 	spin_lock_init(&adev->virt.rlcg_reg_lock);
3749 	spin_lock_init(&adev->wb.lock);
3750 
3751 	xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
3752 
3753 	INIT_LIST_HEAD(&adev->reset_list);
3754 
3755 	INIT_LIST_HEAD(&adev->ras_list);
3756 
3757 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3758 
3759 	xa_init(&adev->userq_doorbell_xa);
3760 
3761 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3762 			  amdgpu_device_delayed_init_work_handler);
3763 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3764 			  amdgpu_device_delay_enable_gfx_off);
3765 	/*
3766 	 * Initialize the enforce_isolation work structures for each XCP
3767 	 * partition.  This work handler is responsible for enforcing shader
3768 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
3769 	 * each GFX and compute ring.  If there are any fences, it schedules
3770 	 * the `enforce_isolation_work` to be run after a delay.  If there are
3771 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
3772 	 * runqueue.
3773 	 */
3774 	for (i = 0; i < MAX_XCP; i++) {
3775 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
3776 				  amdgpu_gfx_enforce_isolation_handler);
3777 		adev->gfx.enforce_isolation[i].adev = adev;
3778 		adev->gfx.enforce_isolation[i].xcp_id = i;
3779 	}
3780 
3781 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3782 	INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
3783 
3784 	amdgpu_coredump_init(adev);
3785 
3786 	adev->gfx.gfx_off_req_count = 1;
3787 	adev->gfx.gfx_off_residency = 0;
3788 	adev->gfx.gfx_off_entrycount = 0;
3789 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3790 
3791 	atomic_set(&adev->throttling_logging_enabled, 1);
3792 	/*
3793 	 * If throttling continues, logging will be performed every minute
3794 	 * to avoid log flooding. "-1" is subtracted since the thermal
3795 	 * throttling interrupt comes every second. Thus, the total logging
3796 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3797 	 * for throttling interrupt) = 60 seconds.
3798 	 */
3799 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3800 
3801 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3802 
3803 	/* Registers mapping */
3804 	/* TODO: block userspace mapping of io register */
3805 	if (adev->asic_type >= CHIP_BONAIRE) {
3806 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3807 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3808 	} else {
3809 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3810 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3811 	}
3812 
3813 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3814 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3815 
3816 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3817 	if (!adev->rmmio)
3818 		return -ENOMEM;
3819 
3820 	dev_info(adev->dev, "register mmio base: 0x%08X\n",
3821 		 (uint32_t)adev->rmmio_base);
3822 	dev_info(adev->dev, "register mmio size: %u\n",
3823 		 (unsigned int)adev->rmmio_size);
3824 
3825 	/*
3826 	 * Reset domain needs to be present early, before XGMI hive discovered
3827 	 * (if any) and initialized to use reset sem and in_gpu reset flag
3828 	 * early on during init and before calling to RREG32.
3829 	 */
3830 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3831 	if (!adev->reset_domain)
3832 		return -ENOMEM;
3833 
3834 	/* detect hw virtualization here */
3835 	amdgpu_virt_init(adev);
3836 
3837 	amdgpu_device_get_pcie_info(adev);
3838 
3839 	r = amdgpu_device_get_job_timeout_settings(adev);
3840 	if (r) {
3841 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3842 		return r;
3843 	}
3844 
3845 	amdgpu_device_set_mcbp(adev);
3846 
3847 	/*
3848 	 * By default, use default mode where all blocks are expected to be
3849 	 * initialized. At present a 'swinit' of blocks is required to be
3850 	 * completed before the need for a different level is detected.
3851 	 */
3852 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
3853 	/* early init functions */
3854 	r = amdgpu_device_ip_early_init(adev);
3855 	if (r)
3856 		return r;
3857 
3858 	/*
3859 	 * No need to remove conflicting FBs for non-display class devices.
3860 	 * This prevents the sysfb from being freed accidently.
3861 	 */
3862 	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
3863 	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
3864 		/* Get rid of things like offb */
3865 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
3866 		if (r)
3867 			return r;
3868 	}
3869 
3870 	/* Enable TMZ based on IP_VERSION */
3871 	amdgpu_gmc_tmz_set(adev);
3872 
3873 	if (amdgpu_sriov_vf(adev) &&
3874 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
3875 		/* VF MMIO access (except mailbox range) from CPU
3876 		 * will be blocked during sriov runtime
3877 		 */
3878 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
3879 
3880 	amdgpu_gmc_noretry_set(adev);
3881 	/* Need to get xgmi info early to decide the reset behavior*/
3882 	if (adev->gmc.xgmi.supported) {
3883 		if (adev->gfxhub.funcs &&
3884 		    adev->gfxhub.funcs->get_xgmi_info) {
3885 			r = adev->gfxhub.funcs->get_xgmi_info(adev);
3886 			if (r)
3887 				return r;
3888 		}
3889 	}
3890 
3891 	if (adev->gmc.xgmi.connected_to_cpu) {
3892 		if (adev->mmhub.funcs &&
3893 		    adev->mmhub.funcs->get_xgmi_info) {
3894 			r = adev->mmhub.funcs->get_xgmi_info(adev);
3895 			if (r)
3896 				return r;
3897 		}
3898 	}
3899 
3900 	/* enable PCIE atomic ops */
3901 	if (amdgpu_sriov_vf(adev)) {
3902 		if (adev->virt.fw_reserve.p_pf2vf)
3903 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3904 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3905 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3906 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
3907 	 * internal path natively support atomics, set have_atomics_support to true.
3908 	 */
3909 	} else if ((adev->flags & AMD_IS_APU &&
3910 		   amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) ||
3911 		   (adev->gmc.xgmi.connected_to_cpu &&
3912 		   amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))) {
3913 		adev->have_atomics_support = true;
3914 	} else {
3915 		adev->have_atomics_support =
3916 			!pci_enable_atomic_ops_to_root(adev->pdev,
3917 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3918 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3919 	}
3920 
3921 	if (!adev->have_atomics_support)
3922 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3923 
3924 	/* doorbell bar mapping and doorbell index init*/
3925 	amdgpu_doorbell_init(adev);
3926 
3927 	if (amdgpu_emu_mode == 1) {
3928 		/* post the asic on emulation mode */
3929 		emu_soc_asic_init(adev);
3930 		goto fence_driver_init;
3931 	}
3932 
3933 	amdgpu_reset_init(adev);
3934 
3935 	/* detect if we are with an SRIOV vbios */
3936 	if (adev->bios)
3937 		amdgpu_device_detect_sriov_bios(adev);
3938 
3939 	/* check if we need to reset the asic
3940 	 *  E.g., driver was not cleanly unloaded previously, etc.
3941 	 */
3942 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3943 		if (adev->gmc.xgmi.num_physical_nodes) {
3944 			dev_info(adev->dev, "Pending hive reset.\n");
3945 			amdgpu_set_init_level(adev,
3946 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3947 		} else {
3948 				tmp = amdgpu_reset_method;
3949 				/* It should do a default reset when loading or reloading the driver,
3950 				 * regardless of the module parameter reset_method.
3951 				 */
3952 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3953 				r = amdgpu_asic_reset(adev);
3954 				amdgpu_reset_method = tmp;
3955 		}
3956 
3957 		if (r) {
3958 		  dev_err(adev->dev, "asic reset on init failed\n");
3959 		  goto failed;
3960 		}
3961 	}
3962 
3963 	/* Post card if necessary */
3964 	if (amdgpu_device_need_post(adev)) {
3965 		if (!adev->bios) {
3966 			dev_err(adev->dev, "no vBIOS found\n");
3967 			r = -EINVAL;
3968 			goto failed;
3969 		}
3970 		dev_info(adev->dev, "GPU posting now...\n");
3971 		r = amdgpu_device_asic_init(adev);
3972 		if (r) {
3973 			dev_err(adev->dev, "gpu post error!\n");
3974 			goto failed;
3975 		}
3976 	}
3977 
3978 	if (adev->bios) {
3979 		if (adev->is_atom_fw) {
3980 			/* Initialize clocks */
3981 			r = amdgpu_atomfirmware_get_clock_info(adev);
3982 			if (r) {
3983 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3984 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3985 				goto failed;
3986 			}
3987 		} else {
3988 			/* Initialize clocks */
3989 			r = amdgpu_atombios_get_clock_info(adev);
3990 			if (r) {
3991 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3992 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3993 				goto failed;
3994 			}
3995 			/* init i2c buses */
3996 			amdgpu_i2c_init(adev);
3997 		}
3998 	}
3999 
4000 fence_driver_init:
4001 	/* Fence driver */
4002 	r = amdgpu_fence_driver_sw_init(adev);
4003 	if (r) {
4004 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4005 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4006 		goto failed;
4007 	}
4008 
4009 	/* init the mode config */
4010 	drm_mode_config_init(adev_to_drm(adev));
4011 
4012 	r = amdgpu_device_ip_init(adev);
4013 	if (r) {
4014 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4015 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4016 		goto release_ras_con;
4017 	}
4018 
4019 	amdgpu_fence_driver_hw_init(adev);
4020 
4021 	dev_info(adev->dev,
4022 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4023 			adev->gfx.config.max_shader_engines,
4024 			adev->gfx.config.max_sh_per_se,
4025 			adev->gfx.config.max_cu_per_sh,
4026 			adev->gfx.cu_info.number);
4027 
4028 	adev->accel_working = true;
4029 
4030 	amdgpu_vm_check_compute_bug(adev);
4031 
4032 	/* Initialize the buffer migration limit. */
4033 	if (amdgpu_moverate >= 0)
4034 		max_MBps = amdgpu_moverate;
4035 	else
4036 		max_MBps = 8; /* Allow 8 MB/s. */
4037 	/* Get a log2 for easy divisions. */
4038 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4039 
4040 	/*
4041 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4042 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4043 	 * gpu instance is counted less.
4044 	 */
4045 	amdgpu_register_gpu_instance(adev);
4046 
4047 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4048 	 * explicit gating rather than handling it automatically.
4049 	 */
4050 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4051 		r = amdgpu_device_ip_late_init(adev);
4052 		if (r) {
4053 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4054 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4055 			goto release_ras_con;
4056 		}
4057 		/* must succeed. */
4058 		amdgpu_ras_resume(adev);
4059 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4060 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4061 	}
4062 
4063 	if (amdgpu_sriov_vf(adev)) {
4064 		amdgpu_virt_release_full_gpu(adev, true);
4065 		flush_delayed_work(&adev->delayed_init_work);
4066 	}
4067 
4068 	/* Don't init kfd if whole hive need to be reset during init */
4069 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4070 		kgd2kfd_init_zone_device(adev);
4071 		kfd_update_svm_support_properties(adev);
4072 	}
4073 
4074 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4075 		amdgpu_xgmi_reset_on_init(adev);
4076 
4077 	/*
4078 	 * Place those sysfs registering after `late_init`. As some of those
4079 	 * operations performed in `late_init` might affect the sysfs
4080 	 * interfaces creating.
4081 	 */
4082 	r = amdgpu_device_sys_interface_init(adev);
4083 
4084 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4085 		r = amdgpu_pmu_init(adev);
4086 	if (r)
4087 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4088 
4089 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4090 	if (amdgpu_device_cache_pci_state(adev->pdev))
4091 		pci_restore_state(pdev);
4092 
4093 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4094 	/* this will fail for cards that aren't VGA class devices, just
4095 	 * ignore it
4096 	 */
4097 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4098 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4099 
4100 	px = amdgpu_device_supports_px(adev);
4101 
4102 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4103 				apple_gmux_detect(NULL, NULL)))
4104 		vga_switcheroo_register_client(adev->pdev,
4105 					       &amdgpu_switcheroo_ops, px);
4106 
4107 	if (px)
4108 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4109 
4110 	amdgpu_device_check_iommu_direct_map(adev);
4111 
4112 	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4113 	r = register_pm_notifier(&adev->pm_nb);
4114 	if (r)
4115 		goto failed;
4116 
4117 	return 0;
4118 
4119 release_ras_con:
4120 	if (amdgpu_sriov_vf(adev))
4121 		amdgpu_virt_release_full_gpu(adev, true);
4122 
4123 	/* failed in exclusive mode due to timeout */
4124 	if (amdgpu_sriov_vf(adev) &&
4125 		!amdgpu_sriov_runtime(adev) &&
4126 		amdgpu_virt_mmio_blocked(adev) &&
4127 		!amdgpu_virt_wait_reset(adev)) {
4128 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4129 		/* Don't send request since VF is inactive. */
4130 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4131 		adev->virt.ops = NULL;
4132 		r = -EAGAIN;
4133 	}
4134 	amdgpu_release_ras_context(adev);
4135 
4136 failed:
4137 	amdgpu_vf_error_trans_all(adev);
4138 
4139 	return r;
4140 }
4141 
4142 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4143 {
4144 
4145 	/* Clear all CPU mappings pointing to this device */
4146 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4147 
4148 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4149 	amdgpu_doorbell_fini(adev);
4150 
4151 	iounmap(adev->rmmio);
4152 	adev->rmmio = NULL;
4153 	if (adev->mman.aper_base_kaddr)
4154 		iounmap(adev->mman.aper_base_kaddr);
4155 	adev->mman.aper_base_kaddr = NULL;
4156 
4157 	/* Memory manager related */
4158 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4159 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4160 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4161 	}
4162 }
4163 
4164 /**
4165  * amdgpu_device_fini_hw - tear down the driver
4166  *
4167  * @adev: amdgpu_device pointer
4168  *
4169  * Tear down the driver info (all asics).
4170  * Called at driver shutdown.
4171  */
4172 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4173 {
4174 	dev_info(adev->dev, "finishing device.\n");
4175 	flush_delayed_work(&adev->delayed_init_work);
4176 
4177 	if (adev->mman.initialized)
4178 		drain_workqueue(adev->mman.bdev.wq);
4179 	adev->shutdown = true;
4180 
4181 	unregister_pm_notifier(&adev->pm_nb);
4182 
4183 	/* make sure IB test finished before entering exclusive mode
4184 	 * to avoid preemption on IB test
4185 	 */
4186 	if (amdgpu_sriov_vf(adev)) {
4187 		amdgpu_virt_request_full_gpu(adev, false);
4188 		amdgpu_virt_fini_data_exchange(adev);
4189 	}
4190 
4191 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
4192 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
4193 
4194 	/* disable all interrupts */
4195 	amdgpu_irq_disable_all(adev);
4196 	if (adev->mode_info.mode_config_initialized) {
4197 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4198 			drm_helper_force_disable_all(adev_to_drm(adev));
4199 		else
4200 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4201 	}
4202 	amdgpu_fence_driver_hw_fini(adev);
4203 
4204 	amdgpu_device_sys_interface_fini(adev);
4205 
4206 	/* disable ras feature must before hw fini */
4207 	amdgpu_ras_pre_fini(adev);
4208 
4209 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4210 
4211 	/*
4212 	 * device went through surprise hotplug; we need to destroy topology
4213 	 * before ip_fini_early to prevent kfd locking refcount issues by calling
4214 	 * amdgpu_amdkfd_suspend()
4215 	 */
4216 	if (pci_dev_is_disconnected(adev->pdev))
4217 		amdgpu_amdkfd_device_fini_sw(adev);
4218 
4219 	amdgpu_device_ip_fini_early(adev);
4220 
4221 	amdgpu_irq_fini_hw(adev);
4222 
4223 	if (adev->mman.initialized)
4224 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4225 
4226 	amdgpu_gart_dummy_page_fini(adev);
4227 
4228 	if (pci_dev_is_disconnected(adev->pdev))
4229 		amdgpu_device_unmap_mmio(adev);
4230 
4231 }
4232 
4233 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4234 {
4235 	int i, idx;
4236 	bool px;
4237 
4238 	amdgpu_device_ip_fini(adev);
4239 	amdgpu_fence_driver_sw_fini(adev);
4240 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4241 	adev->accel_working = false;
4242 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4243 	for (i = 0; i < MAX_XCP; ++i) {
4244 		dma_fence_put(adev->isolation[i].spearhead);
4245 		amdgpu_sync_free(&adev->isolation[i].active);
4246 		amdgpu_sync_free(&adev->isolation[i].prev);
4247 	}
4248 
4249 	amdgpu_reset_fini(adev);
4250 
4251 	/* free i2c buses */
4252 	amdgpu_i2c_fini(adev);
4253 
4254 	if (adev->bios) {
4255 		if (amdgpu_emu_mode != 1)
4256 			amdgpu_atombios_fini(adev);
4257 		amdgpu_bios_release(adev);
4258 	}
4259 
4260 	kfree(adev->fru_info);
4261 	adev->fru_info = NULL;
4262 
4263 	kfree(adev->xcp_mgr);
4264 	adev->xcp_mgr = NULL;
4265 
4266 	px = amdgpu_device_supports_px(adev);
4267 
4268 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4269 				apple_gmux_detect(NULL, NULL)))
4270 		vga_switcheroo_unregister_client(adev->pdev);
4271 
4272 	if (px)
4273 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4274 
4275 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4276 		vga_client_unregister(adev->pdev);
4277 
4278 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4279 
4280 		iounmap(adev->rmmio);
4281 		adev->rmmio = NULL;
4282 		drm_dev_exit(idx);
4283 	}
4284 
4285 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4286 		amdgpu_pmu_fini(adev);
4287 	if (adev->discovery.bin)
4288 		amdgpu_discovery_fini(adev);
4289 
4290 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4291 	adev->reset_domain = NULL;
4292 
4293 	kfree(adev->pci_state);
4294 	kfree(adev->pcie_reset_ctx.swds_pcistate);
4295 	kfree(adev->pcie_reset_ctx.swus_pcistate);
4296 }
4297 
4298 /**
4299  * amdgpu_device_evict_resources - evict device resources
4300  * @adev: amdgpu device object
4301  *
4302  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4303  * of the vram memory type. Mainly used for evicting device resources
4304  * at suspend time.
4305  *
4306  */
4307 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4308 {
4309 	int ret;
4310 
4311 	/* No need to evict vram on APUs unless going to S4 */
4312 	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
4313 		return 0;
4314 
4315 	/* No need to evict when going to S5 through S4 callbacks */
4316 	if (system_state == SYSTEM_POWER_OFF)
4317 		return 0;
4318 
4319 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4320 	if (ret) {
4321 		dev_warn(adev->dev, "evicting device resources failed\n");
4322 		return ret;
4323 	}
4324 
4325 	if (adev->in_s4) {
4326 		ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
4327 		if (ret)
4328 			dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
4329 	}
4330 	return ret;
4331 }
4332 
4333 /*
4334  * Suspend & resume.
4335  */
4336 /**
4337  * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
4338  * @nb: notifier block
4339  * @mode: suspend mode
4340  * @data: data
4341  *
4342  * This function is called when the system is about to suspend or hibernate.
4343  * It is used to set the appropriate flags so that eviction can be optimized
4344  * in the pm prepare callback.
4345  */
4346 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
4347 				     void *data)
4348 {
4349 	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
4350 
4351 	switch (mode) {
4352 	case PM_HIBERNATION_PREPARE:
4353 		adev->in_s4 = true;
4354 		break;
4355 	case PM_POST_HIBERNATION:
4356 		adev->in_s4 = false;
4357 		break;
4358 	}
4359 
4360 	return NOTIFY_DONE;
4361 }
4362 
4363 /**
4364  * amdgpu_device_prepare - prepare for device suspend
4365  *
4366  * @dev: drm dev pointer
4367  *
4368  * Prepare to put the hw in the suspend state (all asics).
4369  * Returns 0 for success or an error on failure.
4370  * Called at driver suspend.
4371  */
4372 int amdgpu_device_prepare(struct drm_device *dev)
4373 {
4374 	struct amdgpu_device *adev = drm_to_adev(dev);
4375 	int i, r;
4376 
4377 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4378 		return 0;
4379 
4380 	/* Evict the majority of BOs before starting suspend sequence */
4381 	r = amdgpu_device_evict_resources(adev);
4382 	if (r)
4383 		return r;
4384 
4385 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4386 
4387 	for (i = 0; i < adev->num_ip_blocks; i++) {
4388 		if (!adev->ip_blocks[i].status.valid)
4389 			continue;
4390 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4391 			continue;
4392 		r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4393 		if (r)
4394 			return r;
4395 	}
4396 
4397 	return 0;
4398 }
4399 
4400 /**
4401  * amdgpu_device_complete - complete power state transition
4402  *
4403  * @dev: drm dev pointer
4404  *
4405  * Undo the changes from amdgpu_device_prepare. This will be
4406  * called on all resume transitions, including those that failed.
4407  */
4408 void amdgpu_device_complete(struct drm_device *dev)
4409 {
4410 	struct amdgpu_device *adev = drm_to_adev(dev);
4411 	int i;
4412 
4413 	for (i = 0; i < adev->num_ip_blocks; i++) {
4414 		if (!adev->ip_blocks[i].status.valid)
4415 			continue;
4416 		if (!adev->ip_blocks[i].version->funcs->complete)
4417 			continue;
4418 		adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
4419 	}
4420 }
4421 
4422 /**
4423  * amdgpu_device_suspend - initiate device suspend
4424  *
4425  * @dev: drm dev pointer
4426  * @notify_clients: notify in-kernel DRM clients
4427  *
4428  * Puts the hw in the suspend state (all asics).
4429  * Returns 0 for success or an error on failure.
4430  * Called at driver suspend.
4431  */
4432 int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
4433 {
4434 	struct amdgpu_device *adev = drm_to_adev(dev);
4435 	int r, rec;
4436 
4437 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4438 		return 0;
4439 
4440 	adev->in_suspend = true;
4441 
4442 	if (amdgpu_sriov_vf(adev)) {
4443 		if (!adev->in_runpm)
4444 			amdgpu_amdkfd_suspend_process(adev);
4445 		amdgpu_virt_fini_data_exchange(adev);
4446 		r = amdgpu_virt_request_full_gpu(adev, false);
4447 		if (r)
4448 			return r;
4449 	}
4450 
4451 	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
4452 	if (r)
4453 		goto unwind_sriov;
4454 
4455 	if (notify_clients)
4456 		drm_client_dev_suspend(adev_to_drm(adev));
4457 
4458 	cancel_delayed_work_sync(&adev->delayed_init_work);
4459 
4460 	amdgpu_ras_suspend(adev);
4461 
4462 	r = amdgpu_device_ip_suspend_phase1(adev);
4463 	if (r)
4464 		goto unwind_smartshift;
4465 
4466 	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4467 	r = amdgpu_userq_suspend(adev);
4468 	if (r)
4469 		goto unwind_ip_phase1;
4470 
4471 	r = amdgpu_device_evict_resources(adev);
4472 	if (r)
4473 		goto unwind_userq;
4474 
4475 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4476 
4477 	amdgpu_fence_driver_hw_fini(adev);
4478 
4479 	r = amdgpu_device_ip_suspend_phase2(adev);
4480 	if (r)
4481 		goto unwind_evict;
4482 
4483 	if (amdgpu_sriov_vf(adev))
4484 		amdgpu_virt_release_full_gpu(adev, false);
4485 
4486 	return 0;
4487 
4488 unwind_evict:
4489 	amdgpu_ttm_set_buffer_funcs_status(adev, true);
4490 	amdgpu_fence_driver_hw_init(adev);
4491 
4492 unwind_userq:
4493 	rec = amdgpu_userq_resume(adev);
4494 	if (rec) {
4495 		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
4496 		return r;
4497 	}
4498 	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4499 	if (rec) {
4500 		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
4501 		return r;
4502 	}
4503 
4504 unwind_ip_phase1:
4505 	/* suspend phase 1 = resume phase 3 */
4506 	rec = amdgpu_device_ip_resume_phase3(adev);
4507 	if (rec) {
4508 		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
4509 		return r;
4510 	}
4511 
4512 unwind_smartshift:
4513 	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
4514 	if (rec) {
4515 		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
4516 		return r;
4517 	}
4518 
4519 	if (notify_clients)
4520 		drm_client_dev_resume(adev_to_drm(adev));
4521 
4522 	amdgpu_ras_resume(adev);
4523 
4524 unwind_sriov:
4525 	if (amdgpu_sriov_vf(adev)) {
4526 		rec = amdgpu_virt_request_full_gpu(adev, true);
4527 		if (rec) {
4528 			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
4529 			return r;
4530 		}
4531 	}
4532 
4533 	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
4534 
4535 	return r;
4536 }
4537 
4538 static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
4539 {
4540 	int r;
4541 	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
4542 
4543 	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
4544 	 * may not work. The access could be blocked by nBIF protection as VF isn't in
4545 	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
4546 	 * so that QEMU reprograms MSIX table.
4547 	 */
4548 	amdgpu_restore_msix(adev);
4549 
4550 	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4551 	if (r)
4552 		return r;
4553 
4554 	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
4555 		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
4556 
4557 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
4558 	adev->vm_manager.vram_base_offset +=
4559 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
4560 
4561 	return 0;
4562 }
4563 
4564 /**
4565  * amdgpu_device_resume - initiate device resume
4566  *
4567  * @dev: drm dev pointer
4568  * @notify_clients: notify in-kernel DRM clients
4569  *
4570  * Bring the hw back to operating state (all asics).
4571  * Returns 0 for success or an error on failure.
4572  * Called at driver resume.
4573  */
4574 int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
4575 {
4576 	struct amdgpu_device *adev = drm_to_adev(dev);
4577 	int r = 0;
4578 
4579 	if (amdgpu_sriov_vf(adev)) {
4580 		r = amdgpu_virt_request_full_gpu(adev, true);
4581 		if (r)
4582 			return r;
4583 	}
4584 
4585 	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
4586 		r = amdgpu_virt_resume(adev);
4587 		if (r)
4588 			goto exit;
4589 	}
4590 
4591 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4592 		return 0;
4593 
4594 	if (adev->in_s0ix)
4595 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4596 
4597 	/* post card */
4598 	if (amdgpu_device_need_post(adev)) {
4599 		r = amdgpu_device_asic_init(adev);
4600 		if (r)
4601 			dev_err(adev->dev, "amdgpu asic init failed\n");
4602 	}
4603 
4604 	r = amdgpu_device_ip_resume(adev);
4605 
4606 	if (r) {
4607 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4608 		goto exit;
4609 	}
4610 
4611 	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4612 	if (r)
4613 		goto exit;
4614 
4615 	r = amdgpu_userq_resume(adev);
4616 	if (r)
4617 		goto exit;
4618 
4619 	r = amdgpu_device_ip_late_init(adev);
4620 	if (r)
4621 		goto exit;
4622 
4623 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4624 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4625 exit:
4626 	if (amdgpu_sriov_vf(adev)) {
4627 		amdgpu_virt_init_data_exchange(adev);
4628 		amdgpu_virt_release_full_gpu(adev, true);
4629 
4630 		if (!r && !adev->in_runpm)
4631 			r = amdgpu_amdkfd_resume_process(adev);
4632 	}
4633 
4634 	if (r)
4635 		return r;
4636 
4637 	/* Make sure IB tests flushed */
4638 	flush_delayed_work(&adev->delayed_init_work);
4639 
4640 	if (notify_clients)
4641 		drm_client_dev_resume(adev_to_drm(adev));
4642 
4643 	amdgpu_ras_resume(adev);
4644 
4645 	if (adev->mode_info.num_crtc) {
4646 		/*
4647 		 * Most of the connector probing functions try to acquire runtime pm
4648 		 * refs to ensure that the GPU is powered on when connector polling is
4649 		 * performed. Since we're calling this from a runtime PM callback,
4650 		 * trying to acquire rpm refs will cause us to deadlock.
4651 		 *
4652 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4653 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4654 		 */
4655 #ifdef CONFIG_PM
4656 		dev->dev->power.disable_depth++;
4657 #endif
4658 		if (!adev->dc_enabled)
4659 			drm_helper_hpd_irq_event(dev);
4660 		else
4661 			drm_kms_helper_hotplug_event(dev);
4662 #ifdef CONFIG_PM
4663 		dev->dev->power.disable_depth--;
4664 #endif
4665 	}
4666 
4667 	amdgpu_vram_mgr_clear_reset_blocks(adev);
4668 	adev->in_suspend = false;
4669 
4670 	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
4671 		dev_warn(adev->dev, "smart shift update failed\n");
4672 
4673 	return 0;
4674 }
4675 
4676 /**
4677  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4678  *
4679  * @adev: amdgpu_device pointer
4680  *
4681  * The list of all the hardware IPs that make up the asic is walked and
4682  * the check_soft_reset callbacks are run.  check_soft_reset determines
4683  * if the asic is still hung or not.
4684  * Returns true if any of the IPs are still in a hung state, false if not.
4685  */
4686 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4687 {
4688 	int i;
4689 	bool asic_hang = false;
4690 
4691 	if (amdgpu_sriov_vf(adev))
4692 		return true;
4693 
4694 	if (amdgpu_asic_need_full_reset(adev))
4695 		return true;
4696 
4697 	for (i = 0; i < adev->num_ip_blocks; i++) {
4698 		if (!adev->ip_blocks[i].status.valid)
4699 			continue;
4700 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4701 			adev->ip_blocks[i].status.hang =
4702 				adev->ip_blocks[i].version->funcs->check_soft_reset(
4703 					&adev->ip_blocks[i]);
4704 		if (adev->ip_blocks[i].status.hang) {
4705 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4706 			asic_hang = true;
4707 		}
4708 	}
4709 	return asic_hang;
4710 }
4711 
4712 /**
4713  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4714  *
4715  * @adev: amdgpu_device pointer
4716  *
4717  * The list of all the hardware IPs that make up the asic is walked and the
4718  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4719  * handles any IP specific hardware or software state changes that are
4720  * necessary for a soft reset to succeed.
4721  * Returns 0 on success, negative error code on failure.
4722  */
4723 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4724 {
4725 	int i, r = 0;
4726 
4727 	for (i = 0; i < adev->num_ip_blocks; i++) {
4728 		if (!adev->ip_blocks[i].status.valid)
4729 			continue;
4730 		if (adev->ip_blocks[i].status.hang &&
4731 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4732 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
4733 			if (r)
4734 				return r;
4735 		}
4736 	}
4737 
4738 	return 0;
4739 }
4740 
4741 /**
4742  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4743  *
4744  * @adev: amdgpu_device pointer
4745  *
4746  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4747  * reset is necessary to recover.
4748  * Returns true if a full asic reset is required, false if not.
4749  */
4750 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4751 {
4752 	int i;
4753 
4754 	if (amdgpu_asic_need_full_reset(adev))
4755 		return true;
4756 
4757 	for (i = 0; i < adev->num_ip_blocks; i++) {
4758 		if (!adev->ip_blocks[i].status.valid)
4759 			continue;
4760 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4761 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4762 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4763 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4764 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4765 			if (adev->ip_blocks[i].status.hang) {
4766 				dev_info(adev->dev, "Some block need full reset!\n");
4767 				return true;
4768 			}
4769 		}
4770 	}
4771 	return false;
4772 }
4773 
4774 /**
4775  * amdgpu_device_ip_soft_reset - do a soft reset
4776  *
4777  * @adev: amdgpu_device pointer
4778  *
4779  * The list of all the hardware IPs that make up the asic is walked and the
4780  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4781  * IP specific hardware or software state changes that are necessary to soft
4782  * reset the IP.
4783  * Returns 0 on success, negative error code on failure.
4784  */
4785 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4786 {
4787 	int i, r = 0;
4788 
4789 	for (i = 0; i < adev->num_ip_blocks; i++) {
4790 		if (!adev->ip_blocks[i].status.valid)
4791 			continue;
4792 		if (adev->ip_blocks[i].status.hang &&
4793 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4794 			r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
4795 			if (r)
4796 				return r;
4797 		}
4798 	}
4799 
4800 	return 0;
4801 }
4802 
4803 /**
4804  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4805  *
4806  * @adev: amdgpu_device pointer
4807  *
4808  * The list of all the hardware IPs that make up the asic is walked and the
4809  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4810  * handles any IP specific hardware or software state changes that are
4811  * necessary after the IP has been soft reset.
4812  * Returns 0 on success, negative error code on failure.
4813  */
4814 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4815 {
4816 	int i, r = 0;
4817 
4818 	for (i = 0; i < adev->num_ip_blocks; i++) {
4819 		if (!adev->ip_blocks[i].status.valid)
4820 			continue;
4821 		if (adev->ip_blocks[i].status.hang &&
4822 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4823 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
4824 		if (r)
4825 			return r;
4826 	}
4827 
4828 	return 0;
4829 }
4830 
4831 /**
4832  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4833  *
4834  * @adev: amdgpu_device pointer
4835  * @reset_context: amdgpu reset context pointer
4836  *
4837  * do VF FLR and reinitialize Asic
4838  * return 0 means succeeded otherwise failed
4839  */
4840 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4841 				     struct amdgpu_reset_context *reset_context)
4842 {
4843 	int r;
4844 	struct amdgpu_hive_info *hive = NULL;
4845 
4846 	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
4847 		if (!amdgpu_ras_get_fed_status(adev))
4848 			amdgpu_virt_ready_to_reset(adev);
4849 		amdgpu_virt_wait_reset(adev);
4850 		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
4851 		r = amdgpu_virt_request_full_gpu(adev, true);
4852 	} else {
4853 		r = amdgpu_virt_reset_gpu(adev);
4854 	}
4855 	if (r)
4856 		return r;
4857 
4858 	amdgpu_ras_clear_err_state(adev);
4859 	amdgpu_irq_gpu_reset_resume_helper(adev);
4860 
4861 	/* some sw clean up VF needs to do before recover */
4862 	amdgpu_virt_post_reset(adev);
4863 
4864 	/* Resume IP prior to SMC */
4865 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4866 	if (r)
4867 		return r;
4868 
4869 	amdgpu_virt_init_data_exchange(adev);
4870 
4871 	r = amdgpu_device_fw_loading(adev);
4872 	if (r)
4873 		return r;
4874 
4875 	/* now we are okay to resume SMC/CP/SDMA */
4876 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4877 	if (r)
4878 		return r;
4879 
4880 	hive = amdgpu_get_xgmi_hive(adev);
4881 	/* Update PSP FW topology after reset */
4882 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4883 		r = amdgpu_xgmi_update_topology(hive, adev);
4884 	if (hive)
4885 		amdgpu_put_xgmi_hive(hive);
4886 	if (r)
4887 		return r;
4888 
4889 	r = amdgpu_ib_ring_tests(adev);
4890 	if (r)
4891 		return r;
4892 
4893 	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
4894 		amdgpu_inc_vram_lost(adev);
4895 
4896 	/* need to be called during full access so we can't do it later like
4897 	 * bare-metal does.
4898 	 */
4899 	amdgpu_amdkfd_post_reset(adev);
4900 	amdgpu_virt_release_full_gpu(adev, true);
4901 
4902 	/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4903 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
4904 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
4905 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
4906 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
4907 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
4908 		amdgpu_ras_resume(adev);
4909 
4910 	amdgpu_virt_ras_telemetry_post_reset(adev);
4911 
4912 	return 0;
4913 }
4914 
4915 /**
4916  * amdgpu_device_has_job_running - check if there is any unfinished job
4917  *
4918  * @adev: amdgpu_device pointer
4919  *
4920  * check if there is any job running on the device when guest driver receives
4921  * FLR notification from host driver. If there are still jobs running, then
4922  * the guest driver will not respond the FLR reset. Instead, let the job hit
4923  * the timeout and guest driver then issue the reset request.
4924  */
4925 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4926 {
4927 	int i;
4928 
4929 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4930 		struct amdgpu_ring *ring = adev->rings[i];
4931 
4932 		if (!amdgpu_ring_sched_ready(ring))
4933 			continue;
4934 
4935 		if (amdgpu_fence_count_emitted(ring))
4936 			return true;
4937 	}
4938 	return false;
4939 }
4940 
4941 /**
4942  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4943  *
4944  * @adev: amdgpu_device pointer
4945  *
4946  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4947  * a hung GPU.
4948  */
4949 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4950 {
4951 
4952 	if (amdgpu_gpu_recovery == 0)
4953 		goto disabled;
4954 
4955 	/* Skip soft reset check in fatal error mode */
4956 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4957 		return true;
4958 
4959 	if (amdgpu_sriov_vf(adev))
4960 		return true;
4961 
4962 	if (amdgpu_gpu_recovery == -1) {
4963 		switch (adev->asic_type) {
4964 #ifdef CONFIG_DRM_AMDGPU_SI
4965 		case CHIP_VERDE:
4966 		case CHIP_TAHITI:
4967 		case CHIP_PITCAIRN:
4968 		case CHIP_OLAND:
4969 		case CHIP_HAINAN:
4970 #endif
4971 #ifdef CONFIG_DRM_AMDGPU_CIK
4972 		case CHIP_KAVERI:
4973 		case CHIP_KABINI:
4974 		case CHIP_MULLINS:
4975 #endif
4976 		case CHIP_CARRIZO:
4977 		case CHIP_STONEY:
4978 		case CHIP_CYAN_SKILLFISH:
4979 			goto disabled;
4980 		default:
4981 			break;
4982 		}
4983 	}
4984 
4985 	return true;
4986 
4987 disabled:
4988 		dev_info(adev->dev, "GPU recovery disabled.\n");
4989 		return false;
4990 }
4991 
4992 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4993 {
4994 	u32 i;
4995 	int ret = 0;
4996 
4997 	if (adev->bios)
4998 		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4999 
5000 	dev_info(adev->dev, "GPU mode1 reset\n");
5001 
5002 	/* Cache the state before bus master disable. The saved config space
5003 	 * values are used in other cases like restore after mode-2 reset.
5004 	 */
5005 	amdgpu_device_cache_pci_state(adev->pdev);
5006 
5007 	/* disable BM */
5008 	pci_clear_master(adev->pdev);
5009 
5010 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5011 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5012 		ret = amdgpu_dpm_mode1_reset(adev);
5013 	} else {
5014 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5015 		ret = psp_gpu_reset(adev);
5016 	}
5017 
5018 	if (ret)
5019 		goto mode1_reset_failed;
5020 
5021 	/* enable mmio access after mode 1 reset completed */
5022 	adev->no_hw_access = false;
5023 
5024 	/* ensure no_hw_access is updated before we access hw */
5025 	smp_mb();
5026 
5027 	amdgpu_device_load_pci_state(adev->pdev);
5028 	ret = amdgpu_psp_wait_for_bootloader(adev);
5029 	if (ret)
5030 		goto mode1_reset_failed;
5031 
5032 	/* wait for asic to come out of reset */
5033 	for (i = 0; i < adev->usec_timeout; i++) {
5034 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5035 
5036 		if (memsize != 0xffffffff)
5037 			break;
5038 		udelay(1);
5039 	}
5040 
5041 	if (i >= adev->usec_timeout) {
5042 		ret = -ETIMEDOUT;
5043 		goto mode1_reset_failed;
5044 	}
5045 
5046 	if (adev->bios)
5047 		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5048 
5049 	return 0;
5050 
5051 mode1_reset_failed:
5052 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5053 	return ret;
5054 }
5055 
5056 int amdgpu_device_link_reset(struct amdgpu_device *adev)
5057 {
5058 	int ret = 0;
5059 
5060 	dev_info(adev->dev, "GPU link reset\n");
5061 
5062 	if (!amdgpu_reset_in_dpc(adev))
5063 		ret = amdgpu_dpm_link_reset(adev);
5064 
5065 	if (ret)
5066 		goto link_reset_failed;
5067 
5068 	ret = amdgpu_psp_wait_for_bootloader(adev);
5069 	if (ret)
5070 		goto link_reset_failed;
5071 
5072 	return 0;
5073 
5074 link_reset_failed:
5075 	dev_err(adev->dev, "GPU link reset failed\n");
5076 	return ret;
5077 }
5078 
5079 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5080 				 struct amdgpu_reset_context *reset_context)
5081 {
5082 	int i, r = 0;
5083 	struct amdgpu_job *job = NULL;
5084 	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5085 	bool need_full_reset =
5086 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5087 
5088 	if (reset_context->reset_req_dev == adev)
5089 		job = reset_context->job;
5090 
5091 	if (amdgpu_sriov_vf(adev))
5092 		amdgpu_virt_pre_reset(adev);
5093 
5094 	amdgpu_fence_driver_isr_toggle(adev, true);
5095 
5096 	/* block all schedulers and reset given job's ring */
5097 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5098 		struct amdgpu_ring *ring = adev->rings[i];
5099 
5100 		if (!amdgpu_ring_sched_ready(ring))
5101 			continue;
5102 
5103 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5104 		amdgpu_fence_driver_force_completion(ring);
5105 	}
5106 
5107 	amdgpu_fence_driver_isr_toggle(adev, false);
5108 
5109 	if (job && job->vm)
5110 		drm_sched_increase_karma(&job->base);
5111 
5112 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5113 	/* If reset handler not implemented, continue; otherwise return */
5114 	if (r == -EOPNOTSUPP)
5115 		r = 0;
5116 	else
5117 		return r;
5118 
5119 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5120 	if (!amdgpu_sriov_vf(adev)) {
5121 
5122 		if (!need_full_reset)
5123 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5124 
5125 		if (!need_full_reset && amdgpu_gpu_recovery &&
5126 		    amdgpu_device_ip_check_soft_reset(adev)) {
5127 			amdgpu_device_ip_pre_soft_reset(adev);
5128 			r = amdgpu_device_ip_soft_reset(adev);
5129 			amdgpu_device_ip_post_soft_reset(adev);
5130 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5131 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5132 				need_full_reset = true;
5133 			}
5134 		}
5135 
5136 		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5137 			dev_info(tmp_adev->dev, "Dumping IP State\n");
5138 			/* Trigger ip dump before we reset the asic */
5139 			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5140 				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5141 					tmp_adev->ip_blocks[i].version->funcs
5142 						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5143 			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5144 		}
5145 
5146 		if (need_full_reset)
5147 			r = amdgpu_device_ip_suspend(adev);
5148 		if (need_full_reset)
5149 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5150 		else
5151 			clear_bit(AMDGPU_NEED_FULL_RESET,
5152 				  &reset_context->flags);
5153 	}
5154 
5155 	return r;
5156 }
5157 
5158 int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5159 {
5160 	struct list_head *device_list_handle;
5161 	bool full_reset, vram_lost = false;
5162 	struct amdgpu_device *tmp_adev;
5163 	int r, init_level;
5164 
5165 	device_list_handle = reset_context->reset_device_list;
5166 
5167 	if (!device_list_handle)
5168 		return -EINVAL;
5169 
5170 	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5171 
5172 	/**
5173 	 * If it's reset on init, it's default init level, otherwise keep level
5174 	 * as recovery level.
5175 	 */
5176 	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5177 			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5178 	else
5179 			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5180 
5181 	r = 0;
5182 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5183 		amdgpu_set_init_level(tmp_adev, init_level);
5184 		if (full_reset) {
5185 			/* post card */
5186 			amdgpu_reset_set_dpc_status(tmp_adev, false);
5187 			amdgpu_ras_clear_err_state(tmp_adev);
5188 			r = amdgpu_device_asic_init(tmp_adev);
5189 			if (r) {
5190 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5191 			} else {
5192 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5193 
5194 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5195 				if (r)
5196 					goto out;
5197 
5198 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5199 
5200 				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5201 					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5202 
5203 				if (vram_lost) {
5204 					dev_info(
5205 						tmp_adev->dev,
5206 						"VRAM is lost due to GPU reset!\n");
5207 					amdgpu_inc_vram_lost(tmp_adev);
5208 				}
5209 
5210 				r = amdgpu_device_fw_loading(tmp_adev);
5211 				if (r)
5212 					return r;
5213 
5214 				r = amdgpu_xcp_restore_partition_mode(
5215 					tmp_adev->xcp_mgr);
5216 				if (r)
5217 					goto out;
5218 
5219 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5220 				if (r)
5221 					goto out;
5222 
5223 				amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5224 
5225 				r = amdgpu_device_ip_resume_phase3(tmp_adev);
5226 				if (r)
5227 					goto out;
5228 
5229 				if (vram_lost)
5230 					amdgpu_device_fill_reset_magic(tmp_adev);
5231 
5232 				/*
5233 				 * Add this ASIC as tracked as reset was already
5234 				 * complete successfully.
5235 				 */
5236 				amdgpu_register_gpu_instance(tmp_adev);
5237 
5238 				if (!reset_context->hive &&
5239 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5240 					amdgpu_xgmi_add_device(tmp_adev);
5241 
5242 				r = amdgpu_device_ip_late_init(tmp_adev);
5243 				if (r)
5244 					goto out;
5245 
5246 				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
5247 				if (r)
5248 					goto out;
5249 
5250 				drm_client_dev_resume(adev_to_drm(tmp_adev));
5251 
5252 				/*
5253 				 * The GPU enters bad state once faulty pages
5254 				 * by ECC has reached the threshold, and ras
5255 				 * recovery is scheduled next. So add one check
5256 				 * here to break recovery if it indeed exceeds
5257 				 * bad page threshold, and remind user to
5258 				 * retire this GPU or setting one bigger
5259 				 * bad_page_threshold value to fix this once
5260 				 * probing driver again.
5261 				 */
5262 				if (!amdgpu_ras_is_rma(tmp_adev)) {
5263 					/* must succeed. */
5264 					amdgpu_ras_resume(tmp_adev);
5265 				} else {
5266 					r = -EINVAL;
5267 					goto out;
5268 				}
5269 
5270 				/* Update PSP FW topology after reset */
5271 				if (reset_context->hive &&
5272 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5273 					r = amdgpu_xgmi_update_topology(
5274 						reset_context->hive, tmp_adev);
5275 			}
5276 		}
5277 
5278 out:
5279 		if (!r) {
5280 			/* IP init is complete now, set level as default */
5281 			amdgpu_set_init_level(tmp_adev,
5282 					      AMDGPU_INIT_LEVEL_DEFAULT);
5283 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5284 			r = amdgpu_ib_ring_tests(tmp_adev);
5285 			if (r) {
5286 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5287 				r = -EAGAIN;
5288 				goto end;
5289 			}
5290 		}
5291 
5292 		if (r)
5293 			tmp_adev->asic_reset_res = r;
5294 	}
5295 
5296 end:
5297 	return r;
5298 }
5299 
5300 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5301 			 struct amdgpu_reset_context *reset_context)
5302 {
5303 	struct amdgpu_device *tmp_adev = NULL;
5304 	bool need_full_reset, skip_hw_reset;
5305 	int r = 0;
5306 
5307 	/* Try reset handler method first */
5308 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5309 				    reset_list);
5310 
5311 	reset_context->reset_device_list = device_list_handle;
5312 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5313 	/* If reset handler not implemented, continue; otherwise return */
5314 	if (r == -EOPNOTSUPP)
5315 		r = 0;
5316 	else
5317 		return r;
5318 
5319 	/* Reset handler not implemented, use the default method */
5320 	need_full_reset =
5321 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5322 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5323 
5324 	/*
5325 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5326 	 * to allow proper links negotiation in FW (within 1 sec)
5327 	 */
5328 	if (!skip_hw_reset && need_full_reset) {
5329 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5330 			/* For XGMI run all resets in parallel to speed up the process */
5331 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5332 				if (!queue_work(system_unbound_wq,
5333 						&tmp_adev->xgmi_reset_work))
5334 					r = -EALREADY;
5335 			} else
5336 				r = amdgpu_asic_reset(tmp_adev);
5337 
5338 			if (r) {
5339 				dev_err(tmp_adev->dev,
5340 					"ASIC reset failed with error, %d for drm dev, %s",
5341 					r, adev_to_drm(tmp_adev)->unique);
5342 				goto out;
5343 			}
5344 		}
5345 
5346 		/* For XGMI wait for all resets to complete before proceed */
5347 		if (!r) {
5348 			list_for_each_entry(tmp_adev, device_list_handle,
5349 					    reset_list) {
5350 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5351 					flush_work(&tmp_adev->xgmi_reset_work);
5352 					r = tmp_adev->asic_reset_res;
5353 					if (r)
5354 						break;
5355 				}
5356 			}
5357 		}
5358 	}
5359 
5360 	if (!r && amdgpu_ras_intr_triggered()) {
5361 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5362 			amdgpu_ras_reset_error_count(tmp_adev,
5363 						     AMDGPU_RAS_BLOCK__MMHUB);
5364 		}
5365 
5366 		amdgpu_ras_intr_cleared();
5367 	}
5368 
5369 	r = amdgpu_device_reinit_after_reset(reset_context);
5370 	if (r == -EAGAIN)
5371 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5372 	else
5373 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5374 
5375 out:
5376 	return r;
5377 }
5378 
5379 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5380 {
5381 
5382 	switch (amdgpu_asic_reset_method(adev)) {
5383 	case AMD_RESET_METHOD_MODE1:
5384 	case AMD_RESET_METHOD_LINK:
5385 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5386 		break;
5387 	case AMD_RESET_METHOD_MODE2:
5388 		adev->mp1_state = PP_MP1_STATE_RESET;
5389 		break;
5390 	default:
5391 		adev->mp1_state = PP_MP1_STATE_NONE;
5392 		break;
5393 	}
5394 }
5395 
5396 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5397 {
5398 	amdgpu_vf_error_trans_all(adev);
5399 	adev->mp1_state = PP_MP1_STATE_NONE;
5400 }
5401 
5402 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5403 {
5404 	struct pci_dev *p = NULL;
5405 
5406 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5407 			adev->pdev->bus->number, 1);
5408 	if (p) {
5409 		pm_runtime_enable(&(p->dev));
5410 		pm_runtime_resume(&(p->dev));
5411 	}
5412 
5413 	pci_dev_put(p);
5414 }
5415 
5416 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5417 {
5418 	enum amd_reset_method reset_method;
5419 	struct pci_dev *p = NULL;
5420 	u64 expires;
5421 
5422 	/*
5423 	 * For now, only BACO and mode1 reset are confirmed
5424 	 * to suffer the audio issue without proper suspended.
5425 	 */
5426 	reset_method = amdgpu_asic_reset_method(adev);
5427 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5428 	     (reset_method != AMD_RESET_METHOD_MODE1))
5429 		return -EINVAL;
5430 
5431 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5432 			adev->pdev->bus->number, 1);
5433 	if (!p)
5434 		return -ENODEV;
5435 
5436 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5437 	if (!expires)
5438 		/*
5439 		 * If we cannot get the audio device autosuspend delay,
5440 		 * a fixed 4S interval will be used. Considering 3S is
5441 		 * the audio controller default autosuspend delay setting.
5442 		 * 4S used here is guaranteed to cover that.
5443 		 */
5444 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5445 
5446 	while (!pm_runtime_status_suspended(&(p->dev))) {
5447 		if (!pm_runtime_suspend(&(p->dev)))
5448 			break;
5449 
5450 		if (expires < ktime_get_mono_fast_ns()) {
5451 			dev_warn(adev->dev, "failed to suspend display audio\n");
5452 			pci_dev_put(p);
5453 			/* TODO: abort the succeeding gpu reset? */
5454 			return -ETIMEDOUT;
5455 		}
5456 	}
5457 
5458 	pm_runtime_disable(&(p->dev));
5459 
5460 	pci_dev_put(p);
5461 	return 0;
5462 }
5463 
5464 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5465 {
5466 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5467 
5468 #if defined(CONFIG_DEBUG_FS)
5469 	if (!amdgpu_sriov_vf(adev))
5470 		cancel_work(&adev->reset_work);
5471 #endif
5472 	cancel_work(&adev->userq_reset_work);
5473 
5474 	if (adev->kfd.dev)
5475 		cancel_work(&adev->kfd.reset_work);
5476 
5477 	if (amdgpu_sriov_vf(adev))
5478 		cancel_work(&adev->virt.flr_work);
5479 
5480 	if (con && adev->ras_enabled)
5481 		cancel_work(&con->recovery_work);
5482 
5483 }
5484 
5485 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5486 {
5487 	struct amdgpu_device *tmp_adev;
5488 	int ret = 0;
5489 
5490 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5491 		ret |= amdgpu_device_bus_status_check(tmp_adev);
5492 	}
5493 
5494 	return ret;
5495 }
5496 
5497 static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
5498 					  struct list_head *device_list,
5499 					  struct amdgpu_hive_info *hive)
5500 {
5501 	struct amdgpu_device *tmp_adev = NULL;
5502 
5503 	/*
5504 	 * Build list of devices to reset.
5505 	 * In case we are in XGMI hive mode, resort the device list
5506 	 * to put adev in the 1st position.
5507 	 */
5508 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5509 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5510 			list_add_tail(&tmp_adev->reset_list, device_list);
5511 			if (adev->shutdown)
5512 				tmp_adev->shutdown = true;
5513 			if (amdgpu_reset_in_dpc(adev))
5514 				tmp_adev->pcie_reset_ctx.in_link_reset = true;
5515 		}
5516 		if (!list_is_first(&adev->reset_list, device_list))
5517 			list_rotate_to_front(&adev->reset_list, device_list);
5518 	} else {
5519 		list_add_tail(&adev->reset_list, device_list);
5520 	}
5521 }
5522 
5523 static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
5524 						  struct list_head *device_list)
5525 {
5526 	struct amdgpu_device *tmp_adev = NULL;
5527 
5528 	if (list_empty(device_list))
5529 		return;
5530 	tmp_adev =
5531 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5532 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5533 }
5534 
5535 static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
5536 						  struct list_head *device_list)
5537 {
5538 	struct amdgpu_device *tmp_adev = NULL;
5539 
5540 	if (list_empty(device_list))
5541 		return;
5542 	tmp_adev =
5543 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5544 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5545 }
5546 
5547 static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
5548 					  struct amdgpu_job *job,
5549 					  struct amdgpu_reset_context *reset_context,
5550 					  struct list_head *device_list,
5551 					  struct amdgpu_hive_info *hive,
5552 					  bool need_emergency_restart)
5553 {
5554 	struct amdgpu_device *tmp_adev = NULL;
5555 	int i;
5556 
5557 	/* block all schedulers and reset given job's ring */
5558 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5559 		amdgpu_device_set_mp1_state(tmp_adev);
5560 
5561 		/*
5562 		 * Try to put the audio codec into suspend state
5563 		 * before gpu reset started.
5564 		 *
5565 		 * Due to the power domain of the graphics device
5566 		 * is shared with AZ power domain. Without this,
5567 		 * we may change the audio hardware from behind
5568 		 * the audio driver's back. That will trigger
5569 		 * some audio codec errors.
5570 		 */
5571 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5572 			tmp_adev->pcie_reset_ctx.audio_suspended = true;
5573 
5574 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5575 
5576 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5577 
5578 		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5579 
5580 		/*
5581 		 * Mark these ASICs to be reset as untracked first
5582 		 * And add them back after reset completed
5583 		 */
5584 		amdgpu_unregister_gpu_instance(tmp_adev);
5585 
5586 		drm_client_dev_suspend(adev_to_drm(tmp_adev));
5587 
5588 		/* disable ras on ALL IPs */
5589 		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
5590 		    amdgpu_device_ip_need_full_reset(tmp_adev))
5591 			amdgpu_ras_suspend(tmp_adev);
5592 
5593 		amdgpu_userq_pre_reset(tmp_adev);
5594 
5595 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5596 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5597 
5598 			if (!amdgpu_ring_sched_ready(ring))
5599 				continue;
5600 
5601 			drm_sched_wqueue_stop(&ring->sched);
5602 
5603 			if (need_emergency_restart)
5604 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5605 		}
5606 		atomic_inc(&tmp_adev->gpu_reset_counter);
5607 	}
5608 }
5609 
5610 static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
5611 			      struct list_head *device_list,
5612 			      struct amdgpu_reset_context *reset_context)
5613 {
5614 	struct amdgpu_device *tmp_adev = NULL;
5615 	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5616 	int r = 0;
5617 
5618 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5619 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5620 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5621 		/*TODO Should we stop ?*/
5622 		if (r) {
5623 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5624 				  r, adev_to_drm(tmp_adev)->unique);
5625 			tmp_adev->asic_reset_res = r;
5626 		}
5627 	}
5628 
5629 	/* Actual ASIC resets if needed.*/
5630 	/* Host driver will handle XGMI hive reset for SRIOV */
5631 	if (amdgpu_sriov_vf(adev)) {
5632 
5633 		/* Bail out of reset early */
5634 		if (amdgpu_ras_is_rma(adev))
5635 			return -ENODEV;
5636 
5637 		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5638 			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5639 			amdgpu_ras_set_fed(adev, true);
5640 			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5641 		}
5642 
5643 		r = amdgpu_device_reset_sriov(adev, reset_context);
5644 		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5645 			amdgpu_virt_release_full_gpu(adev, true);
5646 			goto retry;
5647 		}
5648 		if (r)
5649 			adev->asic_reset_res = r;
5650 	} else {
5651 		r = amdgpu_do_asic_reset(device_list, reset_context);
5652 		if (r && r == -EAGAIN)
5653 			goto retry;
5654 	}
5655 
5656 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5657 		/*
5658 		 * Drop any pending non scheduler resets queued before reset is done.
5659 		 * Any reset scheduled after this point would be valid. Scheduler resets
5660 		 * were already dropped during drm_sched_stop and no new ones can come
5661 		 * in before drm_sched_start.
5662 		 */
5663 		amdgpu_device_stop_pending_resets(tmp_adev);
5664 	}
5665 
5666 	return r;
5667 }
5668 
5669 static int amdgpu_device_sched_resume(struct list_head *device_list,
5670 			      struct amdgpu_reset_context *reset_context,
5671 			      bool   job_signaled)
5672 {
5673 	struct amdgpu_device *tmp_adev = NULL;
5674 	int i, r = 0;
5675 
5676 	/* Post ASIC reset for all devs .*/
5677 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5678 
5679 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5680 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5681 
5682 			if (!amdgpu_ring_sched_ready(ring))
5683 				continue;
5684 
5685 			drm_sched_wqueue_start(&ring->sched);
5686 		}
5687 
5688 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5689 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5690 
5691 		if (tmp_adev->asic_reset_res) {
5692 			/* bad news, how to tell it to userspace ?
5693 			 * for ras error, we should report GPU bad status instead of
5694 			 * reset failure
5695 			 */
5696 			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5697 			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5698 				dev_info(
5699 					tmp_adev->dev,
5700 					"GPU reset(%d) failed with error %d\n",
5701 					atomic_read(
5702 						&tmp_adev->gpu_reset_counter),
5703 					tmp_adev->asic_reset_res);
5704 			amdgpu_vf_error_put(tmp_adev,
5705 					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
5706 					    tmp_adev->asic_reset_res);
5707 			if (!r)
5708 				r = tmp_adev->asic_reset_res;
5709 			tmp_adev->asic_reset_res = 0;
5710 		} else {
5711 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
5712 				 atomic_read(&tmp_adev->gpu_reset_counter));
5713 			if (amdgpu_acpi_smart_shift_update(tmp_adev,
5714 							   AMDGPU_SS_DEV_D0))
5715 				dev_warn(tmp_adev->dev,
5716 					 "smart shift update failed\n");
5717 		}
5718 	}
5719 
5720 	return r;
5721 }
5722 
5723 static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
5724 			      struct list_head *device_list,
5725 			      bool   need_emergency_restart)
5726 {
5727 	struct amdgpu_device *tmp_adev = NULL;
5728 
5729 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5730 		/* unlock kfd: SRIOV would do it separately */
5731 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5732 			amdgpu_amdkfd_post_reset(tmp_adev);
5733 
5734 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5735 		 * need to bring up kfd here if it's not be initialized before
5736 		 */
5737 		if (!adev->kfd.init_complete)
5738 			amdgpu_amdkfd_device_init(adev);
5739 
5740 		if (tmp_adev->pcie_reset_ctx.audio_suspended)
5741 			amdgpu_device_resume_display_audio(tmp_adev);
5742 
5743 		amdgpu_device_unset_mp1_state(tmp_adev);
5744 
5745 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5746 
5747 	}
5748 }
5749 
5750 
5751 /**
5752  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5753  *
5754  * @adev: amdgpu_device pointer
5755  * @job: which job trigger hang
5756  * @reset_context: amdgpu reset context pointer
5757  *
5758  * Attempt to reset the GPU if it has hung (all asics).
5759  * Attempt to do soft-reset or full-reset and reinitialize Asic
5760  * Returns 0 for success or an error on failure.
5761  */
5762 
5763 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5764 			      struct amdgpu_job *job,
5765 			      struct amdgpu_reset_context *reset_context)
5766 {
5767 	struct list_head device_list;
5768 	bool job_signaled = false;
5769 	struct amdgpu_hive_info *hive = NULL;
5770 	int r = 0;
5771 	bool need_emergency_restart = false;
5772 	/* save the pasid here as the job may be freed before the end of the reset */
5773 	int pasid = job ? job->pasid : -EINVAL;
5774 
5775 	/*
5776 	 * If it reaches here because of hang/timeout and a RAS error is
5777 	 * detected at the same time, let RAS recovery take care of it.
5778 	 */
5779 	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
5780 	    !amdgpu_sriov_vf(adev) &&
5781 	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
5782 		dev_dbg(adev->dev,
5783 			"Gpu recovery from source: %d yielding to RAS error recovery handling",
5784 			reset_context->src);
5785 		return 0;
5786 	}
5787 
5788 	/*
5789 	 * Special case: RAS triggered and full reset isn't supported
5790 	 */
5791 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5792 
5793 	/*
5794 	 * Flush RAM to disk so that after reboot
5795 	 * the user can read log and see why the system rebooted.
5796 	 */
5797 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5798 		amdgpu_ras_get_context(adev)->reboot) {
5799 		dev_warn(adev->dev, "Emergency reboot.");
5800 
5801 		ksys_sync_helper();
5802 		emergency_restart();
5803 	}
5804 
5805 	dev_info(adev->dev, "GPU %s begin!. Source:  %d\n",
5806 		 need_emergency_restart ? "jobs stop" : "reset",
5807 		 reset_context->src);
5808 
5809 	if (!amdgpu_sriov_vf(adev))
5810 		hive = amdgpu_get_xgmi_hive(adev);
5811 	if (hive)
5812 		mutex_lock(&hive->hive_lock);
5813 
5814 	reset_context->job = job;
5815 	reset_context->hive = hive;
5816 	INIT_LIST_HEAD(&device_list);
5817 
5818 	amdgpu_device_recovery_prepare(adev, &device_list, hive);
5819 
5820 	if (!amdgpu_sriov_vf(adev)) {
5821 		r = amdgpu_device_health_check(&device_list);
5822 		if (r)
5823 			goto end_reset;
5824 	}
5825 
5826 	/* Cannot be called after locking reset domain */
5827 	amdgpu_ras_pre_reset(adev, &device_list);
5828 
5829 	/* We need to lock reset domain only once both for XGMI and single device */
5830 	amdgpu_device_recovery_get_reset_lock(adev, &device_list);
5831 
5832 	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
5833 				      hive, need_emergency_restart);
5834 	if (need_emergency_restart)
5835 		goto skip_sched_resume;
5836 	/*
5837 	 * Must check guilty signal here since after this point all old
5838 	 * HW fences are force signaled.
5839 	 *
5840 	 * job->base holds a reference to parent fence
5841 	 */
5842 	if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) {
5843 		job_signaled = true;
5844 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5845 		goto skip_hw_reset;
5846 	}
5847 
5848 	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
5849 	if (r)
5850 		goto reset_unlock;
5851 skip_hw_reset:
5852 	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
5853 	if (r)
5854 		goto reset_unlock;
5855 skip_sched_resume:
5856 	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
5857 reset_unlock:
5858 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
5859 	amdgpu_ras_post_reset(adev, &device_list);
5860 end_reset:
5861 	if (hive) {
5862 		mutex_unlock(&hive->hive_lock);
5863 		amdgpu_put_xgmi_hive(hive);
5864 	}
5865 
5866 	if (r)
5867 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5868 
5869 	atomic_set(&adev->reset_domain->reset_res, r);
5870 
5871 	if (!r) {
5872 		struct amdgpu_task_info *ti = NULL;
5873 
5874 		/*
5875 		 * The job may already be freed at this point via the sched tdr workqueue so
5876 		 * use the cached pasid.
5877 		 */
5878 		if (pasid >= 0)
5879 			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
5880 
5881 		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
5882 				     ti ? &ti->task : NULL);
5883 
5884 		amdgpu_vm_put_task_info(ti);
5885 	}
5886 
5887 	return r;
5888 }
5889 
5890 /**
5891  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5892  *
5893  * @adev: amdgpu_device pointer
5894  * @speed: pointer to the speed of the link
5895  * @width: pointer to the width of the link
5896  *
5897  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5898  * first physical partner to an AMD dGPU.
5899  * This will exclude any virtual switches and links.
5900  */
5901 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5902 					    enum pci_bus_speed *speed,
5903 					    enum pcie_link_width *width)
5904 {
5905 	struct pci_dev *parent = adev->pdev;
5906 
5907 	if (!speed || !width)
5908 		return;
5909 
5910 	*speed = PCI_SPEED_UNKNOWN;
5911 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5912 
5913 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5914 		while ((parent = pci_upstream_bridge(parent))) {
5915 			/* skip upstream/downstream switches internal to dGPU*/
5916 			if (parent->vendor == PCI_VENDOR_ID_ATI)
5917 				continue;
5918 			*speed = pcie_get_speed_cap(parent);
5919 			*width = pcie_get_width_cap(parent);
5920 			break;
5921 		}
5922 	} else {
5923 		/* use the current speeds rather than max if switching is not supported */
5924 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5925 	}
5926 }
5927 
5928 /**
5929  * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
5930  *
5931  * @adev: amdgpu_device pointer
5932  * @speed: pointer to the speed of the link
5933  * @width: pointer to the width of the link
5934  *
5935  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5936  * AMD dGPU which may be a virtual upstream bridge.
5937  */
5938 static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
5939 					enum pci_bus_speed *speed,
5940 					enum pcie_link_width *width)
5941 {
5942 	struct pci_dev *parent = adev->pdev;
5943 
5944 	if (!speed || !width)
5945 		return;
5946 
5947 	parent = pci_upstream_bridge(parent);
5948 	if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
5949 		/* use the upstream/downstream switches internal to dGPU */
5950 		*speed = pcie_get_speed_cap(parent);
5951 		*width = pcie_get_width_cap(parent);
5952 		while ((parent = pci_upstream_bridge(parent))) {
5953 			if (parent->vendor == PCI_VENDOR_ID_ATI) {
5954 				/* use the upstream/downstream switches internal to dGPU */
5955 				*speed = pcie_get_speed_cap(parent);
5956 				*width = pcie_get_width_cap(parent);
5957 			}
5958 		}
5959 	} else {
5960 		/* use the device itself */
5961 		*speed = pcie_get_speed_cap(adev->pdev);
5962 		*width = pcie_get_width_cap(adev->pdev);
5963 	}
5964 }
5965 
5966 /**
5967  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5968  *
5969  * @adev: amdgpu_device pointer
5970  *
5971  * Fetches and stores in the driver the PCIE capabilities (gen speed
5972  * and lanes) of the slot the device is in. Handles APUs and
5973  * virtualized environments where PCIE config space may not be available.
5974  */
5975 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5976 {
5977 	enum pci_bus_speed speed_cap, platform_speed_cap;
5978 	enum pcie_link_width platform_link_width, link_width;
5979 
5980 	if (amdgpu_pcie_gen_cap)
5981 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5982 
5983 	if (amdgpu_pcie_lane_cap)
5984 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5985 
5986 	/* covers APUs as well */
5987 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5988 		if (adev->pm.pcie_gen_mask == 0)
5989 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5990 		if (adev->pm.pcie_mlw_mask == 0)
5991 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5992 		return;
5993 	}
5994 
5995 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5996 		return;
5997 
5998 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5999 					&platform_link_width);
6000 	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
6001 
6002 	if (adev->pm.pcie_gen_mask == 0) {
6003 		/* asic caps */
6004 		if (speed_cap == PCI_SPEED_UNKNOWN) {
6005 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6006 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6007 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6008 		} else {
6009 			if (speed_cap == PCIE_SPEED_32_0GT)
6010 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6011 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6012 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6013 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6014 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6015 			else if (speed_cap == PCIE_SPEED_16_0GT)
6016 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6017 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6018 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6019 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6020 			else if (speed_cap == PCIE_SPEED_8_0GT)
6021 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6022 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6023 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6024 			else if (speed_cap == PCIE_SPEED_5_0GT)
6025 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6026 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6027 			else
6028 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6029 		}
6030 		/* platform caps */
6031 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6032 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6033 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6034 		} else {
6035 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6036 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6037 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6038 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6039 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6040 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6041 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6042 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6043 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6044 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6045 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6046 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6047 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6048 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6049 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6050 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6051 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6052 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6053 			else
6054 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6055 
6056 		}
6057 	}
6058 	if (adev->pm.pcie_mlw_mask == 0) {
6059 		/* asic caps */
6060 		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6061 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6062 		} else {
6063 			switch (link_width) {
6064 			case PCIE_LNK_X32:
6065 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6066 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6067 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6068 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6069 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6070 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6071 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6072 				break;
6073 			case PCIE_LNK_X16:
6074 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6075 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6076 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6077 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6078 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6079 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6080 				break;
6081 			case PCIE_LNK_X12:
6082 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6083 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6084 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6085 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6086 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6087 				break;
6088 			case PCIE_LNK_X8:
6089 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6090 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6091 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6092 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6093 				break;
6094 			case PCIE_LNK_X4:
6095 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6096 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6097 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6098 				break;
6099 			case PCIE_LNK_X2:
6100 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6101 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6102 				break;
6103 			case PCIE_LNK_X1:
6104 				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6105 				break;
6106 			default:
6107 				break;
6108 			}
6109 		}
6110 		/* platform caps */
6111 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6112 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6113 		} else {
6114 			switch (platform_link_width) {
6115 			case PCIE_LNK_X32:
6116 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6117 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6118 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6119 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6120 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6121 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6122 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6123 				break;
6124 			case PCIE_LNK_X16:
6125 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6126 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6127 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6128 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6129 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6130 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6131 				break;
6132 			case PCIE_LNK_X12:
6133 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6134 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6135 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6136 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6137 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6138 				break;
6139 			case PCIE_LNK_X8:
6140 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6141 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6142 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6143 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6144 				break;
6145 			case PCIE_LNK_X4:
6146 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6147 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6148 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6149 				break;
6150 			case PCIE_LNK_X2:
6151 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6152 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6153 				break;
6154 			case PCIE_LNK_X1:
6155 				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6156 				break;
6157 			default:
6158 				break;
6159 			}
6160 		}
6161 	}
6162 }
6163 
6164 /**
6165  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6166  *
6167  * @adev: amdgpu_device pointer
6168  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6169  *
6170  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6171  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6172  * @peer_adev.
6173  */
6174 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6175 				      struct amdgpu_device *peer_adev)
6176 {
6177 #ifdef CONFIG_HSA_AMD_P2P
6178 	bool p2p_access =
6179 		!adev->gmc.xgmi.connected_to_cpu &&
6180 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6181 	if (!p2p_access)
6182 		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6183 			pci_name(peer_adev->pdev));
6184 
6185 	bool is_large_bar = adev->gmc.visible_vram_size &&
6186 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6187 	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6188 
6189 	if (!p2p_addressable) {
6190 		uint64_t address_mask = peer_adev->dev->dma_mask ?
6191 			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6192 		resource_size_t aper_limit =
6193 			adev->gmc.aper_base + adev->gmc.aper_size - 1;
6194 
6195 		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6196 				     aper_limit & address_mask);
6197 	}
6198 	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6199 #else
6200 	return false;
6201 #endif
6202 }
6203 
6204 int amdgpu_device_baco_enter(struct amdgpu_device *adev)
6205 {
6206 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6207 
6208 	if (!amdgpu_device_supports_baco(adev))
6209 		return -ENOTSUPP;
6210 
6211 	if (ras && adev->ras_enabled &&
6212 	    adev->nbio.funcs->enable_doorbell_interrupt)
6213 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6214 
6215 	return amdgpu_dpm_baco_enter(adev);
6216 }
6217 
6218 int amdgpu_device_baco_exit(struct amdgpu_device *adev)
6219 {
6220 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6221 	int ret = 0;
6222 
6223 	if (!amdgpu_device_supports_baco(adev))
6224 		return -ENOTSUPP;
6225 
6226 	ret = amdgpu_dpm_baco_exit(adev);
6227 	if (ret)
6228 		return ret;
6229 
6230 	if (ras && adev->ras_enabled &&
6231 	    adev->nbio.funcs->enable_doorbell_interrupt)
6232 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6233 
6234 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6235 	    adev->nbio.funcs->clear_doorbell_interrupt)
6236 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6237 
6238 	return 0;
6239 }
6240 
6241 /**
6242  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6243  * @pdev: PCI device struct
6244  * @state: PCI channel state
6245  *
6246  * Description: Called when a PCI error is detected.
6247  *
6248  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6249  */
6250 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6251 {
6252 	struct drm_device *dev = pci_get_drvdata(pdev);
6253 	struct amdgpu_device *adev = drm_to_adev(dev);
6254 	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
6255 		amdgpu_get_xgmi_hive(adev);
6256 	struct amdgpu_reset_context reset_context;
6257 	struct list_head device_list;
6258 
6259 	dev_info(adev->dev, "PCI error: detected callback!!\n");
6260 
6261 	adev->pci_channel_state = state;
6262 
6263 	switch (state) {
6264 	case pci_channel_io_normal:
6265 		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
6266 		return PCI_ERS_RESULT_CAN_RECOVER;
6267 	case pci_channel_io_frozen:
6268 		/* Fatal error, prepare for slot reset */
6269 		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
6270 		if (hive) {
6271 			/* Hive devices should be able to support FW based
6272 			 * link reset on other devices, if not return.
6273 			 */
6274 			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
6275 				dev_warn(adev->dev,
6276 					 "No support for XGMI hive yet...\n");
6277 				return PCI_ERS_RESULT_DISCONNECT;
6278 			}
6279 			/* Set dpc status only if device is part of hive
6280 			 * Non-hive devices should be able to recover after
6281 			 * link reset.
6282 			 */
6283 			amdgpu_reset_set_dpc_status(adev, true);
6284 
6285 			mutex_lock(&hive->hive_lock);
6286 		}
6287 		memset(&reset_context, 0, sizeof(reset_context));
6288 		INIT_LIST_HEAD(&device_list);
6289 
6290 		amdgpu_device_recovery_prepare(adev, &device_list, hive);
6291 		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
6292 		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
6293 					      hive, false);
6294 		if (hive)
6295 			mutex_unlock(&hive->hive_lock);
6296 		return PCI_ERS_RESULT_NEED_RESET;
6297 	case pci_channel_io_perm_failure:
6298 		/* Permanent error, prepare for device removal */
6299 		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
6300 		return PCI_ERS_RESULT_DISCONNECT;
6301 	}
6302 
6303 	return PCI_ERS_RESULT_NEED_RESET;
6304 }
6305 
6306 /**
6307  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6308  * @pdev: pointer to PCI device
6309  */
6310 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6311 {
6312 	struct drm_device *dev = pci_get_drvdata(pdev);
6313 	struct amdgpu_device *adev = drm_to_adev(dev);
6314 
6315 	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6316 
6317 	/* TODO - dump whatever for debugging purposes */
6318 
6319 	/* This called only if amdgpu_pci_error_detected returns
6320 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6321 	 * works, no need to reset slot.
6322 	 */
6323 
6324 	return PCI_ERS_RESULT_RECOVERED;
6325 }
6326 
6327 /**
6328  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6329  * @pdev: PCI device struct
6330  *
6331  * Description: This routine is called by the pci error recovery
6332  * code after the PCI slot has been reset, just before we
6333  * should resume normal operations.
6334  */
6335 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6336 {
6337 	struct drm_device *dev = pci_get_drvdata(pdev);
6338 	struct amdgpu_device *adev = drm_to_adev(dev);
6339 	struct amdgpu_reset_context reset_context;
6340 	struct amdgpu_device *tmp_adev;
6341 	struct amdgpu_hive_info *hive;
6342 	struct list_head device_list;
6343 	struct pci_dev *link_dev;
6344 	int r = 0, i, timeout;
6345 	u32 memsize;
6346 	u16 status;
6347 
6348 	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
6349 
6350 	memset(&reset_context, 0, sizeof(reset_context));
6351 	INIT_LIST_HEAD(&device_list);
6352 	hive = amdgpu_get_xgmi_hive(adev);
6353 	if (hive) {
6354 		mutex_lock(&hive->hive_lock);
6355 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6356 			list_add_tail(&tmp_adev->reset_list, &device_list);
6357 	} else {
6358 		list_add_tail(&adev->reset_list, &device_list);
6359 	}
6360 
6361 	if (adev->pcie_reset_ctx.swus)
6362 		link_dev = adev->pcie_reset_ctx.swus;
6363 	else
6364 		link_dev = adev->pdev;
6365 	/* wait for asic to come out of reset, timeout = 10s */
6366 	timeout = 10000;
6367 	do {
6368 		usleep_range(10000, 10500);
6369 		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
6370 		timeout -= 10;
6371 	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
6372 		 (status != PCI_VENDOR_ID_AMD));
6373 
6374 	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
6375 		r = -ETIME;
6376 		goto out;
6377 	}
6378 
6379 	amdgpu_device_load_switch_state(adev);
6380 	/* Restore PCI confspace */
6381 	amdgpu_device_load_pci_state(pdev);
6382 
6383 	/* confirm  ASIC came out of reset */
6384 	for (i = 0; i < adev->usec_timeout; i++) {
6385 		memsize = amdgpu_asic_get_config_memsize(adev);
6386 
6387 		if (memsize != 0xffffffff)
6388 			break;
6389 		udelay(1);
6390 	}
6391 	if (memsize == 0xffffffff) {
6392 		r = -ETIME;
6393 		goto out;
6394 	}
6395 
6396 	reset_context.method = AMD_RESET_METHOD_NONE;
6397 	reset_context.reset_req_dev = adev;
6398 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6399 	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
6400 
6401 	if (hive) {
6402 		reset_context.hive = hive;
6403 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6404 			tmp_adev->pcie_reset_ctx.in_link_reset = true;
6405 	} else {
6406 		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6407 	}
6408 
6409 	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
6410 out:
6411 	if (!r) {
6412 		if (amdgpu_device_cache_pci_state(adev->pdev))
6413 			pci_restore_state(adev->pdev);
6414 		dev_info(adev->dev, "PCIe error recovery succeeded\n");
6415 	} else {
6416 		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
6417 		if (hive) {
6418 			list_for_each_entry(tmp_adev, &device_list, reset_list)
6419 				amdgpu_device_unset_mp1_state(tmp_adev);
6420 		}
6421 		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6422 	}
6423 
6424 	if (hive) {
6425 		mutex_unlock(&hive->hive_lock);
6426 		amdgpu_put_xgmi_hive(hive);
6427 	}
6428 
6429 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6430 }
6431 
6432 /**
6433  * amdgpu_pci_resume() - resume normal ops after PCI reset
6434  * @pdev: pointer to PCI device
6435  *
6436  * Called when the error recovery driver tells us that its
6437  * OK to resume normal operation.
6438  */
6439 void amdgpu_pci_resume(struct pci_dev *pdev)
6440 {
6441 	struct drm_device *dev = pci_get_drvdata(pdev);
6442 	struct amdgpu_device *adev = drm_to_adev(dev);
6443 	struct list_head device_list;
6444 	struct amdgpu_hive_info *hive = NULL;
6445 	struct amdgpu_device *tmp_adev = NULL;
6446 
6447 	dev_info(adev->dev, "PCI error: resume callback!!\n");
6448 
6449 	/* Only continue execution for the case of pci_channel_io_frozen */
6450 	if (adev->pci_channel_state != pci_channel_io_frozen)
6451 		return;
6452 
6453 	INIT_LIST_HEAD(&device_list);
6454 
6455 	hive = amdgpu_get_xgmi_hive(adev);
6456 	if (hive) {
6457 		mutex_lock(&hive->hive_lock);
6458 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6459 			tmp_adev->pcie_reset_ctx.in_link_reset = false;
6460 			list_add_tail(&tmp_adev->reset_list, &device_list);
6461 		}
6462 	} else
6463 		list_add_tail(&adev->reset_list, &device_list);
6464 
6465 	amdgpu_device_sched_resume(&device_list, NULL, NULL);
6466 	amdgpu_device_gpu_resume(adev, &device_list, false);
6467 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6468 
6469 	if (hive) {
6470 		mutex_unlock(&hive->hive_lock);
6471 		amdgpu_put_xgmi_hive(hive);
6472 	}
6473 }
6474 
6475 static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
6476 {
6477 	struct pci_dev *swus, *swds;
6478 	int r;
6479 
6480 	swds = pci_upstream_bridge(adev->pdev);
6481 	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
6482 	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
6483 		return;
6484 	swus = pci_upstream_bridge(swds);
6485 	if (!swus ||
6486 	    (swus->vendor != PCI_VENDOR_ID_ATI &&
6487 	     swus->vendor != PCI_VENDOR_ID_AMD) ||
6488 	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
6489 		return;
6490 
6491 	/* If already saved, return */
6492 	if (adev->pcie_reset_ctx.swus)
6493 		return;
6494 	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
6495 	r = pci_save_state(swds);
6496 	if (r)
6497 		return;
6498 	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
6499 
6500 	r = pci_save_state(swus);
6501 	if (r)
6502 		return;
6503 	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
6504 
6505 	adev->pcie_reset_ctx.swus = swus;
6506 }
6507 
6508 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
6509 {
6510 	struct pci_dev *pdev;
6511 	int r;
6512 
6513 	if (!adev->pcie_reset_ctx.swds_pcistate ||
6514 	    !adev->pcie_reset_ctx.swus_pcistate)
6515 		return;
6516 
6517 	pdev = adev->pcie_reset_ctx.swus;
6518 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
6519 	if (!r) {
6520 		pci_restore_state(pdev);
6521 	} else {
6522 		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
6523 		return;
6524 	}
6525 
6526 	pdev = pci_upstream_bridge(adev->pdev);
6527 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
6528 	if (!r)
6529 		pci_restore_state(pdev);
6530 	else
6531 		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
6532 }
6533 
6534 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6535 {
6536 	struct drm_device *dev = pci_get_drvdata(pdev);
6537 	struct amdgpu_device *adev = drm_to_adev(dev);
6538 	int r;
6539 
6540 	if (amdgpu_sriov_vf(adev))
6541 		return false;
6542 
6543 	r = pci_save_state(pdev);
6544 	if (!r) {
6545 		kfree(adev->pci_state);
6546 
6547 		adev->pci_state = pci_store_saved_state(pdev);
6548 
6549 		if (!adev->pci_state) {
6550 			dev_err(adev->dev, "Failed to store PCI saved state");
6551 			return false;
6552 		}
6553 	} else {
6554 		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
6555 		return false;
6556 	}
6557 
6558 	amdgpu_device_cache_switch_state(adev);
6559 
6560 	return true;
6561 }
6562 
6563 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6564 {
6565 	struct drm_device *dev = pci_get_drvdata(pdev);
6566 	struct amdgpu_device *adev = drm_to_adev(dev);
6567 	int r;
6568 
6569 	if (!adev->pci_state)
6570 		return false;
6571 
6572 	r = pci_load_saved_state(pdev, adev->pci_state);
6573 
6574 	if (!r) {
6575 		pci_restore_state(pdev);
6576 	} else {
6577 		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
6578 		return false;
6579 	}
6580 
6581 	return true;
6582 }
6583 
6584 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6585 		struct amdgpu_ring *ring)
6586 {
6587 #ifdef CONFIG_X86_64
6588 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6589 		return;
6590 #endif
6591 	if (adev->gmc.xgmi.connected_to_cpu)
6592 		return;
6593 
6594 	if (ring && ring->funcs->emit_hdp_flush) {
6595 		amdgpu_ring_emit_hdp_flush(ring);
6596 		return;
6597 	}
6598 
6599 	if (!ring && amdgpu_sriov_runtime(adev)) {
6600 		if (!amdgpu_kiq_hdp_flush(adev))
6601 			return;
6602 	}
6603 
6604 	amdgpu_hdp_flush(adev, ring);
6605 }
6606 
6607 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6608 		struct amdgpu_ring *ring)
6609 {
6610 #ifdef CONFIG_X86_64
6611 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6612 		return;
6613 #endif
6614 	if (adev->gmc.xgmi.connected_to_cpu)
6615 		return;
6616 
6617 	amdgpu_hdp_invalidate(adev, ring);
6618 }
6619 
6620 int amdgpu_in_reset(struct amdgpu_device *adev)
6621 {
6622 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6623 }
6624 
6625 /**
6626  * amdgpu_device_halt() - bring hardware to some kind of halt state
6627  *
6628  * @adev: amdgpu_device pointer
6629  *
6630  * Bring hardware to some kind of halt state so that no one can touch it
6631  * any more. It will help to maintain error context when error occurred.
6632  * Compare to a simple hang, the system will keep stable at least for SSH
6633  * access. Then it should be trivial to inspect the hardware state and
6634  * see what's going on. Implemented as following:
6635  *
6636  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6637  *    clears all CPU mappings to device, disallows remappings through page faults
6638  * 2. amdgpu_irq_disable_all() disables all interrupts
6639  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6640  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6641  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6642  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6643  *    flush any in flight DMA operations
6644  */
6645 void amdgpu_device_halt(struct amdgpu_device *adev)
6646 {
6647 	struct pci_dev *pdev = adev->pdev;
6648 	struct drm_device *ddev = adev_to_drm(adev);
6649 
6650 	amdgpu_xcp_dev_unplug(adev);
6651 	drm_dev_unplug(ddev);
6652 
6653 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
6654 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
6655 
6656 	amdgpu_irq_disable_all(adev);
6657 
6658 	amdgpu_fence_driver_hw_fini(adev);
6659 
6660 	adev->no_hw_access = true;
6661 
6662 	amdgpu_device_unmap_mmio(adev);
6663 
6664 	pci_disable_device(pdev);
6665 	pci_wait_for_pending_transaction(pdev);
6666 }
6667 
6668 /**
6669  * amdgpu_device_get_gang - return a reference to the current gang
6670  * @adev: amdgpu_device pointer
6671  *
6672  * Returns: A new reference to the current gang leader.
6673  */
6674 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6675 {
6676 	struct dma_fence *fence;
6677 
6678 	rcu_read_lock();
6679 	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6680 	rcu_read_unlock();
6681 	return fence;
6682 }
6683 
6684 /**
6685  * amdgpu_device_switch_gang - switch to a new gang
6686  * @adev: amdgpu_device pointer
6687  * @gang: the gang to switch to
6688  *
6689  * Try to switch to a new gang.
6690  * Returns: NULL if we switched to the new gang or a reference to the current
6691  * gang leader.
6692  */
6693 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6694 					    struct dma_fence *gang)
6695 {
6696 	struct dma_fence *old = NULL;
6697 
6698 	dma_fence_get(gang);
6699 	do {
6700 		dma_fence_put(old);
6701 		old = amdgpu_device_get_gang(adev);
6702 		if (old == gang)
6703 			break;
6704 
6705 		if (!dma_fence_is_signaled(old)) {
6706 			dma_fence_put(gang);
6707 			return old;
6708 		}
6709 
6710 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6711 			 old, gang) != old);
6712 
6713 	/*
6714 	 * Drop it once for the exchanged reference in adev and once for the
6715 	 * thread local reference acquired in amdgpu_device_get_gang().
6716 	 */
6717 	dma_fence_put(old);
6718 	dma_fence_put(old);
6719 	return NULL;
6720 }
6721 
6722 /**
6723  * amdgpu_device_enforce_isolation - enforce HW isolation
6724  * @adev: the amdgpu device pointer
6725  * @ring: the HW ring the job is supposed to run on
6726  * @job: the job which is about to be pushed to the HW ring
6727  *
6728  * Makes sure that only one client at a time can use the GFX block.
6729  * Returns: The dependency to wait on before the job can be pushed to the HW.
6730  * The function is called multiple times until NULL is returned.
6731  */
6732 struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
6733 						  struct amdgpu_ring *ring,
6734 						  struct amdgpu_job *job)
6735 {
6736 	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
6737 	struct drm_sched_fence *f = job->base.s_fence;
6738 	struct dma_fence *dep;
6739 	void *owner;
6740 	int r;
6741 
6742 	/*
6743 	 * For now enforce isolation only for the GFX block since we only need
6744 	 * the cleaner shader on those rings.
6745 	 */
6746 	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
6747 	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6748 		return NULL;
6749 
6750 	/*
6751 	 * All submissions where enforce isolation is false are handled as if
6752 	 * they come from a single client. Use ~0l as the owner to distinct it
6753 	 * from kernel submissions where the owner is NULL.
6754 	 */
6755 	owner = job->enforce_isolation ? f->owner : (void *)~0l;
6756 
6757 	mutex_lock(&adev->enforce_isolation_mutex);
6758 
6759 	/*
6760 	 * The "spearhead" submission is the first one which changes the
6761 	 * ownership to its client. We always need to wait for it to be
6762 	 * pushed to the HW before proceeding with anything.
6763 	 */
6764 	if (&f->scheduled != isolation->spearhead &&
6765 	    !dma_fence_is_signaled(isolation->spearhead)) {
6766 		dep = isolation->spearhead;
6767 		goto out_grab_ref;
6768 	}
6769 
6770 	if (isolation->owner != owner) {
6771 
6772 		/*
6773 		 * Wait for any gang to be assembled before switching to a
6774 		 * different owner or otherwise we could deadlock the
6775 		 * submissions.
6776 		 */
6777 		if (!job->gang_submit) {
6778 			dep = amdgpu_device_get_gang(adev);
6779 			if (!dma_fence_is_signaled(dep))
6780 				goto out_return_dep;
6781 			dma_fence_put(dep);
6782 		}
6783 
6784 		dma_fence_put(isolation->spearhead);
6785 		isolation->spearhead = dma_fence_get(&f->scheduled);
6786 		amdgpu_sync_move(&isolation->active, &isolation->prev);
6787 		trace_amdgpu_isolation(isolation->owner, owner);
6788 		isolation->owner = owner;
6789 	}
6790 
6791 	/*
6792 	 * Specifying the ring here helps to pipeline submissions even when
6793 	 * isolation is enabled. If that is not desired for testing NULL can be
6794 	 * used instead of the ring to enforce a CPU round trip while switching
6795 	 * between clients.
6796 	 */
6797 	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
6798 	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
6799 	if (r)
6800 		dev_warn(adev->dev, "OOM tracking isolation\n");
6801 
6802 out_grab_ref:
6803 	dma_fence_get(dep);
6804 out_return_dep:
6805 	mutex_unlock(&adev->enforce_isolation_mutex);
6806 	return dep;
6807 }
6808 
6809 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6810 {
6811 	switch (adev->asic_type) {
6812 #ifdef CONFIG_DRM_AMDGPU_SI
6813 	case CHIP_HAINAN:
6814 #endif
6815 	case CHIP_TOPAZ:
6816 		/* chips with no display hardware */
6817 		return false;
6818 #ifdef CONFIG_DRM_AMDGPU_SI
6819 	case CHIP_TAHITI:
6820 	case CHIP_PITCAIRN:
6821 	case CHIP_VERDE:
6822 	case CHIP_OLAND:
6823 #endif
6824 #ifdef CONFIG_DRM_AMDGPU_CIK
6825 	case CHIP_BONAIRE:
6826 	case CHIP_HAWAII:
6827 	case CHIP_KAVERI:
6828 	case CHIP_KABINI:
6829 	case CHIP_MULLINS:
6830 #endif
6831 	case CHIP_TONGA:
6832 	case CHIP_FIJI:
6833 	case CHIP_POLARIS10:
6834 	case CHIP_POLARIS11:
6835 	case CHIP_POLARIS12:
6836 	case CHIP_VEGAM:
6837 	case CHIP_CARRIZO:
6838 	case CHIP_STONEY:
6839 		/* chips with display hardware */
6840 		return true;
6841 	default:
6842 		/* IP discovery */
6843 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6844 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6845 			return false;
6846 		return true;
6847 	}
6848 }
6849 
6850 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
6851 {
6852 	ssize_t size = 0;
6853 
6854 	if (!ring || !ring->adev)
6855 		return size;
6856 
6857 	if (amdgpu_device_should_recover_gpu(ring->adev))
6858 		size |= AMDGPU_RESET_TYPE_FULL;
6859 
6860 	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
6861 	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
6862 		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
6863 
6864 	return size;
6865 }
6866 
6867 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
6868 {
6869 	ssize_t size = 0;
6870 
6871 	if (supported_reset == 0) {
6872 		size += sysfs_emit_at(buf, size, "unsupported");
6873 		size += sysfs_emit_at(buf, size, "\n");
6874 		return size;
6875 
6876 	}
6877 
6878 	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
6879 		size += sysfs_emit_at(buf, size, "soft ");
6880 
6881 	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
6882 		size += sysfs_emit_at(buf, size, "queue ");
6883 
6884 	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
6885 		size += sysfs_emit_at(buf, size, "pipe ");
6886 
6887 	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
6888 		size += sysfs_emit_at(buf, size, "full ");
6889 
6890 	size += sysfs_emit_at(buf, size, "\n");
6891 	return size;
6892 }
6893 
6894 void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
6895 			   enum amdgpu_uid_type type, uint8_t inst,
6896 			   uint64_t uid)
6897 {
6898 	if (!uid_info)
6899 		return;
6900 
6901 	if (type >= AMDGPU_UID_TYPE_MAX) {
6902 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6903 			     type);
6904 		return;
6905 	}
6906 
6907 	if (inst >= AMDGPU_UID_INST_MAX) {
6908 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6909 			     inst);
6910 		return;
6911 	}
6912 
6913 	if (uid_info->uid[type][inst] != 0) {
6914 		dev_warn_once(
6915 			uid_info->adev->dev,
6916 			"Overwriting existing UID %llu for type %d instance %d\n",
6917 			uid_info->uid[type][inst], type, inst);
6918 	}
6919 
6920 	uid_info->uid[type][inst] = uid;
6921 }
6922 
6923 u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
6924 			  enum amdgpu_uid_type type, uint8_t inst)
6925 {
6926 	if (!uid_info)
6927 		return 0;
6928 
6929 	if (type >= AMDGPU_UID_TYPE_MAX) {
6930 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6931 			     type);
6932 		return 0;
6933 	}
6934 
6935 	if (inst >= AMDGPU_UID_INST_MAX) {
6936 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6937 			     inst);
6938 		return 0;
6939 	}
6940 
6941 	return uid_info->uid[type][inst];
6942 }
6943