xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 #ifndef __AMDGPU_XGMI_H__
23 #define __AMDGPU_XGMI_H__
24 
25 #include <drm/task_barrier.h>
26 #include "amdgpu_psp.h"
27 #include "amdgpu_ras.h"
28 
/**
 * struct amdgpu_hive_info - state kept for one XGMI hive
 * @kobj: embedded kobject
 * @hive_id: 64-bit identifier of the hive
 * @device_list: list head; presumably the devices that are members of this
 *               hive - TODO confirm against amdgpu_xgmi.c
 * @node: list linkage; presumably links this hive into a list of all hives -
 *        TODO confirm
 * @number_devices: atomic counter; presumably the number of devices currently
 *                  in the hive - TODO confirm
 * @hive_lock: mutex; what it protects is not visible from this header
 * @hi_req_count: NOTE(review): looks like a count of outstanding high-pstate
 *                requests - confirm against amdgpu_xgmi_set_pstate()
 * @hi_req_gpu: NOTE(review): looks like the device associated with the high
 *              pstate request - confirm
 * @tb: task barrier (type comes from <drm/task_barrier.h>)
 * @pstate: one of AMDGPU_XGMI_PSTATE_MIN, AMDGPU_XGMI_PSTATE_MAX_VEGA20 or
 *          AMDGPU_XGMI_PSTATE_UNKNOWN (anonymous enum declared in place)
 * @reset_domain: pointer to a reset domain; presumably shared by the devices
 *                of the hive - TODO confirm
 * @ras_recovery: atomic value; exact semantics are not visible from this
 *                header
 * @event_mgr: RAS event manager embedded in the hive
 * @reset_on_init_work: work item; presumably related to
 *                      amdgpu_xgmi_reset_on_init() - TODO confirm
 * @requested_nps_mode: atomic value; presumably written through
 *                      amdgpu_xgmi_request_nps_change() - TODO confirm
 */
29 struct amdgpu_hive_info {
30 	struct kobject kobj;
31 	uint64_t hive_id;
32 	struct list_head device_list;
33 	struct list_head node;
34 	atomic_t number_devices;
35 	struct mutex hive_lock;
36 	int hi_req_count;
37 	struct amdgpu_device *hi_req_gpu;
38 	struct task_barrier tb;
39 	enum {
40 		AMDGPU_XGMI_PSTATE_MIN,
41 		AMDGPU_XGMI_PSTATE_MAX_VEGA20,
42 		AMDGPU_XGMI_PSTATE_UNKNOWN
43 	} pstate;
44 
45 	struct amdgpu_reset_domain *reset_domain;
46 	atomic_t ras_recovery;
47 	struct ras_event_manager event_mgr;
48 	struct work_struct reset_on_init_work;
49 	atomic_t requested_nps_mode;
50 };
51 
/**
 * struct amdgpu_pcs_ras_field - description of one PCS error field
 * @err_name: name of the error as a constant string
 * @pcs_err_mask: 32-bit mask of the field
 * @pcs_err_shift: 32-bit shift of the field
 *
 * NOTE(review): mask and shift are presumably applied to a PCS error status
 * register value to extract the error count - confirm at the users.
 */
52 struct amdgpu_pcs_ras_field {
53 	const char *err_name;
54 	uint32_t pcs_err_mask;
55 	uint32_t pcs_err_shift;
56 };
57 
/*
 * XGMI interface declarations. Only the prototypes are visible in this
 * header; the definitions live elsewhere.
 */

/* Declared here, defined elsewhere; struct amdgpu_xgmi_ras is not declared in this header. */
58 extern struct amdgpu_xgmi_ras  xgmi_ras;
/*
 * Hive lookup and release. NOTE(review): the get/put naming suggests
 * reference counting on the hive - confirm in the implementation.
 */
59 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
60 void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
/* Hive membership and topology; each returns an int (presumably 0 or a negative errno - confirm). */
61 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
62 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
63 int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
/* @pstate is a plain int; presumably one of the AMDGPU_XGMI_PSTATE_* values - confirm. */
64 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
/* Queries about the relationship between @adev and @peer_adev. */
65 int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
66 		struct amdgpu_device *peer_adev);
67 int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
68 		struct amdgpu_device *peer_adev);
69 bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
70 					struct amdgpu_device *peer_adev);
/* Maps a 64-bit @addr to a 64-bit result for @adev; the exact mapping is not visible here. */
71 uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
72 					   uint64_t addr);
73 static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
74 		struct amdgpu_device *bo_adev)
75 {
76 	return (amdgpu_use_xgmi_p2p &&
77 		adev != bo_adev &&
78 		adev->gmc.xgmi.hive_id &&
79 		adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
80 }
/*
 * Further XGMI entry points. Only the prototypes are visible in this header;
 * each returns an int (presumably 0 or a negative errno unless noted -
 * confirm in the implementation).
 */
81 int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
82 int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
83 
/* Takes the device, its hive and the requested NPS mode as a plain int. */
84 int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
85 				   struct amdgpu_hive_info *hive,
86 				   int req_nps_mode);
/* NOTE(review): the return value presumably encodes the status of link @global_link_num - confirm. */
87 int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
88 				int global_link_num);
89 
90 #endif
91