1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #ifndef __AMDGPU_XGMI_H__ 23 #define __AMDGPU_XGMI_H__ 24 25 #include <drm/task_barrier.h> 26 #include "amdgpu_ras.h" 27 28 enum amdgpu_xgmi_link_speed { 29 XGMI_SPEED_16GT = 16, 30 XGMI_SPEED_25GT = 25, 31 XGMI_SPEED_32GT = 32 32 }; 33 34 struct amdgpu_hive_info { 35 struct kobject kobj; 36 uint64_t hive_id; 37 struct list_head device_list; 38 struct list_head node; 39 atomic_t number_devices; 40 struct mutex hive_lock; 41 int hi_req_count; 42 struct amdgpu_device *hi_req_gpu; 43 struct task_barrier tb; 44 enum { 45 AMDGPU_XGMI_PSTATE_MIN, 46 AMDGPU_XGMI_PSTATE_MAX_VEGA20, 47 AMDGPU_XGMI_PSTATE_UNKNOWN 48 } pstate; 49 50 struct amdgpu_reset_domain *reset_domain; 51 atomic_t ras_recovery; 52 struct ras_event_manager event_mgr; 53 struct work_struct reset_on_init_work; 54 atomic_t requested_nps_mode; 55 }; 56 57 struct amdgpu_pcs_ras_field { 58 const char *err_name; 59 uint32_t pcs_err_mask; 60 uint32_t pcs_err_shift; 61 }; 62 63 /** 64 * Bandwidth range reporting comes in two modes. 65 * 66 * PER_LINK - range for any xgmi link 67 * PER_PEER - range of max of single xgmi link to max of multiple links based on source peer 68 */ 69 enum amdgpu_xgmi_bw_mode { 70 AMDGPU_XGMI_BW_MODE_PER_LINK = 0, 71 AMDGPU_XGMI_BW_MODE_PER_PEER 72 }; 73 74 enum amdgpu_xgmi_bw_unit { 75 AMDGPU_XGMI_BW_UNIT_GBYTES = 0, 76 AMDGPU_XGMI_BW_UNIT_MBYTES 77 }; 78 79 struct amdgpu_xgmi_ras { 80 struct amdgpu_ras_block_object ras_block; 81 }; 82 extern struct amdgpu_xgmi_ras xgmi_ras; 83 84 struct amdgpu_xgmi { 85 /* from psp */ 86 u64 node_id; 87 u64 hive_id; 88 /* fixed per family */ 89 u64 node_segment_size; 90 /* physical node (0-3) */ 91 unsigned physical_node_id; 92 /* number of nodes (0-4) */ 93 unsigned num_physical_nodes; 94 /* gpu list in the same hive */ 95 struct list_head head; 96 bool supported; 97 struct ras_common_if *ras_if; 98 bool connected_to_cpu; 99 struct amdgpu_xgmi_ras *ras; 100 enum amdgpu_xgmi_link_speed max_speed; 101 uint8_t max_width; 102 }; 103 104 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); 105 void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive); 106 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); 107 int amdgpu_xgmi_add_device(struct amdgpu_device *adev); 108 int amdgpu_xgmi_remove_device(struct amdgpu_device *adev); 109 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); 110 int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); 111 int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev, 112 enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit, 113 uint32_t *min_bw, uint32_t *max_bw); 114 bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev, 115 struct amdgpu_device *peer_adev); 116 uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, 117 uint64_t addr); 118 bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, 119 struct amdgpu_device *bo_adev); 120 int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev); 121 int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev); 122 123 int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, 124 struct amdgpu_hive_info *hive, 125 int req_nps_mode); 126 int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, 127 int global_link_num); 128 int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num); 129 130 void amdgpu_xgmi_early_init(struct amdgpu_device *adev); 131 uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev); 132 133 #endif 134