1 /* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include "amdgpu_reset.h" 25 #include "aldebaran.h" 26 #include "sienna_cichlid.h" 27 #include "smu_v13_0_10.h" 28 29 int amdgpu_reset_init(struct amdgpu_device *adev) 30 { 31 int ret = 0; 32 33 switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { 34 case IP_VERSION(13, 0, 2): 35 case IP_VERSION(13, 0, 6): 36 case IP_VERSION(13, 0, 14): 37 ret = aldebaran_reset_init(adev); 38 break; 39 case IP_VERSION(11, 0, 7): 40 ret = sienna_cichlid_reset_init(adev); 41 break; 42 case IP_VERSION(13, 0, 10): 43 ret = smu_v13_0_10_reset_init(adev); 44 break; 45 default: 46 break; 47 } 48 49 return ret; 50 } 51 52 int amdgpu_reset_fini(struct amdgpu_device *adev) 53 { 54 int ret = 0; 55 56 switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { 57 case IP_VERSION(13, 0, 2): 58 case IP_VERSION(13, 0, 6): 59 case IP_VERSION(13, 0, 14): 60 ret = aldebaran_reset_fini(adev); 61 break; 62 case IP_VERSION(11, 0, 7): 63 ret = sienna_cichlid_reset_fini(adev); 64 break; 65 case IP_VERSION(13, 0, 10): 66 ret = smu_v13_0_10_reset_fini(adev); 67 break; 68 default: 69 break; 70 } 71 72 return ret; 73 } 74 75 int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, 76 struct amdgpu_reset_context *reset_context) 77 { 78 struct amdgpu_reset_handler *reset_handler = NULL; 79 80 if (adev->reset_cntl && adev->reset_cntl->get_reset_handler) 81 reset_handler = adev->reset_cntl->get_reset_handler( 82 adev->reset_cntl, reset_context); 83 if (!reset_handler) 84 return -EOPNOTSUPP; 85 86 return reset_handler->prepare_hwcontext(adev->reset_cntl, 87 reset_context); 88 } 89 90 int amdgpu_reset_perform_reset(struct amdgpu_device *adev, 91 struct amdgpu_reset_context *reset_context) 92 { 93 int ret; 94 struct amdgpu_reset_handler *reset_handler = NULL; 95 96 if (adev->reset_cntl) 97 reset_handler = adev->reset_cntl->get_reset_handler( 98 adev->reset_cntl, reset_context); 99 if (!reset_handler) 100 return -EOPNOTSUPP; 101 102 ret = reset_handler->perform_reset(adev->reset_cntl, reset_context); 103 if (ret) 104 return ret; 105 106 return reset_handler->restore_hwcontext(adev->reset_cntl, 107 reset_context); 108 } 109 110 111 void amdgpu_reset_destroy_reset_domain(struct kref *ref) 112 { 113 struct amdgpu_reset_domain *reset_domain = container_of(ref, 114 struct amdgpu_reset_domain, 115 refcount); 116 if (reset_domain->wq) 117 destroy_workqueue(reset_domain->wq); 118 119 kvfree(reset_domain); 120 } 121 122 struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type, 123 char *wq_name) 124 { 125 struct amdgpu_reset_domain *reset_domain; 126 127 reset_domain = kvzalloc(sizeof(struct amdgpu_reset_domain), GFP_KERNEL); 128 if (!reset_domain) { 129 DRM_ERROR("Failed to allocate amdgpu_reset_domain!"); 130 return NULL; 131 } 132 133 reset_domain->type = type; 134 kref_init(&reset_domain->refcount); 135 136 reset_domain->wq = create_singlethread_workqueue(wq_name); 137 if (!reset_domain->wq) { 138 DRM_ERROR("Failed to allocate wq for amdgpu_reset_domain!"); 139 amdgpu_reset_put_reset_domain(reset_domain); 140 return NULL; 141 142 } 143 144 atomic_set(&reset_domain->in_gpu_reset, 0); 145 atomic_set(&reset_domain->reset_res, 0); 146 init_rwsem(&reset_domain->sem); 147 148 return reset_domain; 149 } 150 151 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain) 152 { 153 atomic_set(&reset_domain->in_gpu_reset, 1); 154 down_write(&reset_domain->sem); 155 } 156 157 158 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) 159 { 160 atomic_set(&reset_domain->in_gpu_reset, 0); 161 up_write(&reset_domain->sem); 162 } 163 164 void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf, 165 size_t len) 166 { 167 if (!buf || !len) 168 return; 169 170 switch (rst_ctxt->src) { 171 case AMDGPU_RESET_SRC_JOB: 172 if (rst_ctxt->job) { 173 snprintf(buf, len, "job hang on ring:%s", 174 rst_ctxt->job->base.sched->name); 175 } else { 176 strscpy(buf, "job hang", len); 177 } 178 break; 179 case AMDGPU_RESET_SRC_RAS: 180 strscpy(buf, "RAS error", len); 181 break; 182 case AMDGPU_RESET_SRC_MES: 183 strscpy(buf, "MES hang", len); 184 break; 185 case AMDGPU_RESET_SRC_HWS: 186 strscpy(buf, "HWS hang", len); 187 break; 188 case AMDGPU_RESET_SRC_USER: 189 strscpy(buf, "user trigger", len); 190 break; 191 default: 192 strscpy(buf, "unknown", len); 193 } 194 } 195