/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/dma-mapping.h>

#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "amdgpu_reset.h"

/**
 * amdgpu_ih_ring_init - initialize the IH state
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to initialize
 * @ring_size: ring size to allocate
 * @use_bus_addr: true when we can use dma_alloc_coherent
 *
 * Initializes the IH state and allocates a buffer
 * for the IH ring buffer.
 * Returns 0 for success, errors for failure.
 */
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
			unsigned ring_size, bool use_bus_addr)
{
	u32 rb_bufsz;
	int r;

	/* Align ring size */
	rb_bufsz = order_base_2(ring_size / 4);
	ring_size = (1 << rb_bufsz) * 4;
	ih->ring_size = ring_size;
	ih->ptr_mask = ih->ring_size - 1;
	ih->rptr = 0;
	ih->use_bus_addr = use_bus_addr;

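	/* The ring either lives in a single coherent allocation with 4 byte
	 * wptr and rptr shadows appended after the ring data, or in a GTT
	 * buffer object with the wptr/rptr shadows placed in two device
	 * writeback slots.
	 */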
	if (use_bus_addr) {
		dma_addr_t dma_addr;

		if (ih->ring)
			return 0;

		/* add 8 bytes for the rptr/wptr shadows and
		 * add them to the end of the ring allocation.
		 */
		ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
					      &dma_addr, GFP_KERNEL);
		if (ih->ring == NULL)
			return -ENOMEM;

		ih->gpu_addr = dma_addr;
		ih->wptr_addr = dma_addr + ih->ring_size;
		ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
		ih->rptr_addr = dma_addr + ih->ring_size + 4;
		ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
	} else {
		unsigned wptr_offs, rptr_offs;

		r = amdgpu_device_wb_get(adev, &wptr_offs);
		if (r)
			return r;

		r = amdgpu_device_wb_get(adev, &rptr_offs);
		if (r) {
			amdgpu_device_wb_free(adev, wptr_offs);
			return r;
		}

		r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT,
					    &ih->ring_obj, &ih->gpu_addr,
					    (void **)&ih->ring);
		if (r) {
			amdgpu_device_wb_free(adev, rptr_offs);
			amdgpu_device_wb_free(adev, wptr_offs);
			return r;
		}

		ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
		ih->wptr_cpu = &adev->wb.wb[wptr_offs];
		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
	}

	init_waitqueue_head(&ih->wait_process);
	return 0;
}

/**
 * amdgpu_ih_ring_fini - tear down the IH state
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to tear down
 *
 * Tears down the IH state and frees the buffer
 * used for the IH ring buffer.
 */
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{

	if (!ih->ring)
		return;

	if (ih->use_bus_addr) {

		/* add 8 bytes for the rptr/wptr shadows and
		 * add them to the end of the ring allocation.
		 */
		dma_free_coherent(adev->dev, ih->ring_size + 8,
				  (void *)ih->ring, ih->gpu_addr);
		ih->ring = NULL;
	} else {
		amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
				      (void **)&ih->ring);
		amdgpu_device_wb_free(adev, (ih->wptr_addr - adev->wb.gpu_addr) / 4);
		amdgpu_device_wb_free(adev, (ih->rptr_addr - adev->wb.gpu_addr) / 4);
	}
}

/**
 * amdgpu_ih_ring_write - write IV to the ring buffer
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to write to
 * @iv: the iv to write
 * @num_dw: size of the iv in dw
 *
 * Writes an IV to the ring buffer using the CPU and increments the wptr.
 * Used for testing and delegating IVs to a software ring.
 */
void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
			  const uint32_t *iv, unsigned int num_dw)
{
	uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
	unsigned int i;

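	/* The wptr shadow holds a byte offset; the >> 2 above converts it to
	 * a dword index for the CPU copy below, and it is shifted back to
	 * bytes and wrapped with ptr_mask before being committed.
	 */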
	for (i = 0; i < num_dw; ++i)
		ih->ring[wptr++] = cpu_to_le32(iv[i]);

	wptr <<= 2;
	wptr &= ih->ptr_mask;

	/* Only commit the new wptr if we don't overflow */
	if (wptr != READ_ONCE(ih->rptr)) {
		wmb();
		WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
	} else if (adev->irq.retry_cam_enabled) {
		dev_warn_once(adev->dev,
			      "IH soft ring buffer overflow 0x%X, 0x%X\n",
			      wptr, ih->rptr);
	}
}

/**
 * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to process
 *
 * Used to ensure the ring has processed IVs up to the checkpoint write pointer.
 */
int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
					    struct amdgpu_ih_ring *ih)
{
	uint32_t checkpoint_wptr;
	uint64_t checkpoint_ts;
	long timeout = HZ;

	if (!ih->enabled || adev->shutdown)
		return -ENODEV;

	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
	/* Order wptr with ring data. */
	rmb();
	checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);

	return wait_event_interruptible_timeout(ih->wait_process,
		    amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
		    ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
}

/**
 * amdgpu_ih_process - interrupt handler
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to process
 *
 * Interrupt handler (VI), walk the IH ring.
 * Returns irq process return code.
 */
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{
	unsigned int count;
	u32 wptr;

	if (!ih->enabled || adev->shutdown)
		return IRQ_NONE;

	wptr = amdgpu_ih_get_wptr(adev, ih);

restart_ih:
	count = AMDGPU_IH_MAX_NUM_IVS;
	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	while (ih->rptr != wptr && --count) {
		amdgpu_irq_dispatch(adev, ih);
		ih->rptr &= ih->ptr_mask;
	}

	if (!ih->overflow)
		amdgpu_ih_set_rptr(adev, ih);

	wake_up_all(&ih->wait_process);

	/* make sure wptr hasn't changed while processing */
	wptr = amdgpu_ih_get_wptr(adev, ih);
	if (wptr != ih->rptr)
		if (!ih->overflow)
			goto restart_ih;

	if (ih->overflow)
		if (amdgpu_sriov_runtime(adev))
			WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
								&adev->virt.flr_work),
				  "Failed to queue work! at %s", __func__);

	return IRQ_HANDLED;
}

/**
 * amdgpu_ih_decode_iv_helper - decode an interrupt vector
 *
 * @adev: amdgpu_device pointer
 * @ih: ih ring to process
 * @entry: IV entry
 *
 * Decodes the interrupt vector at the current rptr
 * position and also advances the position for Vega10
 * and later GPUs.
 */
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
				struct amdgpu_ih_ring *ih,
				struct amdgpu_iv_entry *entry)
{
	/* wptr/rptr are in bytes! */
	u32 ring_index = ih->rptr >> 2;
	uint32_t dw[8];

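	/* One IV on Vega10 and later is 32 bytes, i.e. 8 dwords:
	 *   dw0: client_id[7:0], src_id[15:8], ring_id[23:16],
	 *        vmid[27:24], vmid_src[31]
	 *   dw1, dw2[15:0]: 48 bit timestamp, dw2[31]: timestamp_src
	 *   dw3: pasid[15:0], node_id[23:16]
	 *   dw4-dw7: source data
	 */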
	dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
	dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
	dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
	dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
	dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
	dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
	dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
	dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);

	entry->client_id = dw[0] & 0xff;
	entry->src_id = (dw[0] >> 8) & 0xff;
	entry->ring_id = (dw[0] >> 16) & 0xff;
	entry->vmid = (dw[0] >> 24) & 0xf;
	entry->vmid_src = (dw[0] >> 31);
	entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
	entry->timestamp_src = dw[2] >> 31;
	entry->pasid = dw[3] & 0xffff;
	entry->node_id = (dw[3] >> 16) & 0xff;
	entry->src_data[0] = dw[4];
	entry->src_data[1] = dw[5];
	entry->src_data[2] = dw[6];
	entry->src_data[3] = dw[7];

	/* wptr/rptr are in bytes! */
	ih->rptr += 32;
}

uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
				       signed int offset)
{
	uint32_t iv_size = 32;
	uint32_t ring_index;
	uint32_t dw1, dw2;

	rptr += iv_size * offset;
	ring_index = (rptr & ih->ptr_mask) >> 2;

	dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
	dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
	return dw1 | ((u64)(dw2 & 0xffff) << 32);
}

const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{
	return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
	       ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
}