1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <christian.koenig@amd.com> 29 */ 30 31 #include <drm/drmP.h> 32 #include "amdgpu.h" 33 #include "amdgpu_trace.h" 34 35 struct amdgpu_sync_entry { 36 struct hlist_node node; 37 struct fence *fence; 38 }; 39 40 /** 41 * amdgpu_sync_create - zero init sync object 42 * 43 * @sync: sync object to initialize 44 * 45 * Just clear the sync object for now. 46 */ 47 void amdgpu_sync_create(struct amdgpu_sync *sync) 48 { 49 unsigned i; 50 51 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) 52 sync->semaphores[i] = NULL; 53 54 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 55 sync->sync_to[i] = NULL; 56 57 hash_init(sync->fences); 58 sync->last_vm_update = NULL; 59 } 60 61 static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) 62 { 63 struct amdgpu_fence *a_fence = to_amdgpu_fence(f); 64 struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 65 66 if (a_fence) 67 return a_fence->ring->adev == adev; 68 69 if (s_fence) { 70 struct amdgpu_ring *ring; 71 72 ring = container_of(s_fence->sched, struct amdgpu_ring, sched); 73 return ring->adev == adev; 74 } 75 76 return false; 77 } 78 79 static bool amdgpu_sync_test_owner(struct fence *f, void *owner) 80 { 81 struct amdgpu_fence *a_fence = to_amdgpu_fence(f); 82 struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 83 if (s_fence) 84 return s_fence->owner == owner; 85 if (a_fence) 86 return a_fence->owner == owner; 87 return false; 88 } 89 90 /** 91 * amdgpu_sync_fence - remember to sync to this fence 92 * 93 * @sync: sync object to add fence to 94 * @fence: fence to sync to 95 * 96 */ 97 int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, 98 struct fence *f) 99 { 100 struct amdgpu_sync_entry *e; 101 struct amdgpu_fence *fence; 102 struct amdgpu_fence *other; 103 struct fence *tmp, *later; 104 105 if (!f) 106 return 0; 107 108 if (amdgpu_sync_same_dev(adev, f) && 109 amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) { 110 if (sync->last_vm_update) { 111 tmp = sync->last_vm_update; 112 BUG_ON(f->context != tmp->context); 113 later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp; 114 sync->last_vm_update = fence_get(later); 115 fence_put(tmp); 116 } else 117 sync->last_vm_update = fence_get(f); 118 } 119 120 fence = to_amdgpu_fence(f); 121 if (!fence || fence->ring->adev != adev) { 122 hash_for_each_possible(sync->fences, e, node, f->context) { 123 struct fence *new; 124 if (unlikely(e->fence->context != f->context)) 125 continue; 126 new = fence_get(fence_later(e->fence, f)); 127 if (new) { 128 fence_put(e->fence); 129 e->fence = new; 130 } 131 return 0; 132 } 133 134 e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); 135 if (!e) 136 return -ENOMEM; 137 138 hash_add(sync->fences, &e->node, f->context); 139 e->fence = fence_get(f); 140 return 0; 141 } 142 143 other = sync->sync_to[fence->ring->idx]; 144 sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( 145 amdgpu_fence_later(fence, other)); 146 amdgpu_fence_unref(&other); 147 148 return 0; 149 } 150 151 static void *amdgpu_sync_get_owner(struct fence *f) 152 { 153 struct amdgpu_fence *a_fence = to_amdgpu_fence(f); 154 struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 155 156 if (s_fence) 157 return s_fence->owner; 158 else if (a_fence) 159 return a_fence->owner; 160 return AMDGPU_FENCE_OWNER_UNDEFINED; 161 } 162 163 /** 164 * amdgpu_sync_resv - use the semaphores to sync to a reservation object 165 * 166 * @sync: sync object to add fences from reservation object to 167 * @resv: reservation object with embedded fence 168 * @shared: true if we should only sync to the exclusive fence 169 * 170 * Sync to the fence using the semaphore objects 171 */ 172 int amdgpu_sync_resv(struct amdgpu_device *adev, 173 struct amdgpu_sync *sync, 174 struct reservation_object *resv, 175 void *owner) 176 { 177 struct reservation_object_list *flist; 178 struct fence *f; 179 void *fence_owner; 180 unsigned i; 181 int r = 0; 182 183 if (resv == NULL) 184 return -EINVAL; 185 186 /* always sync to the exclusive fence */ 187 f = reservation_object_get_excl(resv); 188 r = amdgpu_sync_fence(adev, sync, f); 189 190 flist = reservation_object_get_list(resv); 191 if (!flist || r) 192 return r; 193 194 for (i = 0; i < flist->shared_count; ++i) { 195 f = rcu_dereference_protected(flist->shared[i], 196 reservation_object_held(resv)); 197 if (amdgpu_sync_same_dev(adev, f)) { 198 /* VM updates are only interesting 199 * for other VM updates and moves. 200 */ 201 fence_owner = amdgpu_sync_get_owner(f); 202 if ((owner != AMDGPU_FENCE_OWNER_MOVE) && 203 (fence_owner != AMDGPU_FENCE_OWNER_MOVE) && 204 ((owner == AMDGPU_FENCE_OWNER_VM) != 205 (fence_owner == AMDGPU_FENCE_OWNER_VM))) 206 continue; 207 208 /* Ignore fence from the same owner as 209 * long as it isn't undefined. 210 */ 211 if (owner != AMDGPU_FENCE_OWNER_UNDEFINED && 212 fence_owner == owner) 213 continue; 214 } 215 216 r = amdgpu_sync_fence(adev, sync, f); 217 if (r) 218 break; 219 } 220 return r; 221 } 222 223 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) 224 { 225 struct amdgpu_sync_entry *e; 226 struct hlist_node *tmp; 227 struct fence *f; 228 int i; 229 230 hash_for_each_safe(sync->fences, i, tmp, e, node) { 231 232 f = e->fence; 233 234 hash_del(&e->node); 235 kfree(e); 236 237 if (!fence_is_signaled(f)) 238 return f; 239 240 fence_put(f); 241 } 242 return NULL; 243 } 244 245 int amdgpu_sync_wait(struct amdgpu_sync *sync) 246 { 247 struct amdgpu_sync_entry *e; 248 struct hlist_node *tmp; 249 int i, r; 250 251 hash_for_each_safe(sync->fences, i, tmp, e, node) { 252 r = fence_wait(e->fence, false); 253 if (r) 254 return r; 255 256 hash_del(&e->node); 257 fence_put(e->fence); 258 kfree(e); 259 } 260 261 if (amdgpu_enable_semaphores) 262 return 0; 263 264 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 265 struct amdgpu_fence *fence = sync->sync_to[i]; 266 if (!fence) 267 continue; 268 269 r = fence_wait(&fence->base, false); 270 if (r) 271 return r; 272 } 273 274 return 0; 275 } 276 277 /** 278 * amdgpu_sync_rings - sync ring to all registered fences 279 * 280 * @sync: sync object to use 281 * @ring: ring that needs sync 282 * 283 * Ensure that all registered fences are signaled before letting 284 * the ring continue. The caller must hold the ring lock. 285 */ 286 int amdgpu_sync_rings(struct amdgpu_sync *sync, 287 struct amdgpu_ring *ring) 288 { 289 struct amdgpu_device *adev = ring->adev; 290 unsigned count = 0; 291 int i, r; 292 293 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 294 struct amdgpu_fence *fence = sync->sync_to[i]; 295 struct amdgpu_semaphore *semaphore; 296 struct amdgpu_ring *other = adev->rings[i]; 297 298 /* check if we really need to sync */ 299 if (!amdgpu_fence_need_sync(fence, ring)) 300 continue; 301 302 /* prevent GPU deadlocks */ 303 if (!other->ready) { 304 dev_err(adev->dev, "Syncing to a disabled ring!"); 305 return -EINVAL; 306 } 307 308 if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores || 309 (count >= AMDGPU_NUM_SYNCS)) { 310 /* not enough room, wait manually */ 311 r = fence_wait(&fence->base, false); 312 if (r) 313 return r; 314 continue; 315 } 316 r = amdgpu_semaphore_create(adev, &semaphore); 317 if (r) 318 return r; 319 320 sync->semaphores[count++] = semaphore; 321 322 /* allocate enough space for sync command */ 323 r = amdgpu_ring_alloc(other, 16); 324 if (r) 325 return r; 326 327 /* emit the signal semaphore */ 328 if (!amdgpu_semaphore_emit_signal(other, semaphore)) { 329 /* signaling wasn't successful wait manually */ 330 amdgpu_ring_undo(other); 331 r = fence_wait(&fence->base, false); 332 if (r) 333 return r; 334 continue; 335 } 336 337 /* we assume caller has already allocated space on waiters ring */ 338 if (!amdgpu_semaphore_emit_wait(ring, semaphore)) { 339 /* waiting wasn't successful wait manually */ 340 amdgpu_ring_undo(other); 341 r = fence_wait(&fence->base, false); 342 if (r) 343 return r; 344 continue; 345 } 346 347 amdgpu_ring_commit(other); 348 amdgpu_fence_note_sync(fence, ring); 349 } 350 351 return 0; 352 } 353 354 /** 355 * amdgpu_sync_free - free the sync object 356 * 357 * @adev: amdgpu_device pointer 358 * @sync: sync object to use 359 * @fence: fence to use for the free 360 * 361 * Free the sync object by freeing all semaphores in it. 362 */ 363 void amdgpu_sync_free(struct amdgpu_device *adev, 364 struct amdgpu_sync *sync, 365 struct fence *fence) 366 { 367 struct amdgpu_sync_entry *e; 368 struct hlist_node *tmp; 369 unsigned i; 370 371 hash_for_each_safe(sync->fences, i, tmp, e, node) { 372 hash_del(&e->node); 373 fence_put(e->fence); 374 kfree(e); 375 } 376 377 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) 378 amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); 379 380 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 381 amdgpu_fence_unref(&sync->sync_to[i]); 382 383 fence_put(sync->last_vm_update); 384 } 385