// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_device_types.h"
#include "xe_force_wake.h"
#include "xe_gt_stats.h"
#include "xe_gt_types.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_tlb_inval.h"
#include "xe_trace.h"

/**
 * DOC: Xe TLB invalidation
 *
 * Xe TLB invalidation is implemented in two layers. The first is the frontend
 * API, which provides an interface for TLB invalidations to the driver code.
 * The frontend handles seqno assignment, synchronization (fences), and the
 * timeout mechanism. The frontend is implemented via an embedded structure
 * xe_tlb_inval that includes a set of ops hooking into the backend. The
 * backend interacts with the hardware (or firmware) to perform the actual
 * invalidation.
 */

#define FENCE_STACK_BIT		DMA_FENCE_FLAG_USER_BITS

static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence)
{
        if (WARN_ON_ONCE(!fence->tlb_inval))
                return;

        xe_pm_runtime_put(fence->tlb_inval->xe);
        fence->tlb_inval = NULL; /* fini() should be called once */
}

static void
xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
{
        bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);

        lockdep_assert_held(&fence->tlb_inval->pending_lock);

        list_del(&fence->link);
        trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence);
        xe_tlb_inval_fence_fini(fence);
        dma_fence_signal(&fence->base);
        if (!stack)
                dma_fence_put(&fence->base);
}

static void
xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence)
{
        struct xe_tlb_inval *tlb_inval = fence->tlb_inval;

        spin_lock_irq(&tlb_inval->pending_lock);
        xe_tlb_inval_fence_signal(fence);
        spin_unlock_irq(&tlb_inval->pending_lock);
}

static void xe_tlb_inval_fence_timeout(struct work_struct *work)
{
        struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval,
                                                       fence_tdr.work);
        struct xe_device *xe = tlb_inval->xe;
        struct xe_tlb_inval_fence *fence, *next;
        long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval);

        tlb_inval->ops->flush(tlb_inval);

        spin_lock_irq(&tlb_inval->pending_lock);
        list_for_each_entry_safe(fence, next,
                                 &tlb_inval->pending_fences, link) {
                s64 since_inval_ms = ktime_ms_delta(ktime_get(),
                                                    fence->inval_time);

                if (msecs_to_jiffies(since_inval_ms) < timeout_delay)
                        break;

                trace_xe_tlb_inval_fence_timeout(xe, fence);
                drm_err(&xe->drm,
                        "TLB invalidation fence timeout, seqno=%d recv=%d",
                        fence->seqno, tlb_inval->seqno_recv);

                fence->base.error = -ETIME;
                xe_tlb_inval_fence_signal(fence);
        }
        if (!list_empty(&tlb_inval->pending_fences))
                queue_delayed_work(tlb_inval->timeout_wq, &tlb_inval->fence_tdr,
                                   timeout_delay);
        spin_unlock_irq(&tlb_inval->pending_lock);
}

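/*
 * Backend contract assumed by this frontend, sketched from the ops invoked in
 * this file (argument lists are inferred from the call sites here, not copied
 * from the ops structure definition):
 *
 *	->initialized(tlb_inval)	backend ready to accept requests
 *	->flush(tlb_inval)		flush outstanding invalidations to HW/FW
 *	->timeout_delay(tlb_inval)	fence TDR delay in jiffies
 *	->all(tlb_inval, seqno)		invalidate all TLBs
 *	->ggtt(tlb_inval, seqno)	invalidate GGTT TLBs
 *	->ppgtt(tlb_inval, seqno, start, end, asid, prl_sa)
 *					range/PPGTT invalidation
 */
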
/**
 * tlb_inval_fini() - Clean up TLB invalidation state
 * @drm: DRM device
 * @arg: opaque pointer to the TLB invalidation state (struct xe_tlb_inval)
 *
 * Cancel pending fence workers and clean up any additional
 * TLB invalidation state.
 */
static void tlb_inval_fini(struct drm_device *drm, void *arg)
{
        struct xe_tlb_inval *tlb_inval = arg;

        xe_tlb_inval_reset(tlb_inval);
}

/**
 * xe_gt_tlb_inval_init_early() - Initialize TLB invalidation state
 * @gt: GT structure
 *
 * Initialize TLB invalidation state. This is purely software initialization
 * and should be called once during driver load.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_gt_tlb_inval_init_early(struct xe_gt *gt)
{
        struct xe_device *xe = gt_to_xe(gt);
        struct xe_tlb_inval *tlb_inval = &gt->tlb_inval;
        int err;

        tlb_inval->xe = xe;
        tlb_inval->seqno = 1;
        INIT_LIST_HEAD(&tlb_inval->pending_fences);
        spin_lock_init(&tlb_inval->pending_lock);
        spin_lock_init(&tlb_inval->lock);
        INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout);

        err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock);
        if (err)
                return err;

        tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm,
                                                         "gt-tlb-inval-job-wq",
                                                         WQ_MEM_RECLAIM);
        if (IS_ERR(tlb_inval->job_wq))
                return PTR_ERR(tlb_inval->job_wq);

        tlb_inval->timeout_wq = gt->ordered_wq;
        if (IS_ERR(tlb_inval->timeout_wq))
                return PTR_ERR(tlb_inval->timeout_wq);

        /* XXX: Blindly setting up backend to GuC */
        xe_guc_tlb_inval_init_early(&gt->uc.guc, tlb_inval);

        return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval);
}

/**
 * xe_tlb_inval_reset() - TLB invalidation reset
 * @tlb_inval: TLB invalidation client
 *
 * Signal any pending invalidation fences; should be called during a GT reset.
 */
void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
{
        struct xe_tlb_inval_fence *fence, *next;
        int pending_seqno;

        /*
         * We can get here before the backends are even initialized if we're
         * wedging very early, in which case there are not going to be any
         * pending fences so we can bail immediately.
         */
        if (!tlb_inval->ops->initialized(tlb_inval))
                return;

        /*
         * Backend is already disabled at this point. No new TLB requests can
         * appear.
         */

        mutex_lock(&tlb_inval->seqno_lock);
        spin_lock_irq(&tlb_inval->pending_lock);
        cancel_delayed_work(&tlb_inval->fence_tdr);
        /*
         * We might have various kworkers waiting for TLB flushes to complete
         * which are not tracked with an explicit TLB fence, however at this
         * stage that will never happen since the backend is already disabled,
         * so make sure we signal them here under the assumption that we have
         * completed a full GT reset.
         */
        if (tlb_inval->seqno == 1)
                pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
        else
                pending_seqno = tlb_inval->seqno - 1;
        WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno);

        list_for_each_entry_safe(fence, next,
                                 &tlb_inval->pending_fences, link)
                xe_tlb_inval_fence_signal(fence);
        spin_unlock_irq(&tlb_inval->pending_lock);
        mutex_unlock(&tlb_inval->seqno_lock);
}

/**
 * xe_tlb_inval_reset_timeout() - Reset TLB inval fence timeout
 * @tlb_inval: TLB invalidation client
 *
 * Reset the TLB invalidation timeout timer.
 */
static void xe_tlb_inval_reset_timeout(struct xe_tlb_inval *tlb_inval)
{
        lockdep_assert_held(&tlb_inval->pending_lock);

        mod_delayed_work(system_wq, &tlb_inval->fence_tdr,
                         tlb_inval->ops->timeout_delay(tlb_inval));
}

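/*
 * xe_tlb_inval_seqno_past() below uses wrap-safe serial-number arithmetic
 * over the seqno space [1, TLB_INVALIDATION_SEQNO_MAX). For example, if
 * seqno_recv has just wrapped back to a small value while a pending fence
 * still carries a seqno close to TLB_INVALIDATION_SEQNO_MAX, the difference
 * exceeds half the range and the fence is treated as already completed;
 * conversely, a small fence seqno compared against a seqno_recv near the top
 * of the range is treated as still pending. Seqnos within half the range of
 * each other fall through to the plain seqno_recv >= seqno comparison.
 */
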
static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno)
{
        int seqno_recv = READ_ONCE(tlb_inval->seqno_recv);

        lockdep_assert_held(&tlb_inval->pending_lock);

        if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
                return false;

        if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
                return true;

        return seqno_recv >= seqno;
}

static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence)
{
        struct xe_tlb_inval *tlb_inval = fence->tlb_inval;

        fence->seqno = tlb_inval->seqno;
        trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence);

        spin_lock_irq(&tlb_inval->pending_lock);
        fence->inval_time = ktime_get();
        list_add_tail(&fence->link, &tlb_inval->pending_fences);

        if (list_is_singular(&tlb_inval->pending_fences))
                queue_delayed_work(tlb_inval->timeout_wq, &tlb_inval->fence_tdr,
                                   tlb_inval->ops->timeout_delay(tlb_inval));
        spin_unlock_irq(&tlb_inval->pending_lock);

        tlb_inval->seqno = (tlb_inval->seqno + 1) %
                TLB_INVALIDATION_SEQNO_MAX;
        if (!tlb_inval->seqno)
                tlb_inval->seqno = 1;
}

/*
 * Common issue path: assign a seqno, track the fence, then call the backend
 * op. On failure the fence is signaled immediately; -ECANCELED from the
 * backend is reported to the caller as success since the fence has already
 * been signaled.
 */
#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...)          \
({                                                                      \
        int __ret;                                                      \
                                                                        \
        xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops);               \
        xe_assert((__tlb_inval)->xe, (__fence));                        \
                                                                        \
        mutex_lock(&(__tlb_inval)->seqno_lock);                         \
        xe_tlb_inval_fence_prep((__fence));                             \
        __ret = op((__tlb_inval), (__fence)->seqno, ##args);            \
        if (__ret < 0)                                                  \
                xe_tlb_inval_fence_signal_unlocked((__fence));          \
        mutex_unlock(&(__tlb_inval)->seqno_lock);                       \
                                                                        \
        __ret == -ECANCELED ? 0 : __ret;                                \
})

/**
 * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs
 * @tlb_inval: TLB invalidation client
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion
 *
 * Issue a TLB invalidation for all TLBs. Completion of the TLB invalidation is
 * asynchronous and the caller can use the invalidation fence to wait for
 * completion.
 *
 * Return: 0 on success, negative error code on error
 */
int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
                     struct xe_tlb_inval_fence *fence)
{
        return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all);
}

/**
 * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT
 * @tlb_inval: TLB invalidation client
 *
 * Issue a TLB invalidation for the GGTT and wait for the invalidation to
 * complete.
 *
 * Return: 0 on success, negative error code on error
 */
int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
{
        struct xe_tlb_inval_fence fence, *fence_ptr = &fence;
        int ret;

        xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true);
        ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt);
        xe_tlb_inval_fence_wait(fence_ptr);

        return ret;
}

/**
 * xe_tlb_inval_range() - Issue a TLB invalidation for an address range
 * @tlb_inval: TLB invalidation client
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion
 * @start: start address
 * @end: end address
 * @asid: address space id
 * @prl_sa: suballocation of the page reclaim list if used, NULL indicates a
 * PPC flush
 *
 * Issue a range-based TLB invalidation if supported, otherwise fall back to a
 * full TLB invalidation. Completion of the TLB invalidation is asynchronous
 * and the caller can use the invalidation fence to wait for completion.
 *
 * Return: 0 on success, negative error code on error
 */
int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
                       struct xe_tlb_inval_fence *fence, u64 start, u64 end,
                       u32 asid, struct drm_suballoc *prl_sa)
{
        return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt,
                                  start, end, asid, prl_sa);
}

/**
 * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM
 * @tlb_inval: TLB invalidation client
 * @vm: VM to invalidate
 *
 * Invalidate the entire VM's address space and wait for the invalidation to
 * complete.
 */
void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
{
        struct xe_tlb_inval_fence fence;
        u64 range = 1ull << vm->xe->info.va_bits;

        xe_tlb_inval_fence_init(tlb_inval, &fence, true);
        xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid, NULL);
        xe_tlb_inval_fence_wait(&fence);
}

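/*
 * A minimal sketch of the caller-side pattern for a synchronous range
 * invalidation; tlb_inval, start, end and asid stand in for values the caller
 * already has:
 *
 *	struct xe_tlb_inval_fence fence;
 *
 *	xe_tlb_inval_fence_init(tlb_inval, &fence, true);
 *	xe_tlb_inval_range(tlb_inval, &fence, start, end, asid, NULL);
 *	xe_tlb_inval_fence_wait(&fence);
 *
 * This mirrors what xe_tlb_inval_ggtt() and xe_tlb_inval_vm() above do
 * internally. Callers that cannot block may instead pass stack == false and
 * wait on (or attach a dma-fence callback to) fence.base later.
 */
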
/**
 * xe_tlb_inval_done_handler() - TLB invalidation done handler
 * @tlb_inval: TLB invalidation client
 * @seqno: seqno of invalidation that is done
 *
 * Update the recv seqno, signal any completed TLB invalidation fences, and
 * restart the TDR.
 */
void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno)
{
        struct xe_device *xe = tlb_inval->xe;
        struct xe_tlb_inval_fence *fence, *next;
        unsigned long flags;

        /*
         * This can be run both directly from the IRQ handler and from
         * process_g2h_msg(). Only one may process any individual CT message,
         * however the order they are processed here could result in skipping a
         * seqno. To handle that we just process all the seqnos from the last
         * seqno_recv up to and including the one just received. The delta
         * should be very small so there shouldn't be many pending_fences we
         * actually need to iterate over here.
         *
         * From the GuC POV we expect the seqnos to always appear in-order, so
         * if we see something later in the timeline we can be sure that
         * anything appearing earlier has already signalled, just that we have
         * yet to officially process the CT message, e.g. when racing against
         * process_g2h_msg().
         */
        spin_lock_irqsave(&tlb_inval->pending_lock, flags);
        if (seqno == TLB_INVALIDATION_SEQNO_INVALID) {
                xe_tlb_inval_reset_timeout(tlb_inval);
                spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
                return;
        }

        if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) {
                spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
                return;
        }

        WRITE_ONCE(tlb_inval->seqno_recv, seqno);

        list_for_each_entry_safe(fence, next,
                                 &tlb_inval->pending_fences, link) {
                trace_xe_tlb_inval_fence_recv(xe, fence);

                if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno))
                        break;

                xe_tlb_inval_fence_signal(fence);
        }

        if (!list_empty(&tlb_inval->pending_fences))
                mod_delayed_work(tlb_inval->timeout_wq,
                                 &tlb_inval->fence_tdr,
                                 tlb_inval->ops->timeout_delay(tlb_inval));
        else
                cancel_delayed_work(&tlb_inval->fence_tdr);

        spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
}

static const char *
xe_inval_fence_get_driver_name(struct dma_fence *dma_fence)
{
        return "xe";
}

static const char *
xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence)
{
        return "tlb_inval_fence";
}

static const struct dma_fence_ops inval_fence_ops = {
        .get_driver_name = xe_inval_fence_get_driver_name,
        .get_timeline_name = xe_inval_fence_get_timeline_name,
};

/**
 * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence
 * @tlb_inval: TLB invalidation client
 * @fence: TLB invalidation fence to initialize
 * @stack: fence is a stack variable
 *
 * Initialize a TLB invalidation fence for use. xe_tlb_inval_fence_fini() is
 * called automatically when the fence is signaled (all fences must signal),
 * even on error.
 */
void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
                             struct xe_tlb_inval_fence *fence,
                             bool stack)
{
        xe_pm_runtime_get_noresume(tlb_inval->xe);

        spin_lock_irq(&tlb_inval->lock);
        dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock,
                       dma_fence_context_alloc(1), 1);
        spin_unlock_irq(&tlb_inval->lock);
        INIT_LIST_HEAD(&fence->link);
        if (stack)
                set_bit(FENCE_STACK_BIT, &fence->base.flags);
        else
                dma_fence_get(&fence->base);
        fence->tlb_inval = tlb_inval;
}
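
/*
 * Lifetime rules that follow from the above: a stack fence (stack == true) is
 * not reference counted by this layer and must be waited on before it goes out
 * of scope, while a non-stack fence gets an extra reference here that is only
 * dropped once the fence signals, so it stays valid until the caller drops its
 * own reference with dma_fence_put(). Each fence is also initialized on its
 * own dma-fence context (dma_fence_context_alloc(1)), so invalidation fences
 * are not ordered against one another on a shared timeline; ordering comes
 * from the TLB invalidation seqno and the pending_fences list alone.
 */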