// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_gt_stats.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_tlb_inval.h"
#include "xe_trace.h"

/**
 * DOC: Xe TLB invalidation
 *
 * Xe TLB invalidation is implemented in two layers. The first is the frontend
 * API, which provides an interface for TLB invalidations to the driver code.
 * The frontend handles seqno assignment, synchronization (fences), and the
 * timeout mechanism. The frontend is implemented via an embedded structure
 * xe_tlb_inval that includes a set of ops hooking into the backend. The
 * backend interacts with the hardware (or firmware) to perform the actual
 * invalidation.
 */
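
/*
 * Backend contract, as exercised by the frontend code in this file. This is a
 * non-authoritative sketch: argument and return types are approximations
 * inferred from the call sites below, the authoritative definition lives in
 * the xe_tlb_inval headers:
 *
 *	struct xe_tlb_inval_ops {
 *		bool (*initialized)(struct xe_tlb_inval *tlb_inval);
 *		void (*flush)(struct xe_tlb_inval *tlb_inval);
 *		long (*timeout_delay)(struct xe_tlb_inval *tlb_inval);
 *		int (*all)(struct xe_tlb_inval *tlb_inval, int seqno);
 *		int (*ggtt)(struct xe_tlb_inval *tlb_inval, int seqno);
 *		int (*ppgtt)(struct xe_tlb_inval *tlb_inval, int seqno,
 *			     u64 start, u64 end, u32 asid);
 *	};
 *
 * The frontend drives the invalidation flow purely through these hooks.
 */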

#define FENCE_STACK_BIT		DMA_FENCE_FLAG_USER_BITS

static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence)
{
	if (WARN_ON_ONCE(!fence->tlb_inval))
		return;

	xe_pm_runtime_put(fence->tlb_inval->xe);
	fence->tlb_inval = NULL; /* fini() should be called once */
}

static void
xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
{
	bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);

	lockdep_assert_held(&fence->tlb_inval->pending_lock);

	list_del(&fence->link);
	trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence);
	xe_tlb_inval_fence_fini(fence);
	dma_fence_signal(&fence->base);
	if (!stack)
		dma_fence_put(&fence->base);
}

static void
xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence)
{
	struct xe_tlb_inval *tlb_inval = fence->tlb_inval;

	spin_lock_irq(&tlb_inval->pending_lock);
	xe_tlb_inval_fence_signal(fence);
	spin_unlock_irq(&tlb_inval->pending_lock);
}

static void xe_tlb_inval_fence_timeout(struct work_struct *work)
{
	struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval,
						      fence_tdr.work);
	struct xe_device *xe = tlb_inval->xe;
	struct xe_tlb_inval_fence *fence, *next;
	long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval);

	tlb_inval->ops->flush(tlb_inval);

	spin_lock_irq(&tlb_inval->pending_lock);
	list_for_each_entry_safe(fence, next,
				 &tlb_inval->pending_fences, link) {
		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
						    fence->inval_time);

		if (msecs_to_jiffies(since_inval_ms) < timeout_delay)
			break;

		trace_xe_tlb_inval_fence_timeout(xe, fence);
		drm_err(&xe->drm,
			"TLB invalidation fence timeout, seqno=%d recv=%d",
			fence->seqno, tlb_inval->seqno_recv);

		fence->base.error = -ETIME;
		xe_tlb_inval_fence_signal(fence);
	}
	if (!list_empty(&tlb_inval->pending_fences))
		queue_delayed_work(system_wq, &tlb_inval->fence_tdr,
				   timeout_delay);
	spin_unlock_irq(&tlb_inval->pending_lock);
}

/**
 * tlb_inval_fini - Clean up TLB invalidation state
 * @drm: @drm_device
 * @arg: pointer to struct @xe_tlb_inval
 *
 * Cancel pending fence workers and clean up any additional
 * TLB invalidation state.
 */
static void tlb_inval_fini(struct drm_device *drm, void *arg)
{
	struct xe_tlb_inval *tlb_inval = arg;

	xe_tlb_inval_reset(tlb_inval);
}

/**
 * xe_gt_tlb_inval_init_early - Initialize TLB invalidation state
 * @gt: GT structure
 *
 * Initialize TLB invalidation state. This is purely software initialization
 * and should be called once during driver load.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_gt_tlb_inval_init_early(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tlb_inval *tlb_inval = &gt->tlb_inval;
	int err;

	tlb_inval->xe = xe;
	tlb_inval->seqno = 1;
	INIT_LIST_HEAD(&tlb_inval->pending_fences);
	spin_lock_init(&tlb_inval->pending_lock);
	spin_lock_init(&tlb_inval->lock);
	INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout);

	err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock);
	if (err)
		return err;

	tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm,
							 "gt-tlb-inval-job-wq",
							 WQ_MEM_RECLAIM);
	if (IS_ERR(tlb_inval->job_wq))
		return PTR_ERR(tlb_inval->job_wq);

	/* XXX: Blindly setting up backend to GuC */
	xe_guc_tlb_inval_init_early(&gt->uc.guc, tlb_inval);

	return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval);
}

/**
 * xe_tlb_inval_reset() - TLB invalidation reset
 * @tlb_inval: TLB invalidation client
 *
 * Signal any pending invalidation fences. Should be called during a GT reset.
 */
void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
{
	struct xe_tlb_inval_fence *fence, *next;
	int pending_seqno;

	/*
	 * We can get here before the backends are even initialized if we're
	 * wedging very early, in which case there are not going to be any
	 * pending fences so we can bail immediately.
	 */
	if (!tlb_inval->ops->initialized(tlb_inval))
		return;

	/*
	 * Backend is already disabled at this point. No new TLB requests can
	 * appear.
	 */

	mutex_lock(&tlb_inval->seqno_lock);
	spin_lock_irq(&tlb_inval->pending_lock);
	cancel_delayed_work(&tlb_inval->fence_tdr);
	/*
	 * We might have various kworkers waiting for TLB flushes to complete
	 * which are not tracked with an explicit TLB fence, however at this
	 * stage that will never happen since the backend is already disabled,
	 * so make sure we signal them here under the assumption that we have
	 * completed a full GT reset.
	 */
	if (tlb_inval->seqno == 1)
		pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
	else
		pending_seqno = tlb_inval->seqno - 1;
	WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno);

	list_for_each_entry_safe(fence, next,
				 &tlb_inval->pending_fences, link)
		xe_tlb_inval_fence_signal(fence);
	spin_unlock_irq(&tlb_inval->pending_lock);
	mutex_unlock(&tlb_inval->seqno_lock);
}
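
/*
 * Seqnos wrap around at TLB_INVALIDATION_SEQNO_MAX, so "has this seqno already
 * been received" is decided with a windowed comparison: a delta of more than
 * half the seqno space is treated as wraparound rather than as a genuinely
 * old or future seqno.
 */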
static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno)
{
	int seqno_recv = READ_ONCE(tlb_inval->seqno_recv);

	lockdep_assert_held(&tlb_inval->pending_lock);

	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
		return false;

	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
		return true;

	return seqno_recv >= seqno;
}

static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence)
{
	struct xe_tlb_inval *tlb_inval = fence->tlb_inval;

	fence->seqno = tlb_inval->seqno;
	trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence);

	spin_lock_irq(&tlb_inval->pending_lock);
	fence->inval_time = ktime_get();
	list_add_tail(&fence->link, &tlb_inval->pending_fences);

	if (list_is_singular(&tlb_inval->pending_fences))
		queue_delayed_work(system_wq, &tlb_inval->fence_tdr,
				   tlb_inval->ops->timeout_delay(tlb_inval));
	spin_unlock_irq(&tlb_inval->pending_lock);

	tlb_inval->seqno = (tlb_inval->seqno + 1) %
		TLB_INVALIDATION_SEQNO_MAX;
	if (!tlb_inval->seqno)
		tlb_inval->seqno = 1;
}
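
/*
 * Common issue path: assign a seqno and queue the fence on the pending list
 * under the seqno lock, then call the backend op. If the backend returns an
 * error the fence is signalled immediately; -ECANCELED from the backend is
 * folded into a 0 return for the caller.
 */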
#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...)		\
({									\
	int __ret;							\
									\
	xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops);		\
	xe_assert((__tlb_inval)->xe, (__fence));			\
									\
	mutex_lock(&(__tlb_inval)->seqno_lock);				\
	xe_tlb_inval_fence_prep((__fence));				\
	__ret = op((__tlb_inval), (__fence)->seqno, ##args);		\
	if (__ret < 0)							\
		xe_tlb_inval_fence_signal_unlocked((__fence));		\
	mutex_unlock(&(__tlb_inval)->seqno_lock);			\
									\
	__ret == -ECANCELED ? 0 : __ret;				\
})

/**
 * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs
 * @tlb_inval: TLB invalidation client
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion
 *
 * Issue a TLB invalidation for all TLBs. Completion of the TLB invalidation is
 * asynchronous and the caller can use the invalidation fence to wait for
 * completion.
 *
 * Return: 0 on success, negative error code on error
 */
int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
		     struct xe_tlb_inval_fence *fence)
{
	return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all);
}

/**
 * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT
 * @tlb_inval: TLB invalidation client
 *
 * Issue a TLB invalidation for the GGTT. Completion of the TLB invalidation is
 * asynchronous and the caller can use the invalidation fence to wait for
 * completion.
 *
 * Return: 0 on success, negative error code on error
 */
int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
{
	struct xe_tlb_inval_fence fence, *fence_ptr = &fence;
	int ret;

	xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true);
	ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt);
	xe_tlb_inval_fence_wait(fence_ptr);

	return ret;
}

/**
 * xe_tlb_inval_range() - Issue a TLB invalidation for an address range
 * @tlb_inval: TLB invalidation client
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion
 * @start: start address
 * @end: end address
 * @asid: address space id
 *
 * Issue a range based TLB invalidation if supported, if not fallback to a full
 * TLB invalidation. Completion of the TLB invalidation is asynchronous and the
 * caller can use the invalidation fence to wait for completion.
 *
 * Return: Negative error code on error, 0 on success
 */
int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
		       struct xe_tlb_inval_fence *fence, u64 start, u64 end,
		       u32 asid)
{
	return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt,
				  start, end, asid);
}

/**
 * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM
 * @tlb_inval: TLB invalidation client
 * @vm: VM to invalidate
 *
 * Invalidate the entire VM's address space.
 */
void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
{
	struct xe_tlb_inval_fence fence;
	u64 range = 1ull << vm->xe->info.va_bits;

	xe_tlb_inval_fence_init(tlb_inval, &fence, true);
	xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid);
	xe_tlb_inval_fence_wait(&fence);
}

/**
 * xe_tlb_inval_done_handler() - TLB invalidation done handler
 * @tlb_inval: TLB invalidation client
 * @seqno: seqno of invalidation that is done
 *
 * Update recv seqno, signal any TLB invalidation fences, and restart TDR
 */
void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno)
{
	struct xe_device *xe = tlb_inval->xe;
	struct xe_tlb_inval_fence *fence, *next;
	unsigned long flags;

	/*
	 * This can be run both directly from the IRQ handler and from
	 * process_g2h_msg(). Only one of them may process any individual CT
	 * message, however the order they are processed in could result in
	 * skipping a seqno. To handle that we just process all the seqnos from
	 * the last seqno_recv up to and including the one in msg[0]. The delta
	 * should be very small so there shouldn't be many pending_fences we
	 * actually need to iterate over here.
	 *
	 * From the GuC's POV we expect the seqnos to always appear in-order,
	 * so if we see something later in the timeline we can be sure that
	 * anything appearing earlier has already signalled; we just have yet
	 * to officially process the CT message, e.g. when racing against
	 * process_g2h_msg().
	 */
	spin_lock_irqsave(&tlb_inval->pending_lock, flags);
	if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) {
		spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
		return;
	}

	WRITE_ONCE(tlb_inval->seqno_recv, seqno);

	list_for_each_entry_safe(fence, next,
				 &tlb_inval->pending_fences, link) {
		trace_xe_tlb_inval_fence_recv(xe, fence);

		if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno))
			break;

		xe_tlb_inval_fence_signal(fence);
	}

	if (!list_empty(&tlb_inval->pending_fences))
		mod_delayed_work(system_wq,
				 &tlb_inval->fence_tdr,
				 tlb_inval->ops->timeout_delay(tlb_inval));
	else
		cancel_delayed_work(&tlb_inval->fence_tdr);

	spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
}

static const char *
xe_inval_fence_get_driver_name(struct dma_fence *dma_fence)
{
	return "xe";
}

static const char *
xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence)
{
	return "tlb_inval_fence";
}
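
/*
 * The invalidation fence ops only provide the naming callbacks; there is no
 * .release hook. Cleanup (dropping the runtime PM reference, removal from the
 * pending list) instead happens in xe_tlb_inval_fence_signal() /
 * xe_tlb_inval_fence_fini() when the fence is signalled.
 */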
static const struct dma_fence_ops inval_fence_ops = {
	.get_driver_name = xe_inval_fence_get_driver_name,
	.get_timeline_name = xe_inval_fence_get_timeline_name,
};

/**
 * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence
 * @tlb_inval: TLB invalidation client
 * @fence: TLB invalidation fence to initialize
 * @stack: fence is stack variable
 *
 * Initialize TLB invalidation fence for use. xe_tlb_inval_fence_fini
 * will be automatically called when fence is signalled (all fences must
 * signal), even on error.
 */
void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
			     struct xe_tlb_inval_fence *fence,
			     bool stack)
{
	xe_pm_runtime_get_noresume(tlb_inval->xe);

	spin_lock_irq(&tlb_inval->lock);
	dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock,
		       dma_fence_context_alloc(1), 1);
	spin_unlock_irq(&tlb_inval->lock);
	INIT_LIST_HEAD(&fence->link);
	if (stack)
		set_bit(FENCE_STACK_BIT, &fence->base.flags);
	else
		dma_fence_get(&fence->base);
	fence->tlb_inval = tlb_inval;
}