// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
#include "xe_gt_stats.h"
#include "xe_tlb_inval.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_trace.h"

/**
 * DOC: Xe TLB invalidation
 *
 * Xe TLB invalidation is implemented in two layers. The first is the frontend
 * API, which provides an interface for TLB invalidations to the driver code.
 * The frontend handles seqno assignment, synchronization (fences), and the
 * timeout mechanism. The frontend is implemented via an embedded structure
 * xe_tlb_inval that includes a set of ops hooking into the backend. The
 * backend interacts with the hardware (or firmware) to perform the actual
 * invalidation.
 */

#define FENCE_STACK_BIT		DMA_FENCE_FLAG_USER_BITS

static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence)
{
	if (WARN_ON_ONCE(!fence->tlb_inval))
		return;

	xe_pm_runtime_put(fence->tlb_inval->xe);
	fence->tlb_inval = NULL; /* fini() should be called once */
}

static void
xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
{
	bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);

	lockdep_assert_held(&fence->tlb_inval->pending_lock);

	list_del(&fence->link);
	trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence);
	xe_tlb_inval_fence_fini(fence);
	dma_fence_signal(&fence->base);
	if (!stack)
		dma_fence_put(&fence->base);
}

static void
xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence)
{
	struct xe_tlb_inval *tlb_inval = fence->tlb_inval;

	spin_lock_irq(&tlb_inval->pending_lock);
	xe_tlb_inval_fence_signal(fence);
	spin_unlock_irq(&tlb_inval->pending_lock);
}

static void xe_tlb_inval_fence_timeout(struct work_struct *work)
{
	struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval,
						      fence_tdr.work);
	struct xe_device *xe = tlb_inval->xe;
	struct xe_tlb_inval_fence *fence, *next;
	long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval);

	tlb_inval->ops->flush(tlb_inval);

	spin_lock_irq(&tlb_inval->pending_lock);
	list_for_each_entry_safe(fence, next,
				 &tlb_inval->pending_fences, link) {
		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
						    fence->inval_time);

		if (msecs_to_jiffies(since_inval_ms) < timeout_delay)
			break;

		trace_xe_tlb_inval_fence_timeout(xe, fence);
		drm_err(&xe->drm,
			"TLB invalidation fence timeout, seqno=%d recv=%d",
			fence->seqno, tlb_inval->seqno_recv);

		fence->base.error = -ETIME;
		xe_tlb_inval_fence_signal(fence);
	}
	if (!list_empty(&tlb_inval->pending_fences))
		queue_delayed_work(system_wq, &tlb_inval->fence_tdr,
				   timeout_delay);
	spin_unlock_irq(&tlb_inval->pending_lock);
}

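/*
 * Illustrative usage of the frontend fence flow described in the DOC section
 * above (a minimal sketch, not compiled; it assumes the caller already holds
 * a pointer to the embedded struct xe_tlb_inval, e.g. &gt->tlb_inval):
 *
 *	struct xe_tlb_inval_fence fence;
 *
 *	xe_tlb_inval_fence_init(tlb_inval, &fence, true);
 *	xe_tlb_inval_range(tlb_inval, &fence, start, end, asid);
 *	xe_tlb_inval_fence_wait(&fence);
 *
 * The fence is signalled either by xe_tlb_inval_done_handler() once the
 * backend reports the matching seqno, or by xe_tlb_inval_fence_timeout()
 * with fence->base.error set to -ETIME.
 */
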
/**
 * tlb_inval_fini() - Clean up TLB invalidation state
 * @drm: DRM device
 * @arg: pointer to struct xe_tlb_inval
 *
 * Cancel pending fence workers and clean up any additional
 * TLB invalidation state.
 */
static void tlb_inval_fini(struct drm_device *drm, void *arg)
{
	struct xe_tlb_inval *tlb_inval = arg;

	xe_tlb_inval_reset(tlb_inval);
}

/**
 * xe_gt_tlb_inval_init_early() - Initialize TLB invalidation state
 * @gt: GT structure
 *
 * Initialize TLB invalidation state, purely software initialization, should
 * be called once during driver load.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_gt_tlb_inval_init_early(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tlb_inval *tlb_inval = &gt->tlb_inval;
	int err;

	tlb_inval->xe = xe;
	tlb_inval->seqno = 1;
	INIT_LIST_HEAD(&tlb_inval->pending_fences);
	spin_lock_init(&tlb_inval->pending_lock);
	spin_lock_init(&tlb_inval->lock);
	INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout);

	err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock);
	if (err)
		return err;

	tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm,
							 "gt-tbl-inval-job-wq",
							 WQ_MEM_RECLAIM);
	if (IS_ERR(tlb_inval->job_wq))
		return PTR_ERR(tlb_inval->job_wq);

	/* XXX: Blindly setting up backend to GuC */
	xe_guc_tlb_inval_init_early(&gt->uc.guc, tlb_inval);

	return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval);
}

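/*
 * Backend contract, as exercised from this file (a sketch for orientation
 * only; the authoritative prototypes live with the definition of struct
 * xe_tlb_inval_ops):
 *
 *	ops->initialized(tlb_inval)	- is the backend ready to accept requests?
 *	ops->flush(tlb_inval)		- flush outstanding invalidation requests
 *	ops->timeout_delay(tlb_inval)	- fence timeout delay in jiffies
 *	ops->all(tlb_inval, seqno)	- issue a full TLB invalidation
 *	ops->ggtt(tlb_inval, seqno)	- issue a GGTT TLB invalidation
 *	ops->ppgtt(tlb_inval, seqno, start, end, asid)
 *					- issue a range based invalidation
 *
 * The issue hooks return a negative error code on failure, in which case
 * xe_tlb_inval_issue() signals the fence immediately; -ECANCELED is then
 * reported to callers as success.
 */
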
/**
 * xe_tlb_inval_reset() - TLB invalidation reset
 * @tlb_inval: TLB invalidation client
 *
 * Signal any pending invalidation fences, should be called during a GT reset.
 */
void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
{
	struct xe_tlb_inval_fence *fence, *next;
	int pending_seqno;

	/*
	 * We can get here before the backends are even initialized if we're
	 * wedging very early, in which case there are not going to be any
	 * pending fences so we can bail immediately.
	 */
	if (!tlb_inval->ops->initialized(tlb_inval))
		return;

	/*
	 * Backend is already disabled at this point. No new TLB requests can
	 * appear.
	 */

	mutex_lock(&tlb_inval->seqno_lock);
	spin_lock_irq(&tlb_inval->pending_lock);
	cancel_delayed_work(&tlb_inval->fence_tdr);
	/*
	 * We might have various kworkers waiting for TLB flushes to complete
	 * which are not tracked with an explicit TLB fence, however at this
	 * stage that will never happen since the backend is already disabled,
	 * so make sure we signal them here under the assumption that we have
	 * completed a full GT reset.
	 */
	if (tlb_inval->seqno == 1)
		pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
	else
		pending_seqno = tlb_inval->seqno - 1;
	WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno);

	list_for_each_entry_safe(fence, next,
				 &tlb_inval->pending_fences, link)
		xe_tlb_inval_fence_signal(fence);
	spin_unlock_irq(&tlb_inval->pending_lock);
	mutex_unlock(&tlb_inval->seqno_lock);
}

static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno)
{
	int seqno_recv = READ_ONCE(tlb_inval->seqno_recv);

	lockdep_assert_held(&tlb_inval->pending_lock);

	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
		return false;

	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
		return true;

	return seqno_recv >= seqno;
}

static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence)
{
	struct xe_tlb_inval *tlb_inval = fence->tlb_inval;

	fence->seqno = tlb_inval->seqno;
	trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence);

	spin_lock_irq(&tlb_inval->pending_lock);
	fence->inval_time = ktime_get();
	list_add_tail(&fence->link, &tlb_inval->pending_fences);

	if (list_is_singular(&tlb_inval->pending_fences))
		queue_delayed_work(system_wq, &tlb_inval->fence_tdr,
				   tlb_inval->ops->timeout_delay(tlb_inval));
	spin_unlock_irq(&tlb_inval->pending_lock);

	tlb_inval->seqno = (tlb_inval->seqno + 1) %
		TLB_INVALIDATION_SEQNO_MAX;
	if (!tlb_inval->seqno)
		tlb_inval->seqno = 1;
}

#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...)		\
({									\
	int __ret;							\
									\
	xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops);		\
	xe_assert((__tlb_inval)->xe, (__fence));			\
									\
	mutex_lock(&(__tlb_inval)->seqno_lock);				\
	xe_tlb_inval_fence_prep((__fence));				\
	__ret = op((__tlb_inval), (__fence)->seqno, ##args);		\
	if (__ret < 0)							\
		xe_tlb_inval_fence_signal_unlocked((__fence));		\
	mutex_unlock(&(__tlb_inval)->seqno_lock);			\
									\
	__ret == -ECANCELED ? 0 : __ret;				\
})

/**
 * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs
 * @tlb_inval: TLB invalidation client
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion
 *
 * Issue a TLB invalidation for all TLBs. Completion of the TLB invalidation is
 * asynchronous and the caller can use the invalidation fence to wait for
 * completion.
 *
 * Return: 0 on success, negative error code on error
 */
int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
		     struct xe_tlb_inval_fence *fence)
{
	return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all);
}

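/*
 * Minimal usage sketch for the synchronous helper below (illustrative only;
 * "gt" is assumed to be a struct xe_gt the caller already holds):
 *
 *	int err = xe_tlb_inval_ggtt(&gt->tlb_inval);
 *	if (err)
 *		...
 *
 * Unlike xe_tlb_inval_all()/xe_tlb_inval_range(), no caller-provided fence is
 * needed; the helper waits on an internal on-stack fence before returning.
 */
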
/**
 * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT
 * @tlb_inval: TLB invalidation client
 *
 * Issue a TLB invalidation for the GGTT. Completion of the TLB invalidation is
 * asynchronous; this helper waits on an internal fence before returning.
 *
 * Return: 0 on success, negative error code on error
 */
int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
{
	struct xe_tlb_inval_fence fence, *fence_ptr = &fence;
	int ret;

	xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true);
	ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt);
	xe_tlb_inval_fence_wait(fence_ptr);

	return ret;
}

/**
 * xe_tlb_inval_range() - Issue a TLB invalidation for an address range
 * @tlb_inval: TLB invalidation client
 * @fence: invalidation fence which will be signaled on TLB invalidation
 * completion
 * @start: start address
 * @end: end address
 * @asid: address space id
 *
 * Issue a range based TLB invalidation if supported, if not fallback to a full
 * TLB invalidation. Completion of the TLB invalidation is asynchronous and the
 * caller can use the invalidation fence to wait for completion.
 *
 * Return: Negative error code on error, 0 on success
 */
int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
		       struct xe_tlb_inval_fence *fence, u64 start, u64 end,
		       u32 asid)
{
	return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt,
				  start, end, asid);
}

/**
 * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM
 * @tlb_inval: TLB invalidation client
 * @vm: VM to invalidate
 *
 * Invalidate the entire VM's address space.
 */
void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
{
	struct xe_tlb_inval_fence fence;
	u64 range = 1ull << vm->xe->info.va_bits;

	xe_tlb_inval_fence_init(tlb_inval, &fence, true);
	xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid);
	xe_tlb_inval_fence_wait(&fence);
}

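/*
 * Worked example of the seqno wrap handling in xe_tlb_inval_seqno_past()
 * (numbers are illustrative only, TLB_INVALIDATION_SEQNO_MAX is much larger
 * in practice): with a seqno space of 8, seqnos cycle 1..7, 1..7, ...  If
 * seqno_recv == 7 and a fence carries seqno == 2, then 2 - 7 == -5, which is
 * below -(8 / 2), so the fence is from after the wrap and is reported as not
 * yet past.  Conversely, with seqno_recv == 2 and seqno == 7, 7 - 2 == 5 is
 * above 8 / 2, so seqno 7 predates the wrap and is reported as already past.
 */
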
/**
 * xe_tlb_inval_done_handler() - TLB invalidation done handler
 * @tlb_inval: TLB invalidation client
 * @seqno: seqno of invalidation that is done
 *
 * Update recv seqno, signal any TLB invalidation fences, and restart TDR
 */
void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno)
{
	struct xe_device *xe = tlb_inval->xe;
	struct xe_tlb_inval_fence *fence, *next;
	unsigned long flags;

	/*
	 * This can be run both directly from the IRQ handler and from
	 * process_g2h_msg(). Only one may process any individual CT message,
	 * however the order they are processed here could result in skipping a
	 * seqno. To handle that we just process all the seqnos from the last
	 * seqno_recv up to and including the one in msg[0]. The delta should be
	 * very small so there shouldn't be many pending_fences we actually
	 * need to iterate over here.
	 *
	 * From GuC POV we expect the seqnos to always appear in-order, so if we
	 * see something later in the timeline we can be sure that anything
	 * appearing earlier has already signalled, just that we have yet to
	 * officially process the CT message like if racing against
	 * process_g2h_msg().
	 */
	spin_lock_irqsave(&tlb_inval->pending_lock, flags);
	if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) {
		spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
		return;
	}

	WRITE_ONCE(tlb_inval->seqno_recv, seqno);

	list_for_each_entry_safe(fence, next,
				 &tlb_inval->pending_fences, link) {
		trace_xe_tlb_inval_fence_recv(xe, fence);

		if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno))
			break;

		xe_tlb_inval_fence_signal(fence);
	}

	if (!list_empty(&tlb_inval->pending_fences))
		mod_delayed_work(system_wq,
				 &tlb_inval->fence_tdr,
				 tlb_inval->ops->timeout_delay(tlb_inval));
	else
		cancel_delayed_work(&tlb_inval->fence_tdr);

	spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
}

static const char *
xe_inval_fence_get_driver_name(struct dma_fence *dma_fence)
{
	return "xe";
}

static const char *
xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence)
{
	return "tlb_inval_fence";
}

static const struct dma_fence_ops inval_fence_ops = {
	.get_driver_name = xe_inval_fence_get_driver_name,
	.get_timeline_name = xe_inval_fence_get_timeline_name,
};

/**
 * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence
 * @tlb_inval: TLB invalidation client
 * @fence: TLB invalidation fence to initialize
 * @stack: fence is stack variable
 *
 * Initialize TLB invalidation fence for use. xe_tlb_inval_fence_fini() will be
 * called automatically when the fence is signalled (all fences must signal),
 * even on error.
 */
void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
			     struct xe_tlb_inval_fence *fence,
			     bool stack)
{
	xe_pm_runtime_get_noresume(tlb_inval->xe);

	spin_lock_irq(&tlb_inval->lock);
	dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock,
		       dma_fence_context_alloc(1), 1);
	spin_unlock_irq(&tlb_inval->lock);
	INIT_LIST_HEAD(&fence->link);
	if (stack)
		set_bit(FENCE_STACK_BIT, &fence->base.flags);
	else
		dma_fence_get(&fence->base);
	fence->tlb_inval = tlb_inval;
}