1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2025 Intel Corporation 4 */ 5 6 #include "abi/guc_actions_abi.h" 7 8 #include "xe_device.h" 9 #include "xe_exec_queue.h" 10 #include "xe_exec_queue_types.h" 11 #include "xe_gt_stats.h" 12 #include "xe_gt_types.h" 13 #include "xe_guc.h" 14 #include "xe_guc_ct.h" 15 #include "xe_guc_exec_queue_types.h" 16 #include "xe_guc_tlb_inval.h" 17 #include "xe_force_wake.h" 18 #include "xe_mmio.h" 19 #include "xe_sa.h" 20 #include "xe_tlb_inval.h" 21 #include "xe_vm.h" 22 23 #include "regs/xe_guc_regs.h" 24 25 /* 26 * XXX: The seqno algorithm relies on TLB invalidation being processed in order 27 * which they currently are by the GuC, if that changes the algorithm will need 28 * to be updated. 29 */ 30 31 static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len) 32 { 33 struct xe_gt *gt = guc_to_gt(guc); 34 35 xe_gt_assert(gt, action[1]); /* Seqno */ 36 37 xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); 38 return xe_guc_ct_send(&guc->ct, action, len, 39 G2H_LEN_DW_TLB_INVALIDATE, 1); 40 } 41 42 #define MAKE_INVAL_OP_FLUSH(type, flush_cache) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ 43 XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ 44 (flush_cache ? \ 45 XE_GUC_TLB_INVAL_FLUSH_CACHE : 0)) 46 47 #define MAKE_INVAL_OP(type) MAKE_INVAL_OP_FLUSH(type, true) 48 49 static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno) 50 { 51 struct xe_guc *guc = tlb_inval->private; 52 u32 action[] = { 53 XE_GUC_ACTION_TLB_INVALIDATION_ALL, 54 seqno, 55 MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), 56 }; 57 58 return send_tlb_inval(guc, action, ARRAY_SIZE(action)); 59 } 60 61 static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno) 62 { 63 struct xe_guc *guc = tlb_inval->private; 64 struct xe_gt *gt = guc_to_gt(guc); 65 struct xe_device *xe = guc_to_xe(guc); 66 67 /* 68 * Returning -ECANCELED in this function is squashed at the caller and 69 * signals waiters. 70 */ 71 72 if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) { 73 u32 action[] = { 74 XE_GUC_ACTION_TLB_INVALIDATION, 75 seqno, 76 MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), 77 }; 78 79 return send_tlb_inval(guc, action, ARRAY_SIZE(action)); 80 } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { 81 struct xe_mmio *mmio = >->mmio; 82 83 if (IS_SRIOV_VF(xe)) 84 return -ECANCELED; 85 86 CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); 87 if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { 88 xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, 89 PVC_GUC_TLB_INV_DESC1_INVALIDATE); 90 xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, 91 PVC_GUC_TLB_INV_DESC0_VALID); 92 } else { 93 xe_mmio_write32(mmio, GUC_TLB_INV_CR, 94 GUC_TLB_INV_CR_INVALIDATE); 95 } 96 } 97 98 return -ECANCELED; 99 } 100 101 static int send_page_reclaim(struct xe_guc *guc, u32 seqno, 102 u64 gpu_addr) 103 { 104 struct xe_gt *gt = guc_to_gt(guc); 105 u32 action[] = { 106 XE_GUC_ACTION_PAGE_RECLAMATION, 107 seqno, 108 lower_32_bits(gpu_addr), 109 upper_32_bits(gpu_addr), 110 }; 111 112 xe_gt_stats_incr(gt, XE_GT_STATS_ID_PRL_ISSUED_COUNT, 1); 113 114 return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 115 G2H_LEN_DW_PAGE_RECLAMATION, 1); 116 } 117 118 static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end) 119 { 120 u64 orig_start = *start; 121 u64 length = *end - *start; 122 u64 align; 123 124 if (length < SZ_4K) 125 length = SZ_4K; 126 127 align = roundup_pow_of_two(length); 128 *start = ALIGN_DOWN(*start, align); 129 *end = ALIGN(*end, align); 130 length = align; 131 while (*start + length < *end) { 132 length <<= 1; 133 *start = ALIGN_DOWN(orig_start, length); 134 } 135 136 if (length >= SZ_2M) { 137 length = max_t(u64, SZ_16M, length); 138 *start = ALIGN_DOWN(orig_start, length); 139 } 140 141 xe_gt_assert(gt, length >= SZ_4K); 142 xe_gt_assert(gt, is_power_of_2(length)); 143 xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, 144 ilog2(SZ_2M) + 1))); 145 xe_gt_assert(gt, IS_ALIGNED(*start, length)); 146 147 return length; 148 } 149 150 /* 151 * Ensure that roundup_pow_of_two(length) doesn't overflow. 152 * Note that roundup_pow_of_two() operates on unsigned long, 153 * not on u64. 154 */ 155 #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) 156 157 static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start, 158 u64 end, u32 id, u32 type, 159 struct drm_suballoc *prl_sa) 160 { 161 #define MAX_TLB_INVALIDATION_LEN 7 162 struct xe_gt *gt = guc_to_gt(guc); 163 struct xe_device *xe = guc_to_xe(guc); 164 u32 action[MAX_TLB_INVALIDATION_LEN]; 165 u64 length = end - start; 166 int len = 0, err; 167 168 xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE && 169 !xe->info.has_ctx_tlb_inval) || 170 (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX && 171 xe->info.has_ctx_tlb_inval)); 172 173 action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; 174 action[len++] = !prl_sa ? seqno : TLB_INVALIDATION_SEQNO_INVALID; 175 if (!gt_to_xe(gt)->info.has_range_tlb_inval || 176 length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { 177 action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); 178 } else { 179 u64 normalize_len = normalize_invalidation_range(gt, &start, 180 &end); 181 bool need_flush = !prl_sa && 182 seqno != TLB_INVALIDATION_SEQNO_INVALID; 183 184 /* Flush on NULL case, Media is not required to modify flush due to no PPC so NOP */ 185 action[len++] = MAKE_INVAL_OP_FLUSH(type, need_flush); 186 action[len++] = id; 187 action[len++] = lower_32_bits(start); 188 action[len++] = upper_32_bits(start); 189 action[len++] = ilog2(normalize_len) - ilog2(SZ_4K); 190 } 191 192 xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); 193 #undef MAX_TLB_INVALIDATION_LEN 194 195 err = send_tlb_inval(guc, action, len); 196 if (!err && prl_sa) { 197 xe_gt_assert(gt, seqno != TLB_INVALIDATION_SEQNO_INVALID); 198 err = send_page_reclaim(guc, seqno, xe_sa_bo_gpu_addr(prl_sa)); 199 } 200 return err; 201 } 202 203 static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, 204 u64 start, u64 end, u32 asid, 205 struct drm_suballoc *prl_sa) 206 { 207 struct xe_guc *guc = tlb_inval->private; 208 209 lockdep_assert_held(&tlb_inval->seqno_lock); 210 211 if (guc_to_xe(guc)->info.force_execlist) 212 return -ECANCELED; 213 214 return send_tlb_inval_ppgtt(guc, seqno, start, end, asid, 215 XE_GUC_TLB_INVAL_PAGE_SELECTIVE, prl_sa); 216 } 217 218 static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, 219 u64 start, u64 end, u32 asid, 220 struct drm_suballoc *prl_sa) 221 { 222 struct xe_guc *guc = tlb_inval->private; 223 struct xe_device *xe = guc_to_xe(guc); 224 struct xe_exec_queue *q, *next, *last_q = NULL; 225 struct xe_vm *vm; 226 LIST_HEAD(tlb_inval_list); 227 int err = 0, id = guc_to_gt(guc)->info.id; 228 229 lockdep_assert_held(&tlb_inval->seqno_lock); 230 231 if (xe->info.force_execlist) 232 return -ECANCELED; 233 234 vm = xe_device_asid_to_vm(xe, asid); 235 if (IS_ERR(vm)) 236 return PTR_ERR(vm); 237 238 down_read(&vm->exec_queues.lock); 239 240 /* 241 * XXX: Randomly picking a threshold for now. This will need to be 242 * tuned based on expected UMD queue counts and performance profiling. 243 */ 244 #define EXEC_QUEUE_COUNT_FULL_THRESHOLD 8 245 if (vm->exec_queues.count[id] >= EXEC_QUEUE_COUNT_FULL_THRESHOLD) { 246 u32 action[] = { 247 XE_GUC_ACTION_TLB_INVALIDATION, 248 seqno, 249 MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), 250 }; 251 252 err = send_tlb_inval(guc, action, ARRAY_SIZE(action)); 253 goto err_unlock; 254 } 255 #undef EXEC_QUEUE_COUNT_FULL_THRESHOLD 256 257 /* 258 * Move exec queues to a temporary list to issue invalidations. The exec 259 * queue must active and a reference must be taken to prevent concurrent 260 * deregistrations. 261 * 262 * List modification is safe because we hold 'vm->exec_queues.lock' for 263 * reading, which prevents external modifications. Using a per-GT list 264 * is also safe since 'tlb_inval->seqno_lock' ensures no other GT users 265 * can enter this code path. 266 */ 267 list_for_each_entry_safe(q, next, &vm->exec_queues.list[id], 268 vm_exec_queue_link) { 269 if (q->ops->active(q) && xe_exec_queue_get_unless_zero(q)) { 270 last_q = q; 271 list_move_tail(&q->vm_exec_queue_link, &tlb_inval_list); 272 } 273 } 274 275 if (!last_q) { 276 /* 277 * We can't break fence ordering for TLB invalidation jobs, if 278 * TLB invalidations are inflight issue a dummy invalidation to 279 * maintain ordering. Nor can we move safely the seqno_recv when 280 * returning -ECANCELED if TLB invalidations are in flight. Use 281 * GGTT invalidation as dummy invalidation given ASID 282 * invalidations are unsupported here. 283 */ 284 if (xe_tlb_inval_idle(tlb_inval)) 285 err = -ECANCELED; 286 else 287 err = send_tlb_inval_ggtt(tlb_inval, seqno); 288 goto err_unlock; 289 } 290 291 list_for_each_entry_safe(q, next, &tlb_inval_list, vm_exec_queue_link) { 292 struct drm_suballoc *__prl_sa = NULL; 293 int __seqno = TLB_INVALIDATION_SEQNO_INVALID; 294 u32 type = XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX; 295 296 xe_assert(xe, q->vm == vm); 297 298 if (err) 299 goto unref; 300 301 if (last_q == q) { 302 __prl_sa = prl_sa; 303 __seqno = seqno; 304 } 305 306 err = send_tlb_inval_ppgtt(guc, __seqno, start, end, 307 q->guc->id, type, __prl_sa); 308 309 unref: 310 /* 311 * Must always return exec queue to original list / drop 312 * reference 313 */ 314 list_move_tail(&q->vm_exec_queue_link, 315 &vm->exec_queues.list[id]); 316 xe_exec_queue_put(q); 317 } 318 319 err_unlock: 320 up_read(&vm->exec_queues.lock); 321 xe_vm_put(vm); 322 323 return err; 324 } 325 326 static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval) 327 { 328 struct xe_guc *guc = tlb_inval->private; 329 330 return xe_guc_ct_initialized(&guc->ct); 331 } 332 333 static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval) 334 { 335 struct xe_guc *guc = tlb_inval->private; 336 337 LNL_FLUSH_WORK(&guc->ct.g2h_worker); 338 } 339 340 static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval) 341 { 342 struct xe_guc *guc = tlb_inval->private; 343 344 /* this reflects what HW/GuC needs to process TLB inv request */ 345 const long hw_tlb_timeout = HZ / 4; 346 347 /* this estimates actual delay caused by the CTB transport */ 348 long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct); 349 350 return hw_tlb_timeout + 2 * delay; 351 } 352 353 static const struct xe_tlb_inval_ops guc_tlb_inval_asid_ops = { 354 .all = send_tlb_inval_all, 355 .ggtt = send_tlb_inval_ggtt, 356 .ppgtt = send_tlb_inval_asid_ppgtt, 357 .initialized = tlb_inval_initialized, 358 .flush = tlb_inval_flush, 359 .timeout_delay = tlb_inval_timeout_delay, 360 }; 361 362 static const struct xe_tlb_inval_ops guc_tlb_inval_ctx_ops = { 363 .ggtt = send_tlb_inval_ggtt, 364 .all = send_tlb_inval_all, 365 .ppgtt = send_tlb_inval_ctx_ppgtt, 366 .initialized = tlb_inval_initialized, 367 .flush = tlb_inval_flush, 368 .timeout_delay = tlb_inval_timeout_delay, 369 }; 370 371 /** 372 * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early 373 * @guc: GuC object 374 * @tlb_inval: TLB invalidation client 375 * 376 * Initialize GuC TLB invalidation by setting back pointer in TLB invalidation 377 * client to the GuC and setting GuC backend ops. 378 */ 379 void xe_guc_tlb_inval_init_early(struct xe_guc *guc, 380 struct xe_tlb_inval *tlb_inval) 381 { 382 struct xe_device *xe = guc_to_xe(guc); 383 384 tlb_inval->private = guc; 385 386 if (xe->info.has_ctx_tlb_inval) 387 tlb_inval->ops = &guc_tlb_inval_ctx_ops; 388 else 389 tlb_inval->ops = &guc_tlb_inval_asid_ops; 390 } 391 392 /** 393 * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler 394 * @guc: guc 395 * @msg: message indicating TLB invalidation done 396 * @len: length of message 397 * 398 * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any 399 * invalidation fences for seqno. Algorithm for this depends on seqno being 400 * received in-order and asserts this assumption. 401 * 402 * Return: 0 on success, -EPROTO for malformed messages. 403 */ 404 int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 405 { 406 struct xe_gt *gt = guc_to_gt(guc); 407 408 if (unlikely(len != 1)) 409 return -EPROTO; 410 411 xe_tlb_inval_done_handler(>->tlb_inval, msg[0]); 412 413 return 0; 414 } 415