// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "abi/guc_actions_abi.h"

#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt_stats.h"
#include "xe_gt_types.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
#include "xe_mmio.h"
#include "xe_tlb_inval.h"

#include "regs/xe_guc_regs.h"

/*
 * XXX: The seqno algorithm relies on TLB invalidations being processed in
 * order, which they currently are by the GuC; if that changes, the algorithm
 * will need to be updated.
 */

static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len)
{
	struct xe_gt *gt = guc_to_gt(guc);

	xe_gt_assert(gt, action[1]);	/* Seqno */

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
	return xe_guc_ct_send(&guc->ct, action, len,
			      G2H_LEN_DW_TLB_INVALIDATE, 1);
}

#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
		XE_GUC_TLB_INVAL_FLUSH_CACHE)

static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
	struct xe_guc *guc = tlb_inval->private;
	u32 action[] = {
		XE_GUC_ACTION_TLB_INVALIDATION_ALL,
		seqno,
		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
	};

	return send_tlb_inval(guc, action, ARRAY_SIZE(action));
}

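/*
 * send_tlb_inval_ggtt() - Send a GGTT TLB invalidation.
 *
 * Uses the GuC CT channel when the CTB and GuC submission are enabled;
 * otherwise, if the uC is enabled and the device is not wedged, falls back to
 * triggering the invalidation directly via MMIO. All non-CT paths (including
 * SR-IOV VFs, which cannot use the MMIO fallback) return -ECANCELED, which the
 * caller squashes while signalling waiters.
 */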
static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
	struct xe_guc *guc = tlb_inval->private;
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = guc_to_xe(guc);

	/*
	 * Returning -ECANCELED in this function is squashed at the caller and
	 * signals waiters.
	 */

	if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) {
		u32 action[] = {
			XE_GUC_ACTION_TLB_INVALIDATION,
			seqno,
			MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
		};

		return send_tlb_inval(guc, action, ARRAY_SIZE(action));
	} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
		struct xe_mmio *mmio = &gt->mmio;
		unsigned int fw_ref;

		if (IS_SRIOV_VF(xe))
			return -ECANCELED;

		fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
					PVC_GUC_TLB_INV_DESC0_VALID);
		} else {
			xe_mmio_write32(mmio, GUC_TLB_INV_CR,
					GUC_TLB_INV_CR_INVALIDATE);
		}
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
	}

	return -ECANCELED;
}

/*
 * Ensure that roundup_pow_of_two(length) doesn't overflow.
 * Note that roundup_pow_of_two() operates on unsigned long,
 * not on u64.
 */
#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))

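/*
 * send_tlb_inval_ppgtt() - Send a range-based PPGTT TLB invalidation.
 *
 * Issues a page-selective invalidation of [start, end) for the address space
 * identified by @asid. If the platform lacks range-based invalidation, or the
 * range is too large to encode, a full invalidation is sent instead. For the
 * selective case the range is widened to an aligned power-of-two length so it
 * can be expressed as the address-mask encoding the hardware expects.
 */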
static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
				u64 start, u64 end, u32 asid)
{
#define MAX_TLB_INVALIDATION_LEN	7
	struct xe_guc *guc = tlb_inval->private;
	struct xe_gt *gt = guc_to_gt(guc);
	u32 action[MAX_TLB_INVALIDATION_LEN];
	u64 length = end - start;
	int len = 0;

	if (guc_to_xe(guc)->info.force_execlist)
		return -ECANCELED;

	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
	action[len++] = seqno;
	if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
	} else {
		u64 orig_start = start;
		u64 align;

		if (length < SZ_4K)
			length = SZ_4K;

		/*
		 * We need to invalidate a higher granularity if start address
		 * is not aligned to length. When start is not aligned with
		 * length we need to find the length large enough to create an
		 * address mask covering the required range.
		 */
		align = roundup_pow_of_two(length);
		start = ALIGN_DOWN(start, align);
		end = ALIGN(end, align);
		length = align;
		while (start + length < end) {
			length <<= 1;
			start = ALIGN_DOWN(orig_start, length);
		}

		/*
		 * Minimum invalidation size for a 2MB page that the hardware
		 * expects is 16MB
		 */
		if (length >= SZ_2M) {
			length = max_t(u64, SZ_16M, length);
			start = ALIGN_DOWN(orig_start, length);
		}

		xe_gt_assert(gt, length >= SZ_4K);
		xe_gt_assert(gt, is_power_of_2(length));
		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
						    ilog2(SZ_2M) + 1)));
		xe_gt_assert(gt, IS_ALIGNED(start, length));

		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
		action[len++] = asid;
		action[len++] = lower_32_bits(start);
		action[len++] = upper_32_bits(start);
		action[len++] = ilog2(length) - ilog2(SZ_4K);
	}

	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);

	return send_tlb_inval(guc, action, len);
}

static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

	return xe_guc_ct_initialized(&guc->ct);
}

static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

	LNL_FLUSH_WORK(&guc->ct.g2h_worker);
}

static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval)
{
	struct xe_guc *guc = tlb_inval->private;

	/* this reflects what HW/GuC needs to process TLB inv request */
	const long hw_tlb_timeout = HZ / 4;

	/* this estimates actual delay caused by the CTB transport */
	long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct);

	return hw_tlb_timeout + 2 * delay;
}

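/*
 * GuC backend ops for the TLB invalidation layer; installed as a TLB
 * invalidation client's backend by xe_guc_tlb_inval_init_early().
 */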
static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
	.all = send_tlb_inval_all,
	.ggtt = send_tlb_inval_ggtt,
	.ppgtt = send_tlb_inval_ppgtt,
	.initialized = tlb_inval_initialized,
	.flush = tlb_inval_flush,
	.timeout_delay = tlb_inval_timeout_delay,
};

/**
 * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early
 * @guc: GuC object
 * @tlb_inval: TLB invalidation client
 *
 * Initialize GuC TLB invalidation by setting the back pointer in the TLB
 * invalidation client to the GuC and setting the GuC backend ops.
 */
void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
				 struct xe_tlb_inval *tlb_inval)
{
	tlb_inval->private = guc;
	tlb_inval->ops = &guc_tlb_inval_ops;
}

/**
 * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler
 * @guc: GuC object
 * @msg: message indicating TLB invalidation done
 * @len: length of message
 *
 * Parse the seqno of the TLB invalidation, wake any waiters for that seqno,
 * and signal any invalidation fences for it. The algorithm depends on seqnos
 * being received in-order and asserts this assumption.
 *
 * Return: 0 on success, -EPROTO for malformed messages.
 */
int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);

	if (unlikely(len != 1))
		return -EPROTO;

	xe_tlb_inval_done_handler(&gt->tlb_inval, msg[0]);

	return 0;
}