xref: /linux/drivers/gpu/drm/i915/gt/intel_tlb.c (revision 001821b0e79716c4e17c71d8e053a23599a7a508)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023 Intel Corporation
4  */
5 
6 #include "i915_drv.h"
7 #include "i915_perf_oa_regs.h"
8 #include "intel_engine_pm.h"
9 #include "intel_gt.h"
10 #include "intel_gt_mcr.h"
11 #include "intel_gt_pm.h"
12 #include "intel_gt_print.h"
13 #include "intel_gt_regs.h"
14 #include "intel_tlb.h"
15 #include "uc/intel_guc.h"
16 
/*
 * HW architecture suggests a typical invalidation time of 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 *
 * Both values are handed to the register wait helpers used by
 * wait_for_invalidate(): the first as the microsecond timeout and the
 * second as the millisecond timeout. The millisecond value is also the
 * one reported in the error message when an invalidation does not complete.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4
24 
25 /*
26  * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
27  * but are now considered MCR registers.  Since they exist within a GAM range,
28  * the primary instance of the register rolls up the status from each unit.
29  */
30 static int wait_for_invalidate(struct intel_engine_cs *engine)
31 {
32 	if (engine->tlb_inv.mcr)
33 		return intel_gt_mcr_wait_for_reg(engine->gt,
34 						 engine->tlb_inv.reg.mcr_reg,
35 						 engine->tlb_inv.done,
36 						 0,
37 						 TLB_INVAL_TIMEOUT_US,
38 						 TLB_INVAL_TIMEOUT_MS);
39 	else
40 		return __intel_wait_for_register_fw(engine->gt->uncore,
41 						    engine->tlb_inv.reg.reg,
42 						    engine->tlb_inv.done,
43 						    0,
44 						    TLB_INVAL_TIMEOUT_US,
45 						    TLB_INVAL_TIMEOUT_MS,
46 						    NULL);
47 }
48 
49 static void mmio_invalidate_full(struct intel_gt *gt)
50 {
51 	struct drm_i915_private *i915 = gt->i915;
52 	struct intel_uncore *uncore = gt->uncore;
53 	struct intel_engine_cs *engine;
54 	intel_engine_mask_t awake, tmp;
55 	enum intel_engine_id id;
56 	unsigned long flags;
57 
58 	if (GRAPHICS_VER(i915) < 8)
59 		return;
60 
61 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
62 
63 	intel_gt_mcr_lock(gt, &flags);
64 	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
65 
66 	awake = 0;
67 	for_each_engine(engine, gt, id) {
68 		if (!intel_engine_pm_is_awake(engine))
69 			continue;
70 
71 		if (engine->tlb_inv.mcr)
72 			intel_gt_mcr_multicast_write_fw(gt,
73 							engine->tlb_inv.reg.mcr_reg,
74 							engine->tlb_inv.request);
75 		else
76 			intel_uncore_write_fw(uncore,
77 					      engine->tlb_inv.reg.reg,
78 					      engine->tlb_inv.request);
79 
80 		awake |= engine->mask;
81 	}
82 
83 	GT_TRACE(gt, "invalidated engines %08x\n", awake);
84 
85 	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
86 	if (awake &&
87 	    (IS_TIGERLAKE(i915) ||
88 	     IS_DG1(i915) ||
89 	     IS_ROCKETLAKE(i915) ||
90 	     IS_ALDERLAKE_S(i915) ||
91 	     IS_ALDERLAKE_P(i915)))
92 		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
93 
94 	spin_unlock(&uncore->lock);
95 	intel_gt_mcr_unlock(gt, flags);
96 
97 	for_each_engine_masked(engine, gt, awake, tmp) {
98 		if (wait_for_invalidate(engine))
99 			gt_err_ratelimited(gt,
100 					   "%s TLB invalidation did not complete in %ums!\n",
101 					   engine->name, TLB_INVAL_TIMEOUT_MS);
102 	}
103 
104 	/*
105 	 * Use delayed put since a) we mostly expect a flurry of TLB
106 	 * invalidations so it is good to avoid paying the forcewake cost and
107 	 * b) it works around a bug in Icelake which cannot cope with too rapid
108 	 * transitions.
109 	 */
110 	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
111 }
112 
113 static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
114 {
115 	u32 cur = intel_gt_tlb_seqno(gt);
116 
117 	/* Only skip if a *full* TLB invalidate barrier has passed */
118 	return (s32)(cur - ALIGN(seqno, 2)) > 0;
119 }
120 
121 void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
122 {
123 	intel_wakeref_t wakeref;
124 
125 	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
126 		return;
127 
128 	if (intel_gt_is_wedged(gt))
129 		return;
130 
131 	if (tlb_seqno_passed(gt, seqno))
132 		return;
133 
134 	with_intel_gt_pm_if_awake(gt, wakeref) {
135 		struct intel_guc *guc = gt_to_guc(gt);
136 
137 		mutex_lock(&gt->tlb.invalidate_lock);
138 		if (tlb_seqno_passed(gt, seqno))
139 			goto unlock;
140 
141 		if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
142 			/*
143 			 * Only perform GuC TLB invalidation if GuC is ready.
144 			 * The only time GuC could not be ready is on GT reset,
145 			 * which would clobber all the TLBs anyways, making
146 			 * any TLB invalidation path here unnecessary.
147 			 */
148 			if (intel_guc_is_ready(guc))
149 				intel_guc_invalidate_tlb_engines(guc);
150 		} else {
151 			mmio_invalidate_full(gt);
152 		}
153 
154 		write_seqcount_invalidate(&gt->tlb.seqno);
155 unlock:
156 		mutex_unlock(&gt->tlb.invalidate_lock);
157 	}
158 }
159 
/**
 * intel_gt_init_tlb - set up the TLB invalidation state of a GT
 * @gt: the GT to initialise
 *
 * Initialises gt->tlb.invalidate_lock and the gt->tlb.seqno sequence
 * counter, associating the counter with that mutex.
 */
void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}
165 
/**
 * intel_gt_fini_tlb - tear down the TLB invalidation state of a GT
 * @gt: the GT to clean up
 *
 * Destroys gt->tlb.invalidate_lock, the mutex set up by intel_gt_init_tlb().
 */
void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}
170 
171 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
172 #include "selftest_tlb.c"
173 #endif
174