// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw = 12;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = 0;
		*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
	}
	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->engine->i915) || IS_GEN(rq->engine->i915, 5))
			cmd |= MI_INVALIDATE_ISP;
	}

	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that it is a delay following the
	 * invalidation that is required to reset the caches, as opposed to a
	 * delay to ensure the memory is written.
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}
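
/*
 * Illustrative sketch only (not part of the original file): the
 * intel_ring_begin()/intel_ring_advance() pattern that every emitter here
 * follows, reduced to its minimum. The helper name __sketch_emit_noops is
 * invented for this example; the shape mirrors gen4_emit_flush_vcs() above.
 * Every reservation in this file is an even number of dwords (packets are
 * kept qword aligned), hence the MI_NOOP padding.
 */
static inline int __sketch_emit_noops(struct i915_request *rq)
{
	u32 *cs;

	/* Reserve exactly two dwords of ring space for this request */
	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;

	/* Bookend: asserts we emitted exactly the dwords we reserved */
	intel_ring_advance(rq, cs);

	return 0;
}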

static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	while (flush--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = rq->fence.seqno;
	}

	while (post--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}
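
/*
 * Illustrative sketch only (not part of the original file): how the seqno
 * written by the breadcrumb above can later be observed by the CPU. The
 * helper name __sketch_breadcrumb_landed is invented for this example; it
 * assumes the intel_read_status_page() helper and the I915_GEM_HWS_SEQNO
 * dword index from intel_engine.h, which is where the MI_STORE_DWORD_INDEX
 * "post" writes land.
 */
static inline bool __sketch_breadcrumb_landed(struct intel_engine_cs *engine,
					      u32 seqno)
{
	u32 hws_seqno = intel_read_status_page(engine, I915_GEM_HWS_SEQNO);

	/* Wraparound-safe "has the HWS seqno reached this value" check */
	return (s32)(hws_seqno - seqno) >= 0;
}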

/* Just a userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
int i830_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs, cs_offset =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Blit the batch (which has now all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen3_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs;

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 length,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

void gen2_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
	ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

void gen3_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

void gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}
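
/*
 * Illustrative sketch only (not part of the original file): how an engine
 * setup path might wire the emitters above into the intel_engine_cs vfuncs.
 * The helper name __sketch_setup_gen3_rcs is invented for this example and
 * the selection shown is simplified; the real per-platform dispatch lives
 * in intel_ring_submission.c.
 */
static inline void __sketch_setup_gen3_rcs(struct intel_engine_cs *engine)
{
	engine->emit_flush = gen2_emit_flush;	/* gen2/3 MI_FLUSH path */
	engine->emit_bb_start = gen3_emit_bb_start;
	engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
	engine->irq_enable = gen3_irq_enable;
	engine->irq_disable = gen3_irq_disable;
}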