1 /* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 /** 25 * DOC: Shader validator for VC4. 26 * 27 * Since the VC4 has no IOMMU between it and system memory, a user 28 * with access to execute shaders could escalate privilege by 29 * overwriting system memory (using the VPM write address register in 30 * the general-purpose DMA mode) or reading system memory it shouldn't 31 * (reading it as a texture, uniform data, or direct-addressed TMU 32 * lookup). 33 * 34 * The shader validator walks over a shader's BO, ensuring that its 35 * accesses are appropriately bounded, and recording where texture 36 * accesses are made so that we can do relocations for them in the 37 * uniform stream. 38 * 39 * Shader BO are immutable for their lifetimes (enforced by not 40 * allowing mmaps, GEM prime export, or rendering to from a CL), so 41 * this validation is only performed at BO creation time. 42 */ 43 44 #include <drm/drm_print.h> 45 46 #include "vc4_drv.h" 47 #include "vc4_qpu_defines.h" 48 49 #define LIVE_REG_COUNT (32 + 32 + 4) 50 51 struct vc4_shader_validation_state { 52 /* Current IP being validated. */ 53 uint32_t ip; 54 55 /* IP at the end of the BO, do not read shader[max_ip] */ 56 uint32_t max_ip; 57 58 uint64_t *shader; 59 60 struct vc4_texture_sample_info tmu_setup[2]; 61 int tmu_write_count[2]; 62 63 /* For registers that were last written to by a MIN instruction with 64 * one argument being a uniform, the address of the uniform. 65 * Otherwise, ~0. 66 * 67 * This is used for the validation of direct address memory reads. 68 */ 69 uint32_t live_min_clamp_offsets[LIVE_REG_COUNT]; 70 bool live_max_clamp_regs[LIVE_REG_COUNT]; 71 uint32_t live_immediates[LIVE_REG_COUNT]; 72 73 /* Bitfield of which IPs are used as branch targets. 74 * 75 * Used for validation that the uniform stream is updated at the right 76 * points and clearing the texturing/clamping state. 77 */ 78 unsigned long *branch_targets; 79 80 /* Set when entering a basic block, and cleared when the uniform 81 * address update is found. This is used to make sure that we don't 82 * read uniforms when the address is undefined. 83 */ 84 bool needs_uniform_address_update; 85 86 /* Set when we find a backwards branch. If the branch is backwards, 87 * the taraget is probably doing an address reset to read uniforms, 88 * and so we need to be sure that a uniforms address is present in the 89 * stream, even if the shader didn't need to read uniforms in later 90 * basic blocks. 91 */ 92 bool needs_uniform_address_for_loop; 93 94 /* Set when we find an instruction writing the top half of the 95 * register files. If we allowed writing the unusable regs in 96 * a threaded shader, then the other shader running on our 97 * QPU's clamp validation would be invalid. 98 */ 99 bool all_registers_used; 100 }; 101 102 static uint32_t 103 waddr_to_live_reg_index(uint32_t waddr, bool is_b) 104 { 105 if (waddr < 32) { 106 if (is_b) 107 return 32 + waddr; 108 else 109 return waddr; 110 } else if (waddr <= QPU_W_ACC3) { 111 return 64 + waddr - QPU_W_ACC0; 112 } else { 113 return ~0; 114 } 115 } 116 117 static uint32_t 118 raddr_add_a_to_live_reg_index(uint64_t inst) 119 { 120 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 121 uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); 122 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 123 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 124 125 if (add_a == QPU_MUX_A) 126 return raddr_a; 127 else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) 128 return 32 + raddr_b; 129 else if (add_a <= QPU_MUX_R3) 130 return 64 + add_a; 131 else 132 return ~0; 133 } 134 135 static bool 136 live_reg_is_upper_half(uint32_t lri) 137 { 138 return (lri >= 16 && lri < 32) || 139 (lri >= 32 + 16 && lri < 32 + 32); 140 } 141 142 static bool 143 is_tmu_submit(uint32_t waddr) 144 { 145 return (waddr == QPU_W_TMU0_S || 146 waddr == QPU_W_TMU1_S); 147 } 148 149 static bool 150 is_tmu_write(uint32_t waddr) 151 { 152 return (waddr >= QPU_W_TMU0_S && 153 waddr <= QPU_W_TMU1_B); 154 } 155 156 static bool 157 record_texture_sample(struct vc4_validated_shader_info *validated_shader, 158 struct vc4_shader_validation_state *validation_state, 159 int tmu) 160 { 161 uint32_t s = validated_shader->num_texture_samples; 162 int i; 163 struct vc4_texture_sample_info *temp_samples; 164 165 temp_samples = krealloc(validated_shader->texture_samples, 166 (s + 1) * sizeof(*temp_samples), 167 GFP_KERNEL); 168 if (!temp_samples) 169 return false; 170 171 memcpy(&temp_samples[s], 172 &validation_state->tmu_setup[tmu], 173 sizeof(*temp_samples)); 174 175 validated_shader->num_texture_samples = s + 1; 176 validated_shader->texture_samples = temp_samples; 177 178 for (i = 0; i < 4; i++) 179 validation_state->tmu_setup[tmu].p_offset[i] = ~0; 180 181 return true; 182 } 183 184 static bool 185 check_tmu_write(struct vc4_validated_shader_info *validated_shader, 186 struct vc4_shader_validation_state *validation_state, 187 bool is_mul) 188 { 189 uint64_t inst = validation_state->shader[validation_state->ip]; 190 uint32_t waddr = (is_mul ? 191 QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 192 QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 193 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 194 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 195 int tmu = waddr > QPU_W_TMU0_B; 196 bool submit = is_tmu_submit(waddr); 197 bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0; 198 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 199 200 if (is_direct) { 201 uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 202 uint32_t clamp_reg, clamp_offset; 203 204 if (sig == QPU_SIG_SMALL_IMM) { 205 DRM_DEBUG("direct TMU read used small immediate\n"); 206 return false; 207 } 208 209 /* Make sure that this texture load is an add of the base 210 * address of the UBO to a clamped offset within the UBO. 211 */ 212 if (is_mul || 213 QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { 214 DRM_DEBUG("direct TMU load wasn't an add\n"); 215 return false; 216 } 217 218 /* We assert that the clamped address is the first 219 * argument, and the UBO base address is the second argument. 220 * This is arbitrary, but simpler than supporting flipping the 221 * two either way. 222 */ 223 clamp_reg = raddr_add_a_to_live_reg_index(inst); 224 if (clamp_reg == ~0) { 225 DRM_DEBUG("direct TMU load wasn't clamped\n"); 226 return false; 227 } 228 229 clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg]; 230 if (clamp_offset == ~0) { 231 DRM_DEBUG("direct TMU load wasn't clamped\n"); 232 return false; 233 } 234 235 /* Store the clamp value's offset in p1 (see reloc_tex() in 236 * vc4_validate.c). 237 */ 238 validation_state->tmu_setup[tmu].p_offset[1] = 239 clamp_offset; 240 241 if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 242 !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { 243 DRM_DEBUG("direct TMU load didn't add to a uniform\n"); 244 return false; 245 } 246 247 validation_state->tmu_setup[tmu].is_direct = true; 248 } else { 249 if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM && 250 raddr_b == QPU_R_UNIF)) { 251 DRM_DEBUG("uniform read in the same instruction as " 252 "texture setup.\n"); 253 return false; 254 } 255 } 256 257 if (validation_state->tmu_write_count[tmu] >= 4) { 258 DRM_DEBUG("TMU%d got too many parameters before dispatch\n", 259 tmu); 260 return false; 261 } 262 validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] = 263 validated_shader->uniforms_size; 264 validation_state->tmu_write_count[tmu]++; 265 /* Since direct uses a RADDR uniform reference, it will get counted in 266 * check_instruction_reads() 267 */ 268 if (!is_direct) { 269 if (validation_state->needs_uniform_address_update) { 270 DRM_DEBUG("Texturing with undefined uniform address\n"); 271 return false; 272 } 273 274 validated_shader->uniforms_size += 4; 275 } 276 277 if (submit) { 278 if (!record_texture_sample(validated_shader, 279 validation_state, tmu)) { 280 return false; 281 } 282 283 validation_state->tmu_write_count[tmu] = 0; 284 } 285 286 return true; 287 } 288 289 static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader) 290 { 291 uint32_t o = validated_shader->num_uniform_addr_offsets; 292 uint32_t num_uniforms = validated_shader->uniforms_size / 4; 293 u32 *offsets; 294 295 offsets = krealloc_array(validated_shader->uniform_addr_offsets, 296 o + 1, 297 sizeof(*validated_shader->uniform_addr_offsets), 298 GFP_KERNEL); 299 if (!offsets) 300 return false; 301 302 validated_shader->uniform_addr_offsets = offsets; 303 validated_shader->uniform_addr_offsets[o] = num_uniforms; 304 validated_shader->num_uniform_addr_offsets++; 305 306 return true; 307 } 308 309 static bool 310 validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader, 311 struct vc4_shader_validation_state *validation_state, 312 bool is_mul) 313 { 314 uint64_t inst = validation_state->shader[validation_state->ip]; 315 u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 316 u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 317 u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 318 u32 add_lri = raddr_add_a_to_live_reg_index(inst); 319 /* We want our reset to be pointing at whatever uniform follows the 320 * uniforms base address. 321 */ 322 u32 expected_offset = validated_shader->uniforms_size + 4; 323 324 /* We only support absolute uniform address changes, and we 325 * require that they be in the current basic block before any 326 * of its uniform reads. 327 * 328 * One could potentially emit more efficient QPU code, by 329 * noticing that (say) an if statement does uniform control 330 * flow for all threads and that the if reads the same number 331 * of uniforms on each side. However, this scheme is easy to 332 * validate so it's all we allow for now. 333 */ 334 switch (QPU_GET_FIELD(inst, QPU_SIG)) { 335 case QPU_SIG_NONE: 336 case QPU_SIG_SCOREBOARD_UNLOCK: 337 case QPU_SIG_COLOR_LOAD: 338 case QPU_SIG_LOAD_TMU0: 339 case QPU_SIG_LOAD_TMU1: 340 break; 341 default: 342 DRM_DEBUG("uniforms address change must be " 343 "normal math\n"); 344 return false; 345 } 346 347 if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { 348 DRM_DEBUG("Uniform address reset must be an ADD.\n"); 349 return false; 350 } 351 352 if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) { 353 DRM_DEBUG("Uniform address reset must be unconditional.\n"); 354 return false; 355 } 356 357 if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP && 358 !(inst & QPU_PM)) { 359 DRM_DEBUG("No packing allowed on uniforms reset\n"); 360 return false; 361 } 362 363 if (add_lri == -1) { 364 DRM_DEBUG("First argument of uniform address write must be " 365 "an immediate value.\n"); 366 return false; 367 } 368 369 if (validation_state->live_immediates[add_lri] != expected_offset) { 370 DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n", 371 validation_state->live_immediates[add_lri], 372 expected_offset); 373 return false; 374 } 375 376 if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 377 !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { 378 DRM_DEBUG("Second argument of uniform address write must be " 379 "a uniform.\n"); 380 return false; 381 } 382 383 validation_state->needs_uniform_address_update = false; 384 validation_state->needs_uniform_address_for_loop = false; 385 return require_uniform_address_uniform(validated_shader); 386 } 387 388 static bool 389 check_reg_write(struct vc4_validated_shader_info *validated_shader, 390 struct vc4_shader_validation_state *validation_state, 391 bool is_mul) 392 { 393 uint64_t inst = validation_state->shader[validation_state->ip]; 394 uint32_t waddr = (is_mul ? 395 QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 396 QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 397 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 398 bool ws = inst & QPU_WS; 399 bool is_b = is_mul ^ ws; 400 u32 lri = waddr_to_live_reg_index(waddr, is_b); 401 402 if (lri != -1) { 403 uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 404 uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL); 405 406 if (sig == QPU_SIG_LOAD_IMM && 407 QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP && 408 ((is_mul && cond_mul == QPU_COND_ALWAYS) || 409 (!is_mul && cond_add == QPU_COND_ALWAYS))) { 410 validation_state->live_immediates[lri] = 411 QPU_GET_FIELD(inst, QPU_LOAD_IMM); 412 } else { 413 validation_state->live_immediates[lri] = ~0; 414 } 415 416 if (live_reg_is_upper_half(lri)) 417 validation_state->all_registers_used = true; 418 } 419 420 switch (waddr) { 421 case QPU_W_UNIFORMS_ADDRESS: 422 if (is_b) { 423 DRM_DEBUG("relative uniforms address change " 424 "unsupported\n"); 425 return false; 426 } 427 428 return validate_uniform_address_write(validated_shader, 429 validation_state, 430 is_mul); 431 432 case QPU_W_TLB_COLOR_MS: 433 case QPU_W_TLB_COLOR_ALL: 434 case QPU_W_TLB_Z: 435 /* These only interact with the tile buffer, not main memory, 436 * so they're safe. 437 */ 438 return true; 439 440 case QPU_W_TMU0_S: 441 case QPU_W_TMU0_T: 442 case QPU_W_TMU0_R: 443 case QPU_W_TMU0_B: 444 case QPU_W_TMU1_S: 445 case QPU_W_TMU1_T: 446 case QPU_W_TMU1_R: 447 case QPU_W_TMU1_B: 448 return check_tmu_write(validated_shader, validation_state, 449 is_mul); 450 451 case QPU_W_HOST_INT: 452 case QPU_W_TMU_NOSWAP: 453 case QPU_W_TLB_ALPHA_MASK: 454 case QPU_W_MUTEX_RELEASE: 455 /* XXX: I haven't thought about these, so don't support them 456 * for now. 457 */ 458 DRM_DEBUG("Unsupported waddr %d\n", waddr); 459 return false; 460 461 case QPU_W_VPM_ADDR: 462 DRM_DEBUG("General VPM DMA unsupported\n"); 463 return false; 464 465 case QPU_W_VPM: 466 case QPU_W_VPMVCD_SETUP: 467 /* We allow VPM setup in general, even including VPM DMA 468 * configuration setup, because the (unsafe) DMA can only be 469 * triggered by QPU_W_VPM_ADDR writes. 470 */ 471 return true; 472 473 case QPU_W_TLB_STENCIL_SETUP: 474 return true; 475 } 476 477 return true; 478 } 479 480 static void 481 track_live_clamps(struct vc4_validated_shader_info *validated_shader, 482 struct vc4_shader_validation_state *validation_state) 483 { 484 uint64_t inst = validation_state->shader[validation_state->ip]; 485 uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); 486 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 487 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 488 uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 489 uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); 490 uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 491 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 492 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 493 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 494 bool ws = inst & QPU_WS; 495 uint32_t lri_add_a, lri_add, lri_mul; 496 bool add_a_is_min_0; 497 498 /* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0), 499 * before we clear previous live state. 500 */ 501 lri_add_a = raddr_add_a_to_live_reg_index(inst); 502 add_a_is_min_0 = (lri_add_a != ~0 && 503 validation_state->live_max_clamp_regs[lri_add_a]); 504 505 /* Clear live state for registers written by our instruction. */ 506 lri_add = waddr_to_live_reg_index(waddr_add, ws); 507 lri_mul = waddr_to_live_reg_index(waddr_mul, !ws); 508 if (lri_mul != ~0) { 509 validation_state->live_max_clamp_regs[lri_mul] = false; 510 validation_state->live_min_clamp_offsets[lri_mul] = ~0; 511 } 512 if (lri_add != ~0) { 513 validation_state->live_max_clamp_regs[lri_add] = false; 514 validation_state->live_min_clamp_offsets[lri_add] = ~0; 515 } else { 516 /* Nothing further to do for live tracking, since only ADDs 517 * generate new live clamp registers. 518 */ 519 return; 520 } 521 522 /* Now, handle remaining live clamp tracking for the ADD operation. */ 523 524 if (cond_add != QPU_COND_ALWAYS) 525 return; 526 527 if (op_add == QPU_A_MAX) { 528 /* Track live clamps of a value to a minimum of 0 (in either 529 * arg). 530 */ 531 if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 || 532 (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) { 533 return; 534 } 535 536 validation_state->live_max_clamp_regs[lri_add] = true; 537 } else if (op_add == QPU_A_MIN) { 538 /* Track live clamps of a value clamped to a minimum of 0 and 539 * a maximum of some uniform's offset. 540 */ 541 if (!add_a_is_min_0) 542 return; 543 544 if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 545 !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && 546 sig != QPU_SIG_SMALL_IMM)) { 547 return; 548 } 549 550 validation_state->live_min_clamp_offsets[lri_add] = 551 validated_shader->uniforms_size; 552 } 553 } 554 555 static bool 556 check_instruction_writes(struct vc4_validated_shader_info *validated_shader, 557 struct vc4_shader_validation_state *validation_state) 558 { 559 uint64_t inst = validation_state->shader[validation_state->ip]; 560 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 561 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 562 bool ok; 563 564 if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) { 565 DRM_DEBUG("ADD and MUL both set up textures\n"); 566 return false; 567 } 568 569 ok = (check_reg_write(validated_shader, validation_state, false) && 570 check_reg_write(validated_shader, validation_state, true)); 571 572 track_live_clamps(validated_shader, validation_state); 573 574 return ok; 575 } 576 577 static bool 578 check_branch(uint64_t inst, 579 struct vc4_validated_shader_info *validated_shader, 580 struct vc4_shader_validation_state *validation_state, 581 int ip) 582 { 583 int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); 584 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 585 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 586 587 if ((int)branch_imm < 0) 588 validation_state->needs_uniform_address_for_loop = true; 589 590 /* We don't want to have to worry about validation of this, and 591 * there's no need for it. 592 */ 593 if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) { 594 DRM_DEBUG("branch instruction at %d wrote a register.\n", 595 validation_state->ip); 596 return false; 597 } 598 599 return true; 600 } 601 602 static bool 603 check_instruction_reads(struct vc4_validated_shader_info *validated_shader, 604 struct vc4_shader_validation_state *validation_state) 605 { 606 uint64_t inst = validation_state->shader[validation_state->ip]; 607 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 608 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 609 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 610 611 if (raddr_a == QPU_R_UNIF || 612 (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) { 613 /* This can't overflow the uint32_t, because we're reading 8 614 * bytes of instruction to increment by 4 here, so we'd 615 * already be OOM. 616 */ 617 validated_shader->uniforms_size += 4; 618 619 if (validation_state->needs_uniform_address_update) { 620 DRM_DEBUG("Uniform read with undefined uniform " 621 "address\n"); 622 return false; 623 } 624 } 625 626 if ((raddr_a >= 16 && raddr_a < 32) || 627 (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) { 628 validation_state->all_registers_used = true; 629 } 630 631 return true; 632 } 633 634 /* Make sure that all branches are absolute and point within the shader, and 635 * note their targets for later. 636 */ 637 static bool 638 vc4_validate_branches(struct vc4_shader_validation_state *validation_state) 639 { 640 uint32_t max_branch_target = 0; 641 int ip; 642 int last_branch = -2; 643 644 for (ip = 0; ip < validation_state->max_ip; ip++) { 645 uint64_t inst = validation_state->shader[ip]; 646 int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); 647 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 648 uint32_t after_delay_ip = ip + 4; 649 uint32_t branch_target_ip; 650 651 if (sig == QPU_SIG_PROG_END) { 652 /* There are two delay slots after program end is 653 * signaled that are still executed, then we're 654 * finished. validation_state->max_ip is the 655 * instruction after the last valid instruction in the 656 * program. 657 */ 658 validation_state->max_ip = ip + 3; 659 continue; 660 } 661 662 if (sig != QPU_SIG_BRANCH) 663 continue; 664 665 if (ip - last_branch < 4) { 666 DRM_DEBUG("Branch at %d during delay slots\n", ip); 667 return false; 668 } 669 last_branch = ip; 670 671 if (inst & QPU_BRANCH_REG) { 672 DRM_DEBUG("branching from register relative " 673 "not supported\n"); 674 return false; 675 } 676 677 if (!(inst & QPU_BRANCH_REL)) { 678 DRM_DEBUG("relative branching required\n"); 679 return false; 680 } 681 682 /* The actual branch target is the instruction after the delay 683 * slots, plus whatever byte offset is in the low 32 bits of 684 * the instruction. Make sure we're not branching beyond the 685 * end of the shader object. 686 */ 687 if (branch_imm % sizeof(inst) != 0) { 688 DRM_DEBUG("branch target not aligned\n"); 689 return false; 690 } 691 692 branch_target_ip = after_delay_ip + (branch_imm >> 3); 693 if (branch_target_ip >= validation_state->max_ip) { 694 DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n", 695 ip, branch_target_ip, 696 validation_state->max_ip); 697 return false; 698 } 699 set_bit(branch_target_ip, validation_state->branch_targets); 700 701 /* Make sure that the non-branching path is also not outside 702 * the shader. 703 */ 704 if (after_delay_ip >= validation_state->max_ip) { 705 DRM_DEBUG("Branch at %d continues past shader end " 706 "(%d/%d)\n", 707 ip, after_delay_ip, validation_state->max_ip); 708 return false; 709 } 710 set_bit(after_delay_ip, validation_state->branch_targets); 711 max_branch_target = max(max_branch_target, after_delay_ip); 712 } 713 714 if (max_branch_target > validation_state->max_ip - 3) { 715 DRM_DEBUG("Branch landed after QPU_SIG_PROG_END"); 716 return false; 717 } 718 719 return true; 720 } 721 722 /* Resets any known state for the shader, used when we may be branched to from 723 * multiple locations in the program (or at shader start). 724 */ 725 static void 726 reset_validation_state(struct vc4_shader_validation_state *validation_state) 727 { 728 int i; 729 730 for (i = 0; i < 8; i++) 731 validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0; 732 733 for (i = 0; i < LIVE_REG_COUNT; i++) { 734 validation_state->live_min_clamp_offsets[i] = ~0; 735 validation_state->live_max_clamp_regs[i] = false; 736 validation_state->live_immediates[i] = ~0; 737 } 738 } 739 740 static bool 741 texturing_in_progress(struct vc4_shader_validation_state *validation_state) 742 { 743 return (validation_state->tmu_write_count[0] != 0 || 744 validation_state->tmu_write_count[1] != 0); 745 } 746 747 static bool 748 vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state) 749 { 750 uint32_t ip = validation_state->ip; 751 752 if (!test_bit(ip, validation_state->branch_targets)) 753 return true; 754 755 if (texturing_in_progress(validation_state)) { 756 DRM_DEBUG("Branch target landed during TMU setup\n"); 757 return false; 758 } 759 760 /* Reset our live values tracking, since this instruction may have 761 * multiple predecessors. 762 * 763 * One could potentially do analysis to determine that, for 764 * example, all predecessors have a live max clamp in the same 765 * register, but we don't bother with that. 766 */ 767 reset_validation_state(validation_state); 768 769 /* Since we've entered a basic block from potentially multiple 770 * predecessors, we need the uniforms address to be updated before any 771 * unforms are read. We require that after any branch point, the next 772 * uniform to be loaded is a uniform address offset. That uniform's 773 * offset will be marked by the uniform address register write 774 * validation, or a one-off the end-of-program check. 775 */ 776 validation_state->needs_uniform_address_update = true; 777 778 return true; 779 } 780 781 struct vc4_validated_shader_info * 782 vc4_validate_shader(struct drm_gem_dma_object *shader_obj) 783 { 784 struct vc4_dev *vc4 = to_vc4_dev(shader_obj->base.dev); 785 bool found_shader_end = false; 786 int shader_end_ip = 0; 787 uint32_t last_thread_switch_ip = -3; 788 uint32_t ip; 789 struct vc4_validated_shader_info *validated_shader = NULL; 790 struct vc4_shader_validation_state validation_state; 791 792 if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) 793 return NULL; 794 795 memset(&validation_state, 0, sizeof(validation_state)); 796 validation_state.shader = shader_obj->vaddr; 797 validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t); 798 799 reset_validation_state(&validation_state); 800 801 validation_state.branch_targets = 802 kcalloc(BITS_TO_LONGS(validation_state.max_ip), 803 sizeof(unsigned long), GFP_KERNEL); 804 if (!validation_state.branch_targets) 805 goto fail; 806 807 validated_shader = kzalloc_objs(*validated_shader, 1); 808 if (!validated_shader) 809 goto fail; 810 811 if (!vc4_validate_branches(&validation_state)) 812 goto fail; 813 814 for (ip = 0; ip < validation_state.max_ip; ip++) { 815 uint64_t inst = validation_state.shader[ip]; 816 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 817 818 validation_state.ip = ip; 819 820 if (!vc4_handle_branch_target(&validation_state)) 821 goto fail; 822 823 if (ip == last_thread_switch_ip + 3) { 824 /* Reset r0-r3 live clamp data */ 825 int i; 826 827 for (i = 64; i < LIVE_REG_COUNT; i++) { 828 validation_state.live_min_clamp_offsets[i] = ~0; 829 validation_state.live_max_clamp_regs[i] = false; 830 validation_state.live_immediates[i] = ~0; 831 } 832 } 833 834 switch (sig) { 835 case QPU_SIG_NONE: 836 case QPU_SIG_WAIT_FOR_SCOREBOARD: 837 case QPU_SIG_SCOREBOARD_UNLOCK: 838 case QPU_SIG_COLOR_LOAD: 839 case QPU_SIG_LOAD_TMU0: 840 case QPU_SIG_LOAD_TMU1: 841 case QPU_SIG_PROG_END: 842 case QPU_SIG_SMALL_IMM: 843 case QPU_SIG_THREAD_SWITCH: 844 case QPU_SIG_LAST_THREAD_SWITCH: 845 if (!check_instruction_writes(validated_shader, 846 &validation_state)) { 847 DRM_DEBUG("Bad write at ip %d\n", ip); 848 goto fail; 849 } 850 851 if (!check_instruction_reads(validated_shader, 852 &validation_state)) 853 goto fail; 854 855 if (sig == QPU_SIG_PROG_END) { 856 found_shader_end = true; 857 shader_end_ip = ip; 858 } 859 860 if (sig == QPU_SIG_THREAD_SWITCH || 861 sig == QPU_SIG_LAST_THREAD_SWITCH) { 862 validated_shader->is_threaded = true; 863 864 if (ip < last_thread_switch_ip + 3) { 865 DRM_DEBUG("Thread switch too soon after " 866 "last switch at ip %d\n", ip); 867 goto fail; 868 } 869 last_thread_switch_ip = ip; 870 } 871 872 break; 873 874 case QPU_SIG_LOAD_IMM: 875 if (!check_instruction_writes(validated_shader, 876 &validation_state)) { 877 DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip); 878 goto fail; 879 } 880 break; 881 882 case QPU_SIG_BRANCH: 883 if (!check_branch(inst, validated_shader, 884 &validation_state, ip)) 885 goto fail; 886 887 if (ip < last_thread_switch_ip + 3) { 888 DRM_DEBUG("Branch in thread switch at ip %d", 889 ip); 890 goto fail; 891 } 892 893 break; 894 default: 895 DRM_DEBUG("Unsupported QPU signal %d at " 896 "instruction %d\n", sig, ip); 897 goto fail; 898 } 899 900 /* There are two delay slots after program end is signaled 901 * that are still executed, then we're finished. 902 */ 903 if (found_shader_end && ip == shader_end_ip + 2) 904 break; 905 } 906 907 if (ip == validation_state.max_ip) { 908 DRM_DEBUG("shader failed to terminate before " 909 "shader BO end at %zd\n", 910 shader_obj->base.size); 911 goto fail; 912 } 913 914 /* Might corrupt other thread */ 915 if (validated_shader->is_threaded && 916 validation_state.all_registers_used) { 917 DRM_DEBUG("Shader uses threading, but uses the upper " 918 "half of the registers, too\n"); 919 goto fail; 920 } 921 922 /* If we did a backwards branch and we haven't emitted a uniforms 923 * reset since then, we still need the uniforms stream to have the 924 * uniforms address available so that the backwards branch can do its 925 * uniforms reset. 926 * 927 * We could potentially prove that the backwards branch doesn't 928 * contain any uses of uniforms until program exit, but that doesn't 929 * seem to be worth the trouble. 930 */ 931 if (validation_state.needs_uniform_address_for_loop) { 932 if (!require_uniform_address_uniform(validated_shader)) 933 goto fail; 934 validated_shader->uniforms_size += 4; 935 } 936 937 /* Again, no chance of integer overflow here because the worst case 938 * scenario is 8 bytes of uniforms plus handles per 8-byte 939 * instruction. 940 */ 941 validated_shader->uniforms_src_size = 942 (validated_shader->uniforms_size + 943 4 * validated_shader->num_texture_samples); 944 945 kfree(validation_state.branch_targets); 946 947 return validated_shader; 948 949 fail: 950 kfree(validation_state.branch_targets); 951 if (validated_shader) { 952 kfree(validated_shader->uniform_addr_offsets); 953 kfree(validated_shader->texture_samples); 954 kfree(validated_shader); 955 } 956 return NULL; 957 } 958