/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu_defines.h"

struct vc4_shader_validation_state {
	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[32 + 32 + 4];
	bool live_max_clamp_regs[32 + 32 + 4];
};
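
/* Maps a write address to an index into the live-tracking arrays
 * above: regfile A maps to 0-31, regfile B to 32-63, and the
 * accumulators r0-r3 to 64-67.  Returns ~0 for write addresses that
 * aren't tracked.
 */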
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}
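
/* Validates a write to one of the TMU setup registers (S/T/R/B).  A
 * "direct" texture read (an S coordinate write with no other TMU
 * parameters queued) must be an ADD of a uniform (the UBO base
 * address) to a register holding a live clamped offset, so the access
 * stays within the UBO.  Each parameter write records where it lands
 * in the uniform stream so that reloc_tex() can patch it later.
 */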
static bool
check_tmu_write(uint64_t inst,
		struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads().
	 */
	if (!is_direct)
		validated_shader->uniforms_size += 4;

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}
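
/* Validates the ADD or MUL write address of an instruction.  Writes
 * that could redirect memory traffic (the uniforms address register
 * and the general-purpose VPM DMA address) are rejected, a few
 * unaudited registers are refused for now, and TMU register writes
 * are handed off to check_tmu_write().
 */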
static bool
check_reg_write(uint64_t inst,
		struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		/* XXX: We'll probably need to support this for reladdr, but
		 * it's definitely a security-related one.
		 */
		DRM_ERROR("uniforms address load unsupported\n");
		return false;

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(inst, validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}
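
/* Tracks which registers hold values usable as clamped offsets for
 * direct TMU reads.  Any write first clears the destination's live
 * state; an unconditional ADD-pipe MAX against small immediate 0 then
 * marks its destination in live_max_clamp_regs, and a MIN of such a
 * value against a uniform records that uniform's offset in
 * live_min_clamp_offsets.
 */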
static void
track_live_clamps(uint64_t inst,
		  struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

static bool
check_instruction_writes(uint64_t inst,
			 struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(inst, validated_shader, validation_state,
			      false) &&
	      check_reg_write(inst, validated_shader, validation_state,
			      true));

	track_live_clamps(inst, validated_shader, validation_state);

	return ok;
}

static bool
check_instruction_reads(uint64_t inst,
			struct vc4_validated_shader_info *validated_shader)
{
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;
	}

	return true;
}
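
/* Top-level entry point: walks every instruction in the shader BO,
 * validating its reads and writes and requiring a PROG_END signal
 * (plus its two delay slots) before the end of the BO.  Returns the
 * validated shader metadata, or NULL on failure.
 */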
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t ip, max_ip;
	uint64_t *shader;
	struct vc4_validated_shader_info *validated_shader;
	struct vc4_shader_validation_state validation_state;
	int i;

	memset(&validation_state, 0, sizeof(validation_state));

	for (i = 0; i < 8; i++)
		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
		validation_state.live_min_clamp_offsets[i] = ~0;

	shader = shader_obj->vaddr;
	max_ip = shader_obj->base.size / sizeof(uint64_t);

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		return NULL;

	for (ip = 0; ip < max_ip; ip++) {
		uint64_t inst = shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
			if (!check_instruction_writes(inst, validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(inst, validated_shader))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(inst, validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	return validated_shader;

fail:
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}