/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
 *
 * Copyright (C) The Weather Channel, Inc. 2002.
 * Copyright (C) 2004 Nicolai Haehnle.
 * All Rights Reserved.
 *
 * The Weather Channel (TM) funded Tungsten Graphics to develop the
 * initial release of the Radeon 8500 driver under the XFree86 license.
 * This notice must be preserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Nicolai Haehnle <prefect_@gmx.net>
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include "drm.h"
#include "radeon_drm.h"
#include "drmP.h"
#include "radeon_drv.h"
#include "r300_reg.h"

#define R300_SIMULTANEOUS_CLIPRECTS	4

/*
 * Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
 */
static const int r300_cliprect_cntl[4] = {
    0xAAAA,
    0xEEEE,
    0xFEFE,
    0xFFFE
};

/*
 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
 * buffer, starting with index n.
 */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, int n)
{
    drm_clip_rect_t box;
    int nr;
    int i;
    RING_LOCALS;

    nr = cmdbuf->nbox - n;
    if (nr > R300_SIMULTANEOUS_CLIPRECTS)
        nr = R300_SIMULTANEOUS_CLIPRECTS;

    DRM_DEBUG("%i cliprects\n", nr);

    if (nr) {
        BEGIN_RING(6 + nr * 2);
        OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));

        for (i = 0; i < nr; ++i) {
            if (DRM_COPY_FROM_USER_UNCHECKED
                (&box, &cmdbuf->boxes[n + i], sizeof (box))) {
                DRM_ERROR("copy cliprect faulted\n");
                return (EFAULT);
            }

            box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
                R300_CLIPRECT_MASK;
            box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
                R300_CLIPRECT_MASK;
            box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
                R300_CLIPRECT_MASK;
            box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
                R300_CLIPRECT_MASK;

            OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
                (box.y1 << R300_CLIPRECT_Y_SHIFT));
            OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
                (box.y2 << R300_CLIPRECT_Y_SHIFT));
        }

        OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);

        /*
         * TODO/SECURITY: Force scissors to a safe value, otherwise
         * the client might be able to trample over memory.
         * The impact should be very limited, but I'd rather be safe
         * than sorry.
         */
        OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
        OUT_RING(0);
        OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
        ADVANCE_RING();
    } else {
        /*
         * Why we allow zero cliprect rendering:
         * There are some commands in a command buffer that must be
         * submitted even when there are no cliprects, e.g. DMA buffer
         * discard or state setting (though state setting could be
         * avoided by simulating a loss of context).
         *
         * Now since the cmdbuf interface is so chaotic right now (and
         * is bound to remain that way for a bit until things settle
         * down), it is basically impossible to filter out the commands
         * that are necessary and those that aren't.
         *
         * So I choose the safe way and don't do any filtering at all;
         * instead, I simply set up the engine so that all rendering
         * can't produce any fragments.
         */
        BEGIN_RING(2);
        OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
        ADVANCE_RING();
    }

    return (0);
}
static u8 r300_reg_flags[0x10000 >> 2];

void
r300_init_reg_flags(void)
{
    int i;

    (void) memset(r300_reg_flags, 0, 0x10000 >> 2);

#define ADD_RANGE_MARK(reg, count, mark)                        \
    for (i = ((reg) >> 2); i < ((reg) >> 2) + (count); i++)     \
        r300_reg_flags[i] |= (mark);

#define MARK_SAFE		1
#define MARK_CHECK_OFFSET	2

#define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)

    /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
    ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
    ADD_RANGE(0x2080, 1);
    ADD_RANGE(R300_SE_VTE_CNTL, 2);
    ADD_RANGE(0x2134, 2);
    ADD_RANGE(0x2140, 1);
    ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
    ADD_RANGE(0x21DC, 1);
    ADD_RANGE(0x221C, 1);
    ADD_RANGE(0x2220, 4);
    ADD_RANGE(0x2288, 1);
    ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
    ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
    ADD_RANGE(R300_GB_ENABLE, 1);
    ADD_RANGE(R300_GB_MSPOS0, 5);
    ADD_RANGE(R300_TX_CNTL, 1);
    ADD_RANGE(R300_TX_ENABLE, 1);
    ADD_RANGE(0x4200, 4);
    ADD_RANGE(0x4214, 1);
    ADD_RANGE(R300_RE_POINTSIZE, 1);
    ADD_RANGE(0x4230, 3);
    ADD_RANGE(R300_RE_LINE_CNT, 1);
    ADD_RANGE(0x4238, 1);
    ADD_RANGE(0x4260, 3);
    ADD_RANGE(0x4274, 4);
    ADD_RANGE(0x4288, 5);
    ADD_RANGE(0x42A0, 1);
    ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
    ADD_RANGE(0x42B4, 1);
    ADD_RANGE(R300_RE_CULL_CNTL, 1);
    ADD_RANGE(0x42C0, 2);
    ADD_RANGE(R300_RS_CNTL_0, 2);
    ADD_RANGE(R300_RS_INTERP_0, 8);
    ADD_RANGE(R300_RS_ROUTE_0, 8);
    ADD_RANGE(0x43A4, 2);
    ADD_RANGE(0x43E8, 1);
    ADD_RANGE(R300_PFS_CNTL_0, 3);
    ADD_RANGE(R300_PFS_NODE_0, 4);
    ADD_RANGE(R300_PFS_TEXI_0, 64);
    ADD_RANGE(0x46A4, 5);
    ADD_RANGE(R300_PFS_INSTR0_0, 64);
    ADD_RANGE(R300_PFS_INSTR1_0, 64);
    ADD_RANGE(R300_PFS_INSTR2_0, 64);
    ADD_RANGE(R300_PFS_INSTR3_0, 64);
    ADD_RANGE(0x4BC0, 1);
    ADD_RANGE(0x4BC8, 3);
    ADD_RANGE(R300_PP_ALPHA_TEST, 2);
    ADD_RANGE(0x4BD8, 1);
    ADD_RANGE(R300_PFS_PARAM_0_X, 64);
    ADD_RANGE(0x4E00, 1);
    ADD_RANGE(R300_RB3D_CBLEND, 2);
    ADD_RANGE(R300_RB3D_COLORMASK, 1);
    ADD_RANGE(0x4E10, 3);
    ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);
    /* check offset */
    ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
    ADD_RANGE(0x4E50, 9);
    ADD_RANGE(0x4E88, 1);
    ADD_RANGE(0x4EA0, 2);
    ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
    ADD_RANGE(0x4F10, 4);
    ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);
    /* check offset */
    ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
    ADD_RANGE(0x4F28, 1);
    ADD_RANGE(0x4F30, 2);
    ADD_RANGE(0x4F44, 1);
    ADD_RANGE(0x4F54, 1);

    ADD_RANGE(R300_TX_FILTER_0, 16);
    ADD_RANGE(R300_TX_FILTER1_0, 16);
    ADD_RANGE(R300_TX_SIZE_0, 16);
    ADD_RANGE(R300_TX_FORMAT_0, 16);
    ADD_RANGE(R300_TX_PITCH_0, 16);
    /* Texture offset is dangerous and needs more checking */
    ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
    ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
    ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);

    /* Sporadic registers used as primitives are emitted */
    ADD_RANGE(0x4f18, 1);
    ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
    ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
    ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
}

static __inline__ int r300_check_range(unsigned reg, int count)
{
    int i;

    if (reg & ~0xffff)
        return (-1);

    for (i = (reg >> 2); i < (reg >> 2) + count; i++)
        if (r300_reg_flags[i] != MARK_SAFE)
            return (1);

    return (0);
}

static inline int
r300_emit_carefully_checked_packet0(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
{
    int reg;
    int sz;
    int i;
    int values[64];
    RING_LOCALS;

    sz = header.packet0.count;
    reg = (header.packet0.reghi << 8) | header.packet0.reglo;

    if ((sz > 64) || (sz < 0)) {
        DRM_ERROR("Cannot emit more than 64 values at a time "
            "(reg=%04x sz=%d)\n", reg, sz);
        return (EINVAL);
    }

    for (i = 0; i < sz; i++) {
        values[i] = ((int *)(uintptr_t)cmdbuf->buf)[i];
        switch (r300_reg_flags[(reg >> 2) + i]) {
        case MARK_SAFE:
            break;
        case MARK_CHECK_OFFSET:
            if (!radeon_check_offset(dev_priv, (u32) values[i])) {
                DRM_ERROR("Offset failed range check "
                    "(reg=%04x sz=%d)\n", reg, sz);
                return (EINVAL);
            }
            break;
        default:
            DRM_ERROR("Register %04x failed check as flag=%02x\n",
                reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
            return (EINVAL);
        }
    }

    BEGIN_RING(1 + sz);
    OUT_RING(CP_PACKET0(reg, sz - 1));
    OUT_RING_TABLE(values, sz);
    ADVANCE_RING();

    cmdbuf->buf += sz * 4;
    cmdbuf->bufsz -= sz * 4;

    return (0);
}

/*
 * Emits a packet0 setting arbitrary registers.
 * Called by r300_do_cp_cmdbuf.
 *
 * Note that checks are performed on contents and addresses of the registers.
 */
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf,
    drm_r300_cmd_header_t header)
{
    int reg;
    int sz;
    RING_LOCALS;

    sz = header.packet0.count;
    reg = (header.packet0.reghi << 8) | header.packet0.reglo;

    if (!sz)
        return (0);

    if (sz * 4 > cmdbuf->bufsz)
        return (EINVAL);

    if (reg + sz * 4 >= 0x10000) {
        DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n",
            reg, sz);
        return (EINVAL);
    }

    if (r300_check_range(reg, sz)) {
        /* go and check everything */
        return (r300_emit_carefully_checked_packet0(dev_priv,
            cmdbuf, header));
    }

    /*
     * The rest of the data is safe to emit, whatever values
     * the user passed.
     */
    BEGIN_RING(1 + sz);
    OUT_RING(CP_PACKET0(reg, sz - 1));
    OUT_RING_TABLE(cmdbuf->buf, sz);
    ADVANCE_RING();

    cmdbuf->buf += sz * 4;
    cmdbuf->bufsz -= sz * 4;

    return (0);
}

/*
 * Uploads user-supplied vertex program instructions or parameters onto
 * the graphics card.
 * Called by r300_do_cp_cmdbuf.
 */
static inline int r300_emit_vpu(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
{
    int sz;
    int addr;
    RING_LOCALS;

    sz = header.vpu.count;
    addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

    if (!sz)
        return (0);
    if (sz * 16 > cmdbuf->bufsz)
        return (EINVAL);

    BEGIN_RING(5 + sz * 4);
    /*
     * Wait for the VAP to come to its senses. There is no need to emit
     * this more than once (only once before the VAP is programmed), but
     * that optimization is left for later.
     */
    OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
    OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
    OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
    OUT_RING_TABLE(cmdbuf->buf, sz * 4);
    ADVANCE_RING();

    cmdbuf->buf += sz * 16;
    cmdbuf->bufsz -= sz * 16;

    return (0);
}

/*
 * Emit a clear packet from userspace.
 * Called by r300_emit_packet3.
 */
static inline int r300_emit_clear(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf)
{
    RING_LOCALS;

    if (8 * 4 > cmdbuf->bufsz)
        return (EINVAL);

    BEGIN_RING(10);
    OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
    OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
        (1 << R300_PRIM_NUM_VERTICES_SHIFT));
    OUT_RING_TABLE(cmdbuf->buf, 8);
    ADVANCE_RING();

    cmdbuf->buf += 8 * 4;
    cmdbuf->bufsz -= 8 * 4;

    return (0);
}

static inline int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, u32 header)
{
    int count, i, k;
#define MAX_ARRAY_PACKET	64
    u32 payload[MAX_ARRAY_PACKET];
    u32 narrays;
    RING_LOCALS;

    count = (header >> 16) & 0x3fff;

    if ((count + 1) > MAX_ARRAY_PACKET) {
        DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
            count);
        return (EINVAL);
    }
    (void) memset(payload, 0, MAX_ARRAY_PACKET * 4);
    (void) memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);

    /* carefully check packet contents */

    narrays = payload[0];
    k = 0;
    i = 1;
    while ((k < narrays) && (i < (count + 1))) {
        i++;	/* skip attribute field */
        if (!radeon_check_offset(dev_priv, payload[i])) {
            DRM_ERROR("Offset failed range check (k=%d i=%d) "
                "while processing 3D_LOAD_VBPNTR packet.\n",
                k, i);
            return (EINVAL);
        }
        k++;
        i++;
        if (k == narrays)
            break;
        /* have one more to process, they come in pairs */
        if (!radeon_check_offset(dev_priv, payload[i])) {
            DRM_ERROR("Offset failed range check (k=%d i=%d) "
                "while processing 3D_LOAD_VBPNTR packet.\n",
                k, i);
            return (EINVAL);
        }
        k++;
        i++;
    }

    /* do the counts match what we expect? */
    if ((k != narrays) || (i != (count + 1))) {
        DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet "
            "(k=%d i=%d narrays=%d count+1=%d).\n",
            k, i, narrays, count + 1);
        return (EINVAL);
    }

    /* all clear, output packet */

    BEGIN_RING(count + 2);
    OUT_RING(header);
    OUT_RING_TABLE(payload, count + 1);
    ADVANCE_RING();

    cmdbuf->buf += (count + 2) * 4;
    cmdbuf->bufsz -= (count + 2) * 4;

    return (0);
}

static inline int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf)
{
    u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
    int count, ret;
    RING_LOCALS;

    count = (cmd[0] >> 16) & 0x3fff;

    if (cmd[0] & 0x8000) {
        u32 offset;

        if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
            RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
            offset = cmd[2] << 10;
            ret = !radeon_check_offset(dev_priv, offset);
            if (ret) {
                DRM_ERROR("Invalid bitblt first offset "
                    "is %08X\n", offset);
                return (EINVAL);
            }
        }

        if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
            (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
            offset = cmd[3] << 10;
            ret = !radeon_check_offset(dev_priv, offset);
            if (ret) {
                DRM_ERROR("Invalid bitblt second offset "
                    "is %08X\n", offset);
                return (EINVAL);
            }
        }
    }

    BEGIN_RING(count + 2);
    OUT_RING(cmd[0]);
    OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
    ADVANCE_RING();

    cmdbuf->buf += (count + 2) * 4;
    cmdbuf->bufsz -= (count + 2) * 4;

    return (0);
}

static inline int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf)
{
    u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
    int count, ret;
    RING_LOCALS;

    count = (cmd[0] >> 16) & 0x3fff;

    if ((cmd[1] & 0x8000ffff) != 0x80000810) {
        DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
        return (EINVAL);
    }
    ret = !radeon_check_offset(dev_priv, cmd[2]);
    if (ret) {
        DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
        return (EINVAL);
    }

    BEGIN_RING(count + 2);
    OUT_RING(cmd[0]);
    OUT_RING_TABLE(cmdbuf->buf + 4, count + 1);
    ADVANCE_RING();

    cmdbuf->buf += (count + 2) * 4;
    cmdbuf->bufsz -= (count + 2) * 4;

    return (0);
}

static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf)
{
    u32 header;
    int count;
    RING_LOCALS;

    if (4 > cmdbuf->bufsz)
        return (EINVAL);

    /*
     * FIXME: This simply emits a packet without much checking.
     * We need to be smarter.
     */

    /* obtain first word - actual packet3 header */
    header = *(u32 *)(uintptr_t)cmdbuf->buf;

    /* Is it packet 3? */
    if ((header >> 30) != 0x3) {
        DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
        return (EINVAL);
    }

    count = (header >> 16) & 0x3fff;

    /* Check again now that we know how much data to expect */
    if ((count + 2) * 4 > cmdbuf->bufsz) {
        DRM_ERROR("Expected packet3 of length %d but have only "
            "%d bytes left\n", (count + 2) * 4, cmdbuf->bufsz);
        return (EINVAL);
    }

    /* Is it a packet type we know about? */
    switch (header & 0xff00) {
    case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
        return (r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header));

    case RADEON_CNTL_BITBLT_MULTI:
        return (r300_emit_bitblt_multi(dev_priv, cmdbuf));

    case RADEON_CP_INDX_BUFFER:
        /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the GPU */
        return (r300_emit_indx_buffer(dev_priv, cmdbuf));

    case RADEON_CP_3D_DRAW_IMMD_2:
        /* triggers drawing using in-packet vertex data */
    case RADEON_CP_3D_DRAW_VBUF_2:
        /* triggers drawing of vertex buffers setup elsewhere */
    case RADEON_CP_3D_DRAW_INDX_2:
        /* triggers drawing using indices to vertex buffer */
    case RADEON_WAIT_FOR_IDLE:
    case RADEON_CP_NOP:
        /* these packets are safe */
        break;
    default:
        DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
        return (EINVAL);
    }

    BEGIN_RING(count + 2);
    OUT_RING(header);
    OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
    ADVANCE_RING();

    cmdbuf->buf += (count + 2) * 4;
    cmdbuf->bufsz -= (count + 2) * 4;

    return (0);
}

/*
 * Emit a rendering packet3 from userspace.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
{
    int n;
    int ret;
    char *orig_buf = cmdbuf->buf;
    int orig_bufsz = cmdbuf->bufsz;

    /*
     * This is a do-while-loop so that we run the interior at least once,
     * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
     */
    n = 0;
    do {
        if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
            ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
            if (ret)
                return (ret);

            cmdbuf->buf = orig_buf;
            cmdbuf->bufsz = orig_bufsz;
        }

        switch (header.packet3.packet) {
        case R300_CMD_PACKET3_CLEAR:
            DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
            ret = r300_emit_clear(dev_priv, cmdbuf);
            if (ret) {
                DRM_ERROR("r300_emit_clear failed\n");
                return (ret);
            }
            break;

        case R300_CMD_PACKET3_RAW:
            DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
            ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
            if (ret) {
                DRM_ERROR("r300_emit_raw_packet3 failed\n");
                return (ret);
            }
            break;

        default:
            DRM_ERROR("bad packet3 type %i at %p\n",
                header.packet3.packet,
                cmdbuf->buf - sizeof (header));
            return (EINVAL);
        }

        n += R300_SIMULTANEOUS_CLIPRECTS;
    } while (n < cmdbuf->nbox);

    return (0);
}

/*
 * Some of the R300 chips seem to be extremely touchy about the two registers
 * that are configured in r300_pacify.
 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 * sends a command buffer that contains only state setting commands and a
 * vertex program/parameter upload sequence, this will eventually lead to a
 * lockup, unless the sequence is bracketed by calls to r300_pacify.
 * So we should take great care to *always* call r300_pacify before
 * *anything* 3D related, and again afterwards. This is what the
 * call bracket in r300_do_cp_cmdbuf is for.
 */

/*
 * Emit the sequence to pacify R300.
 */
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
    RING_LOCALS;

    BEGIN_RING(6);
    OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
    OUT_RING(0xa);
    OUT_RING(CP_PACKET0(0x4f18, 0));
    OUT_RING(0x3);
    OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
    OUT_RING(0x0);
    ADVANCE_RING();
}

/*
 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
 * be careful about how this function is called.
 */
static void r300_discard_buffer(drm_device_t *dev, drm_buf_t *buf)
{
    drm_radeon_private_t *dev_priv = dev->dev_private;
    drm_radeon_buf_priv_t *buf_priv = buf->dev_private;

    buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
    buf->pending = 1;
    buf->used = 0;
}

static int r300_scratch(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf,
    drm_r300_cmd_header_t header)
{
    u32 *ref_age_base;
    u32 i, buf_idx, h_pending;
    RING_LOCALS;

    if (cmdbuf->bufsz < sizeof (uint64_t) +
        header.scratch.n_bufs * sizeof (buf_idx)) {
        return (EINVAL);
    }

    if (header.scratch.reg >= 5) {
        return (EINVAL);
    }

    dev_priv->scratch_ages[header.scratch.reg]++;

    ref_age_base = (u32 *)(uintptr_t)*((uint64_t *)(uintptr_t)cmdbuf->buf);

    cmdbuf->buf += sizeof (uint64_t);
    cmdbuf->bufsz -= sizeof (uint64_t);

    for (i = 0; i < header.scratch.n_bufs; i++) {
        buf_idx = *(u32 *)(uintptr_t)cmdbuf->buf;
        buf_idx *= 2;	/* 8 bytes per buf */

        if (DRM_COPY_TO_USER(ref_age_base + buf_idx,
            &dev_priv->scratch_ages[header.scratch.reg],
            sizeof (u32))) {
            return (EINVAL);
        }

        if (DRM_COPY_FROM_USER(&h_pending,
            ref_age_base + buf_idx + 1, sizeof (u32))) {
            return (EINVAL);
        }

        if (h_pending == 0) {
            return (EINVAL);
        }

        h_pending--;

        if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1,
            &h_pending, sizeof (u32))) {
            return (EINVAL);
        }

        cmdbuf->buf += sizeof (buf_idx);
        cmdbuf->bufsz -= sizeof (buf_idx);
    }

    BEGIN_RING(2);
    OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0));
    OUT_RING(dev_priv->scratch_ages[header.scratch.reg]);
    ADVANCE_RING();

    return (0);
}

/*
 * Parses and validates a user-supplied command buffer and emits appropriate
 * commands on the DMA ring buffer.
 * Called by the ioctl handler function radeon_cp_cmdbuf.
 */
/*ARGSUSED*/
int
r300_do_cp_cmdbuf(drm_device_t *dev,
    drm_file_t *fpriv, drm_radeon_kcmd_buffer_t *cmdbuf)
{
    drm_radeon_private_t *dev_priv = dev->dev_private;
    drm_device_dma_t *dma = dev->dma;
    drm_buf_t *buf = NULL;
    int emit_dispatch_age = 0;
    int ret = 0;

    DRM_DEBUG("\n");

    /*
     * See the comment above r300_pacify for why this call
     * must be here, and what the cleanup gotos are for.
     */
    r300_pacify(dev_priv);

    if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
        ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
        if (ret)
            goto cleanup;
    }

    while (cmdbuf->bufsz >= sizeof (drm_r300_cmd_header_t)) {
        int idx;
        drm_r300_cmd_header_t header;

        header.u = *(unsigned int *)(uintptr_t)cmdbuf->buf;

        cmdbuf->buf += sizeof (header);
        cmdbuf->bufsz -= sizeof (header);

        switch (header.header.cmd_type) {
        case R300_CMD_PACKET0:
            DRM_DEBUG("R300_CMD_PACKET0\n");
            ret = r300_emit_packet0(dev_priv, cmdbuf, header);
            if (ret) {
                DRM_ERROR("r300_emit_packet0 failed\n");
                goto cleanup;
            }
            break;

        case R300_CMD_VPU:
            DRM_DEBUG("R300_CMD_VPU\n");
            ret = r300_emit_vpu(dev_priv, cmdbuf, header);
            if (ret) {
                DRM_ERROR("r300_emit_vpu failed\n");
                goto cleanup;
            }
            break;

        case R300_CMD_PACKET3:
            DRM_DEBUG("R300_CMD_PACKET3\n");
            ret = r300_emit_packet3(dev_priv, cmdbuf, header);
            if (ret) {
                DRM_ERROR("r300_emit_packet3 failed\n");
                goto cleanup;
            }
            break;

        case R300_CMD_END3D:
            DRM_DEBUG("R300_CMD_END3D\n");
            /*
             * TODO:
             * Ideally the userspace driver should not need to
             * issue this call, i.e. the drm driver should issue
             * it automatically and prevent lockups. In practice,
             * we do not understand why this call is needed and
             * what it does (except for some vague guesses that
             * it has to do with cache coherence), and so the
             * userspace driver does it.
             *
             * Once we are sure which uses prevent lockups, the
             * code could be moved into the kernel and the
             * userspace driver would no longer need to issue
             * this command.
             *
             * Note that issuing this command does not hurt
             * anything, except possibly performance.
             */
            r300_pacify(dev_priv);
            break;

        case R300_CMD_CP_DELAY:
            /* simple enough, we can do it here */
            DRM_DEBUG("R300_CMD_CP_DELAY\n");
            {
                int i;
                RING_LOCALS;

                BEGIN_RING(header.delay.count);
                for (i = 0; i < header.delay.count; i++)
                    OUT_RING(RADEON_CP_PACKET2);
                ADVANCE_RING();
            }
            break;

        case R300_CMD_DMA_DISCARD:
            DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
            idx = header.dma.buf_idx;
            if (idx < 0 || idx >= dma->buf_count) {
                DRM_ERROR("buffer index %d (of %d max)\n",
                    idx, dma->buf_count - 1);
                ret = EINVAL;
                goto cleanup;
            }

            buf = dma->buflist[idx];
            if (buf->filp != fpriv || buf->pending) {
                DRM_ERROR("bad buffer %p %p %d\n",
                    buf->filp, fpriv, buf->pending);
                ret = EINVAL;
                goto cleanup;
            }

            emit_dispatch_age = 1;
            r300_discard_buffer(dev, buf);
            break;

        case R300_CMD_WAIT:
            /* simple enough, we can do it here */
            DRM_DEBUG("R300_CMD_WAIT\n");
            if (header.wait.flags == 0)
                break;	/* nothing to do */

            {
                RING_LOCALS;

                BEGIN_RING(2);
                OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
                OUT_RING((header.wait.flags & 0xf) << 14);
                ADVANCE_RING();
            }
            break;

        case R300_CMD_SCRATCH:
            DRM_DEBUG("R300_CMD_SCRATCH\n");
            ret = r300_scratch(dev_priv, cmdbuf, header);
            if (ret) {
                DRM_ERROR("r300_scratch failed\n");
                goto cleanup;
            }
            break;

        default:
            DRM_ERROR("bad cmd_type %i at %p\n",
                header.header.cmd_type,
                cmdbuf->buf - sizeof (header));
            ret = EINVAL;
            goto cleanup;
        }
    }

    DRM_DEBUG("END\n");

cleanup:
    r300_pacify(dev_priv);

    /*
     * We emit the vertex buffer age here, outside the pacifier "brackets",
     * for two reasons:
     * (1) This may coalesce multiple age emissions into a single one, and
     * (2) more importantly, some chips lock up hard when scratch registers
     *     are written inside the pacifier bracket.
     */
    if (emit_dispatch_age) {
        RING_LOCALS;

        /* Emit the vertex buffer age */
        BEGIN_RING(2);
        RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
        ADVANCE_RING();
    }

    COMMIT_RING();

    return (ret);
}