xref: /titanic_44/usr/src/uts/intel/io/drm/r300_cmdbuf.c (revision 1cb875ae88fb9463b368e725c2444776595895cb)
1 /*
2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 /*
6  * r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
7  *
8  * Copyright (C) The Weather Channel, Inc.  2002.
9  * Copyright (C) 2004 Nicolai Haehnle.
10  * All Rights Reserved.
11  *
12  * The Weather Channel (TM) funded Tungsten Graphics to develop the
13  * initial release of the Radeon 8500 driver under the XFree86 license.
14  * This notice must be preserved.
15  *
16  * Permission is hereby granted, free of charge, to any person obtaining a
17  * copy of this software and associated documentation files (the "Software"),
18  * to deal in the Software without restriction, including without limitation
19  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
20  * and/or sell copies of the Software, and to permit persons to whom the
21  * Software is furnished to do so, subject to the following conditions:
22  *
23  * The above copyright notice and this permission notice (including the next
24  * paragraph) shall be included in all copies or substantial portions of the
25  * Software.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
30  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
31  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
32  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
33  * DEALINGS IN THE SOFTWARE.
34  *
35  * Authors:
36  *    Nicolai Haehnle <prefect_@gmx.net>
37  */
38 
39 #pragma ident	"%Z%%M%	%I%	%E% SMI"
40 
41 #include "drm.h"
42 #include "radeon_drm.h"
43 #include "drmP.h"
44 #include "radeon_drv.h"
45 #include "r300_reg.h"
46 
/* Upper bound on the cliprects programmed by one r300_emit_cliprects() call */
#define	R300_SIMULTANEOUS_CLIPRECTS		4

/*
 * Values written to R300_RE_CLIPRECT_CNTL, selected by the number of
 * active cliprects: entry [nr - 1] is used when nr cliprects are emitted.
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
	0xAAAA,		/* 1 cliprect */
	0xEEEE,		/* 2 cliprects */
	0xFEFE,		/* 3 cliprects */
	0xFFFE		/* 4 cliprects */
};
59 
60 /*
61  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
62  * buffer, starting with index n.
63  */
64 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
65     drm_radeon_kcmd_buffer_t *cmdbuf, int n)
66 {
67 	drm_clip_rect_t box;
68 	int nr;
69 	int i;
70 	RING_LOCALS;
71 
72 	nr = cmdbuf->nbox - n;
73 	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
74 		nr = R300_SIMULTANEOUS_CLIPRECTS;
75 
76 	DRM_DEBUG("%i cliprects\n", nr);
77 
78 	if (nr) {
79 		BEGIN_RING(6 + nr * 2);
80 		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
81 
82 		for (i = 0; i < nr; ++i) {
83 			if (DRM_COPY_FROM_USER_UNCHECKED
84 			    (&box, &cmdbuf->boxes[n + i], sizeof (box))) {
85 				DRM_ERROR("copy cliprect faulted\n");
86 				return (EFAULT);
87 			}
88 
89 			box.x1 =
90 			    (box.x1 +
91 			    R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
92 			box.y1 =
93 			    (box.y1 +
94 			    R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
95 			box.x2 =
96 			    (box.x2 +
97 			    R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
98 			box.y2 =
99 			    (box.y2 +
100 			    R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
101 
102 			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
103 			    (box.y1 << R300_CLIPRECT_Y_SHIFT));
104 			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
105 			    (box.y2 << R300_CLIPRECT_Y_SHIFT));
106 		}
107 
108 		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
109 
110 		/*
111 		 * TODO/SECURITY: Force scissors to a safe value, otherwise
112 		 * the client might be able to trample over memory.
113 		 * The impact should be very limited, but I'd rather be safe
114 		 * than sorry.
115 		 */
116 		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
117 		OUT_RING(0);
118 		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
119 		ADVANCE_RING();
120 	} else {
121 		/*
122 		 * Why we allow zero cliprect rendering:
123 		 * There are some commands in a command buffer that must be
124 		 * submitted even when there are no cliprects, e.g. DMA buffer
125 		 * discard or state setting (though state setting could be
126 		 * avoided by simulating a loss of context).
127 		 *
128 		 * Now since the cmdbuf interface is so chaotic right now (and
129 		 * is bound to remain that way for a bit until things settle
130 		 * down), it is basically impossible to filter out the commands
131 		 * that are necessary and those that aren't.
132 		 *
133 		 * So I choose the safe way and don't do any filtering at all;
134 		 * instead, I simply set up the engine so that all rendering
135 		 * can't produce any fragments.
136 		 */
137 		BEGIN_RING(2);
138 		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
139 		ADVANCE_RING();
140 	}
141 
142 	return (0);
143 }
144 
145 static u8 r300_reg_flags[0x10000 >> 2];
146 
147 void
148 r300_init_reg_flags(void)
149 {
150 	int i;
151 	(void) memset(r300_reg_flags, 0, 0x10000 >> 2);
152 #define	ADD_RANGE_MARK(reg, count, mark) \
153 		for (i = ((reg) >> 2); i < ((reg) >> 2) + (count); i++)\
154 			r300_reg_flags[i] |= (mark);
155 
156 #define	MARK_SAFE		1
157 #define	MARK_CHECK_OFFSET	2
158 
159 #define	ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)
160 
161 	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
162 	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
163 	ADD_RANGE(0x2080, 1);
164 	ADD_RANGE(R300_SE_VTE_CNTL, 2);
165 	ADD_RANGE(0x2134, 2);
166 	ADD_RANGE(0x2140, 1);
167 	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
168 	ADD_RANGE(0x21DC, 1);
169 	ADD_RANGE(0x221C, 1);
170 	ADD_RANGE(0x2220, 4);
171 	ADD_RANGE(0x2288, 1);
172 	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
173 	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
174 	ADD_RANGE(R300_GB_ENABLE, 1);
175 	ADD_RANGE(R300_GB_MSPOS0, 5);
176 	ADD_RANGE(R300_TX_CNTL, 1);
177 	ADD_RANGE(R300_TX_ENABLE, 1);
178 	ADD_RANGE(0x4200, 4);
179 	ADD_RANGE(0x4214, 1);
180 	ADD_RANGE(R300_RE_POINTSIZE, 1);
181 	ADD_RANGE(0x4230, 3);
182 	ADD_RANGE(R300_RE_LINE_CNT, 1);
183 	ADD_RANGE(0x4238, 1);
184 	ADD_RANGE(0x4260, 3);
185 	ADD_RANGE(0x4274, 4);
186 	ADD_RANGE(0x4288, 5);
187 	ADD_RANGE(0x42A0, 1);
188 	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
189 	ADD_RANGE(0x42B4, 1);
190 	ADD_RANGE(R300_RE_CULL_CNTL, 1);
191 	ADD_RANGE(0x42C0, 2);
192 	ADD_RANGE(R300_RS_CNTL_0, 2);
193 	ADD_RANGE(R300_RS_INTERP_0, 8);
194 	ADD_RANGE(R300_RS_ROUTE_0, 8);
195 	ADD_RANGE(0x43A4, 2);
196 	ADD_RANGE(0x43E8, 1);
197 	ADD_RANGE(R300_PFS_CNTL_0, 3);
198 	ADD_RANGE(R300_PFS_NODE_0, 4);
199 	ADD_RANGE(R300_PFS_TEXI_0, 64);
200 	ADD_RANGE(0x46A4, 5);
201 	ADD_RANGE(R300_PFS_INSTR0_0, 64);
202 	ADD_RANGE(R300_PFS_INSTR1_0, 64);
203 	ADD_RANGE(R300_PFS_INSTR2_0, 64);
204 	ADD_RANGE(R300_PFS_INSTR3_0, 64);
205 	ADD_RANGE(0x4BC0, 1);
206 	ADD_RANGE(0x4BC8, 3);
207 	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
208 	ADD_RANGE(0x4BD8, 1);
209 	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
210 	ADD_RANGE(0x4E00, 1);
211 	ADD_RANGE(R300_RB3D_CBLEND, 2);
212 	ADD_RANGE(R300_RB3D_COLORMASK, 1);
213 	ADD_RANGE(0x4E10, 3);
214 	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);
215 					/* check offset */
216 	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
217 	ADD_RANGE(0x4E50, 9);
218 	ADD_RANGE(0x4E88, 1);
219 	ADD_RANGE(0x4EA0, 2);
220 	ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
221 	ADD_RANGE(0x4F10, 4);
222 	ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);
223 					/* check offset */
224 	ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
225 	ADD_RANGE(0x4F28, 1);
226 	ADD_RANGE(0x4F30, 2);
227 	ADD_RANGE(0x4F44, 1);
228 	ADD_RANGE(0x4F54, 1);
229 
230 	ADD_RANGE(R300_TX_FILTER_0, 16);
231 	ADD_RANGE(R300_TX_FILTER1_0, 16);
232 	ADD_RANGE(R300_TX_SIZE_0, 16);
233 	ADD_RANGE(R300_TX_FORMAT_0, 16);
234 	ADD_RANGE(R300_TX_PITCH_0, 16);
235 	/* Texture offset is dangerous and needs more checking */
236 	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
237 	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
238 	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
239 
240 	/* Sporadic registers used as primitives are emitted */
241 	ADD_RANGE(0x4f18, 1);
242 	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
243 	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
244 	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
245 
246 }
247 
248 static __inline__ int r300_check_range(unsigned reg, int count)
249 {
250 	int i;
251 	if (reg & ~0xffff)
252 		return (-1);
253 	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
254 		if (r300_reg_flags[i] != MARK_SAFE)
255 			return (1);
256 	return (0);
257 }
258 
259 static inline int
260 r300_emit_carefully_checked_packet0(drm_radeon_private_t *dev_priv,
261     drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
262 {
263 	int reg;
264 	int sz;
265 	int i;
266 	int values[64];
267 	RING_LOCALS;
268 
269 	sz = header.packet0.count;
270 	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
271 
272 	if ((sz > 64) || (sz < 0)) {
273 		DRM_ERROR("Cannot emit more than 64 values at a time "
274 		    "(reg=%04x sz=%d)\n", reg, sz);
275 		return (EINVAL);
276 	}
277 	for (i = 0; i < sz; i++) {
278 		values[i] = ((int *)(uintptr_t)cmdbuf->buf)[i];
279 		switch (r300_reg_flags[(reg >> 2) + i]) {
280 		case MARK_SAFE:
281 			break;
282 		case MARK_CHECK_OFFSET:
283 			if (!RADEON_CHECK_OFFSET(dev_priv, (u32) values[i])) {
284 				DRM_ERROR("Offset failed range check "
285 				    "(reg=%04x sz=%d)\n", reg, sz);
286 				return (EINVAL);
287 			}
288 			break;
289 		default:
290 			DRM_ERROR("Register %04x failed check as flag=%02x\n",
291 			    reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
292 			return (EINVAL);
293 		}
294 	}
295 
296 	BEGIN_RING(1 + sz);
297 	OUT_RING(CP_PACKET0(reg, sz - 1));
298 	OUT_RING_TABLE(values, sz);
299 	ADVANCE_RING();
300 
301 	cmdbuf->buf += sz * 4;
302 	cmdbuf->bufsz -= sz * 4;
303 
304 	return (0);
305 }
306 
307 /*
308  * Emits a packet0 setting arbitrary registers.
309  * Called by r300_do_cp_cmdbuf.
310  *
311  * Note that checks are performed on contents and addresses of the registers
312  */
313 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
314 					drm_radeon_kcmd_buffer_t *cmdbuf,
315 					drm_r300_cmd_header_t header)
316 {
317 	int reg;
318 	int sz;
319 	RING_LOCALS;
320 
321 	sz = header.packet0.count;
322 	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
323 
324 	if (!sz)
325 		return (0);
326 
327 	if (sz * 4 > cmdbuf->bufsz)
328 		return (EINVAL);
329 
330 	if (reg + sz * 4 >= 0x10000) {
331 		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n",
332 		    reg, sz);
333 		return (EINVAL);
334 	}
335 
336 	if (r300_check_range(reg, sz)) {
337 		/* go and check everything */
338 		return (r300_emit_carefully_checked_packet0(dev_priv,
339 		    cmdbuf, header));
340 	}
341 	/*
342 	 * the rest of the data is safe to emit, whatever the values
343 	 * the user passed
344 	 */
345 
346 	BEGIN_RING(1 + sz);
347 	OUT_RING(CP_PACKET0(reg, sz - 1));
348 	OUT_RING_TABLE(cmdbuf->buf, sz);
349 	ADVANCE_RING();
350 
351 	cmdbuf->buf += sz * 4;
352 	cmdbuf->bufsz -= sz * 4;
353 
354 	return (0);
355 }
356 
357 /*
358  * Uploads user-supplied vertex program instructions or parameters onto
359  * the graphics card.
360  * Called by r300_do_cp_cmdbuf.
361  */
362 static inline int r300_emit_vpu(drm_radeon_private_t *dev_priv,
363     drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
364 {
365 	int sz;
366 	int addr;
367 	RING_LOCALS;
368 
369 	sz = header.vpu.count;
370 	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
371 
372 	if (!sz)
373 		return (0);
374 	if (sz * 16 > cmdbuf->bufsz)
375 		return (EINVAL);
376 
377 	BEGIN_RING(5 + sz * 4);
378 	/* Wait for VAP to come to senses.. */
379 	/*
380 	 * there is no need to emit it multiple times, (only once before
381 	 * VAP is programmed, but this optimization is for later
382 	 */
383 	OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
384 	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
385 	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
386 	OUT_RING_TABLE(cmdbuf->buf, sz * 4);
387 
388 	ADVANCE_RING();
389 
390 	cmdbuf->buf += sz * 16;
391 	cmdbuf->bufsz -= sz * 16;
392 
393 	return (0);
394 }
395 
396 /*
397  * Emit a clear packet from userspace.
398  * Called by r300_emit_packet3.
399  */
400 static inline int r300_emit_clear(drm_radeon_private_t *dev_priv,
401     drm_radeon_kcmd_buffer_t *cmdbuf)
402 {
403 	RING_LOCALS;
404 
405 	if (8 * 4 > cmdbuf->bufsz)
406 		return (EINVAL);
407 
408 	BEGIN_RING(10);
409 	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
410 	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
411 	    (1 << R300_PRIM_NUM_VERTICES_SHIFT));
412 	OUT_RING_TABLE(cmdbuf->buf, 8);
413 	ADVANCE_RING();
414 
415 	cmdbuf->buf += 8 * 4;
416 	cmdbuf->bufsz -= 8 * 4;
417 
418 	return (0);
419 }
420 
421 static inline int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
422     drm_radeon_kcmd_buffer_t *cmdbuf, u32 header)
423 {
424 	int count, i, k;
425 #define	MAX_ARRAY_PACKET		64
426 	u32 payload[MAX_ARRAY_PACKET];
427 	u32 narrays;
428 	RING_LOCALS;
429 
430 	count = (header >> 16) & 0x3fff;
431 
432 	if ((count + 1) > MAX_ARRAY_PACKET) {
433 		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
434 		    count);
435 		return (EINVAL);
436 	}
437 	(void) memset(payload, 0, MAX_ARRAY_PACKET * 4);
438 	(void) memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
439 
440 	/* carefully check packet contents */
441 
442 	narrays = payload[0];
443 	k = 0;
444 	i = 1;
445 	while ((k < narrays) && (i < (count + 1))) {
446 		i++;		/* skip attribute field */
447 		if (!RADEON_CHECK_OFFSET(dev_priv, payload[i])) {
448 			DRM_ERROR("Offset failed range check (k=%d i=%d) "
449 			    "while processing 3D_LOAD_VBPNTR packet.\n",
450 			    k, i);
451 			return (EINVAL);
452 		}
453 		k++;
454 		i++;
455 		if (k == narrays)
456 			break;
457 		/* have one more to process, they come in pairs */
458 		if (!RADEON_CHECK_OFFSET(dev_priv, payload[i])) {
459 			DRM_ERROR("Offset failed range check (k=%d i=%d) "
460 			    "while processing 3D_LOAD_VBPNTR packet.\n",
461 			    k, i);
462 			return (EINVAL);
463 		}
464 		k++;
465 		i++;
466 	}
467 	/* do the counts match what we expect ? */
468 	if ((k != narrays) || (i != (count + 1))) {
469 		DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet "
470 		    "(k=%d i=%d narrays=%d count+1=%d).\n",
471 		    k, i, narrays, count + 1);
472 		return (EINVAL);
473 	}
474 
475 	/* all clear, output packet */
476 
477 	BEGIN_RING(count + 2);
478 	OUT_RING(header);
479 	OUT_RING_TABLE(payload, count + 1);
480 	ADVANCE_RING();
481 
482 	cmdbuf->buf += (count + 2) * 4;
483 	cmdbuf->bufsz -= (count + 2) * 4;
484 
485 	return (0);
486 }
487 
488 static inline int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
489     drm_radeon_kcmd_buffer_t *cmdbuf)
490 {
491 	u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
492 	int count, ret;
493 	RING_LOCALS;
494 
495 	count = (cmd[0] >> 16) & 0x3fff;
496 
497 	if (cmd[0] & 0x8000) {
498 		u32 offset;
499 
500 		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
501 		    RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
502 			offset = cmd[2] << 10;
503 			ret = !RADEON_CHECK_OFFSET(dev_priv, offset);
504 			if (ret) {
505 				DRM_ERROR("Invalid bitblt first offset "
506 				    "is %08X\n", offset);
507 				return (EINVAL);
508 			}
509 		}
510 
511 		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
512 		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
513 			offset = cmd[3] << 10;
514 			ret = !RADEON_CHECK_OFFSET(dev_priv, offset);
515 			if (ret) {
516 				DRM_ERROR("Invalid bitblt second offset "
517 				    "is %08X\n", offset);
518 				return (EINVAL);
519 			}
520 
521 		}
522 	}
523 
524 	BEGIN_RING(count+2);
525 	OUT_RING(cmd[0]);
526 	OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
527 	ADVANCE_RING();
528 
529 	cmdbuf->buf += (count+2)*4;
530 	cmdbuf->bufsz -= (count+2)*4;
531 
532 	return (0);
533 }
534 
535 
536 static inline int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
537     drm_radeon_kcmd_buffer_t *cmdbuf)
538 {
539 	u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
540 	int count, ret;
541 	RING_LOCALS;
542 
543 	count = (cmd[0]>>16) & 0x3fff;
544 
545 	if ((cmd[1] & 0x8000ffff) != 0x80000810) {
546 		DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
547 		return (EINVAL);
548 	}
549 	ret = !RADEON_CHECK_OFFSET(dev_priv, cmd[2]);
550 	if (ret) {
551 		DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
552 		return (EINVAL);
553 	}
554 
555 	BEGIN_RING(count+2);
556 	OUT_RING(cmd[0]);
557 	OUT_RING_TABLE(cmdbuf->buf + 4, count + 1);
558 	ADVANCE_RING();
559 
560 	cmdbuf->buf += (count+2)*4;
561 	cmdbuf->bufsz -= (count+2)*4;
562 
563 	return (0);
564 }
565 
566 
567 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
568 					    drm_radeon_kcmd_buffer_t *cmdbuf)
569 {
570 	u32 header;
571 	int count;
572 	RING_LOCALS;
573 
574 	if (4 > cmdbuf->bufsz)
575 		return (EINVAL);
576 
577 	/*
578 	 * Fixme !! This simply emits a packet without much checking.
579 	 * We need to be smarter.
580 	 */
581 
582 	/* obtain first word - actual packet3 header */
583 	header = *(u32 *)(uintptr_t)cmdbuf->buf;
584 
585 	/* Is it packet 3 ? */
586 	if ((header >> 30) != 0x3) {
587 		DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
588 		return (EINVAL);
589 	}
590 
591 	count = (header >> 16) & 0x3fff;
592 
593 	/* Check again now that we know how much data to expect */
594 	if ((count + 2) * 4 > cmdbuf->bufsz) {
595 		DRM_ERROR("Expected packet3 of length %d but have only "
596 		    "%d bytes left\n", (count + 2) * 4, cmdbuf->bufsz);
597 		return (EINVAL);
598 	}
599 
600 	/* Is it a packet type we know about ? */
601 	switch (header & 0xff00) {
602 	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
603 		return (r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header));
604 
605 	case RADEON_CNTL_BITBLT_MULTI:
606 		return (r300_emit_bitblt_multi(dev_priv, cmdbuf));
607 
608 	case RADEON_CP_INDX_BUFFER:
609 			// DRAW_INDX_2 without INDX_BUFFER seems to lock
610 			// up the GPU
611 		return (r300_emit_indx_buffer(dev_priv, cmdbuf));
612 
613 	case RADEON_CP_3D_DRAW_IMMD_2:
614 			/* triggers drawing using in-packet vertex data */
615 	case RADEON_CP_3D_DRAW_VBUF_2:
616 			/* triggers drawing of vertex buffers setup elsewhere */
617 	case RADEON_CP_3D_DRAW_INDX_2:
618 			/* triggers drawing using indices to vertex buffer */
619 	case RADEON_WAIT_FOR_IDLE:
620 	case RADEON_CP_NOP:
621 		/* these packets are safe */
622 		break;
623 	default:
624 		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
625 		return (EINVAL);
626 	}
627 
628 	BEGIN_RING(count + 2);
629 	OUT_RING(header);
630 	OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
631 	ADVANCE_RING();
632 
633 	cmdbuf->buf += (count + 2) * 4;
634 	cmdbuf->bufsz -= (count + 2) * 4;
635 
636 	return (0);
637 }
638 
639 /*
640  * Emit a rendering packet3 from userspace.
641  * Called by r300_do_cp_cmdbuf.
642  */
643 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
644     drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
645 {
646 	int n;
647 	int ret;
648 	char *orig_buf = cmdbuf->buf;
649 	int orig_bufsz = cmdbuf->bufsz;
650 
651 	/*
652 	 * This is a do-while-loop so that we run the interior at least once,
653 	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
654 	 */
655 	n = 0;
656 	do {
657 		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
658 			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
659 			if (ret)
660 				return (ret);
661 
662 			cmdbuf->buf = orig_buf;
663 			cmdbuf->bufsz = orig_bufsz;
664 		}
665 
666 		switch (header.packet3.packet) {
667 		case R300_CMD_PACKET3_CLEAR:
668 			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
669 			ret = r300_emit_clear(dev_priv, cmdbuf);
670 			if (ret) {
671 				DRM_ERROR("r300_emit_clear failed\n");
672 				return (ret);
673 			}
674 			break;
675 
676 		case R300_CMD_PACKET3_RAW:
677 			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
678 			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
679 			if (ret) {
680 				DRM_ERROR("r300_emit_raw_packet3 failed\n");
681 				return (ret);
682 			}
683 			break;
684 
685 		default:
686 			DRM_ERROR("bad packet3 type %i at %p\n",
687 			    header.packet3.packet,
688 			    cmdbuf->buf - sizeof (header));
689 			return (EINVAL);
690 		}
691 
692 		n += R300_SIMULTANEOUS_CLIPRECTS;
693 	} while (n < cmdbuf->nbox);
694 
695 	return (0);
696 }
697 
/*
 * Some of the R300 chips seem to be extremely touchy about the two registers
 * that are configured in r300_pacify.
 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 * sends a command buffer that contains only state setting commands and a
 * vertex program/parameter upload sequence, this will eventually lead to a
 * lockup, unless the sequence is bracketed by calls to r300_pacify.
 * So we should take great care to *always* call r300_pacify before
 * *anything* 3D related, and again afterwards. This is what the
 * call bracket in r300_do_cp_cmdbuf is for.
 */

/*
 * Emit the sequence to pacify R300: one write each to
 * R300_RB3D_DSTCACHE_CTLSTAT and to register 0x4f18, followed by a
 * CP NOP packet (6 ring dwords in total).
 */
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;

	BEGIN_RING(6);
	/* first touchy register: R300_RB3D_DSTCACHE_CTLSTAT <- 0xa */
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(0xa);
	/* second touchy register: 0x4f18 <- 0x3 */
	OUT_RING(CP_PACKET0(0x4f18, 0));
	OUT_RING(0x3);
	/* trailing NOP packet with a single zero data dword */
	OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
	OUT_RING(0x0);
	ADVANCE_RING();
}
726 
727 /*
728  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
729  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
730  * be careful about how this function is called.
731  */
732 static void r300_discard_buffer(drm_device_t *dev, drm_buf_t *buf)
733 {
734 	drm_radeon_private_t *dev_priv = dev->dev_private;
735 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
736 
737 	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
738 	buf->pending = 1;
739 	buf->used = 0;
740 }
741 
742 static int r300_scratch(drm_radeon_private_t *dev_priv,
743 			drm_radeon_kcmd_buffer_t *cmdbuf,
744 			drm_r300_cmd_header_t header)
745 {
746 	u32 *ref_age_base;
747 	u32 i, buf_idx, h_pending;
748 	RING_LOCALS;
749 
750 	if (cmdbuf->bufsz < sizeof (uint64_t) +
751 	    header.scratch.n_bufs * sizeof (buf_idx)) {
752 		return (EINVAL);
753 	}
754 
755 	if (header.scratch.reg >= 5) {
756 		return (EINVAL);
757 	}
758 
759 	dev_priv->scratch_ages[header.scratch.reg] ++;
760 
761 	ref_age_base = (u32 *)(uintptr_t)*((uint64_t *)(uintptr_t)cmdbuf->buf);
762 
763 	cmdbuf->buf += sizeof (uint64_t);
764 	cmdbuf->bufsz -= sizeof (uint64_t);
765 
766 	for (i = 0; i < header.scratch.n_bufs; i++) {
767 		buf_idx = *(u32 *)(uintptr_t)cmdbuf->buf;
768 		buf_idx *= 2; /* 8 bytes per buf */
769 
770 		if (DRM_COPY_TO_USER(ref_age_base + buf_idx,
771 		    &dev_priv->scratch_ages[header.scratch.reg],
772 		    sizeof (u32))) {
773 			return (EINVAL);
774 		}
775 
776 		if (DRM_COPY_FROM_USER(&h_pending,
777 		    ref_age_base + buf_idx + 1, sizeof (u32))) {
778 			return (EINVAL);
779 		}
780 
781 		if (h_pending == 0) {
782 			return (EINVAL);
783 		}
784 
785 		h_pending--;
786 
787 		if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1,
788 		    &h_pending, sizeof (u32))) {
789 			return (EINVAL);
790 		}
791 
792 		cmdbuf->buf += sizeof (buf_idx);
793 		cmdbuf->bufsz -= sizeof (buf_idx);
794 	}
795 
796 	BEGIN_RING(2);
797 	OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0));
798 	OUT_RING(dev_priv->scratch_ages[header.scratch.reg]);
799 	ADVANCE_RING();
800 
801 	return (0);
802 }
803 
/*
 * Parses and validates a user-supplied command buffer and emits appropriate
 * commands on the DMA ring buffer.
 * Called by the ioctl handler function radeon_cp_cmdbuf.
 *
 *	dev	DRM device; supplies dev_private and the DMA buffer list
 *	fpriv	file handle of the caller, compared against the owner of
 *		any DMA buffer being discarded
 *	cmdbuf	command buffer to consume (buf/bufsz are advanced as
 *		commands are parsed)
 *
 * Returns 0 on success or the error of the first command that failed.
 * On every path, including errors, the closing r300_pacify() is emitted
 * and COMMIT_RING() is invoked before returning.
 */
/*ARGSUSED*/
int
r300_do_cp_cmdbuf(drm_device_t *dev,
    drm_file_t *fpriv, drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int emit_dispatch_age = 0;
	int ret = 0;

	DRM_DEBUG("\n");

	/*
	 * See the comment above r300_pacify for why this call
	 * must be here, and what the cleanup gotos are for.
	 */
	r300_pacify(dev_priv);

	/*
	 * With few enough cliprects they are programmed once, up front.
	 * Larger sets are emitted in groups by r300_emit_packet3.
	 */
	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
		if (ret)
			goto cleanup;
	}

	/* Consume one command header, plus its data, per iteration. */
	while (cmdbuf->bufsz >= sizeof (drm_r300_cmd_header_t)) {
		int idx;
		drm_r300_cmd_header_t header;

		header.u = *(unsigned int *)(uintptr_t)cmdbuf->buf;

		cmdbuf->buf += sizeof (header);
		cmdbuf->bufsz -= sizeof (header);

		switch (header.header.cmd_type) {
		case R300_CMD_PACKET0:
			DRM_DEBUG("R300_CMD_PACKET0\n");
			ret = r300_emit_packet0(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet0 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_VPU:
			DRM_DEBUG("R300_CMD_VPU\n");
			ret = r300_emit_vpu(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_vpu failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_PACKET3:
			DRM_DEBUG("R300_CMD_PACKET3\n");
			ret = r300_emit_packet3(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet3 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_END3D:
			DRM_DEBUG("R300_CMD_END3D\n");
			/*
			 * TODO:
			 * Ideally userspace driver should not need to issue
			 * this call, i.e. the drm driver should issue it
			 * automatically and prevent lockups. In practice, we
			 * do not understand why this call is needed and what
			 * it does (except for some vague guesses that it has
			 * to do with cache coherence) and so the user space
			 * driver does it.
			 *
			 * Once we are sure which uses prevent lockups the code
			 * could be moved into the kernel and the userspace
			 * driver will not need to use this command.
			 *
			 * Note that issuing this command does not hurt anything
			 * except, possibly, performance
			 */
			r300_pacify(dev_priv);
			break;

		case R300_CMD_CP_DELAY:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_CP_DELAY\n");
			{
				int i;
				RING_LOCALS;

				/* emit header.delay.count PACKET2 dwords */
				BEGIN_RING(header.delay.count);
				for (i = 0; i < header.delay.count; i++)
					OUT_RING(RADEON_CP_PACKET2);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
				    idx, dma->buf_count - 1);
				ret = EINVAL;
				goto cleanup;
			}

			/*
			 * The buffer must belong to the caller and must not
			 * already be pending.
			 */
			buf = dma->buflist[idx];
			if (buf->filp != fpriv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
				    buf->filp, fpriv, buf->pending);
				ret = EINVAL;
				goto cleanup;
			}

			/* the age is emitted once, after the loop */
			emit_dispatch_age = 1;
			r300_discard_buffer(dev, buf);
			break;

		case R300_CMD_WAIT:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_WAIT\n");
			if (header.wait.flags == 0)
				break;	/* nothing to do */

			{
				RING_LOCALS;

				/* low 4 flag bits go to bits 14..17 */
				BEGIN_RING(2);
				OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
				OUT_RING((header.wait.flags & 0xf) << 14);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_SCRATCH:
			DRM_DEBUG("R300_CMD_SCRATCH\n");
			ret = r300_scratch(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_scratch failed\n");
				goto cleanup;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %i at %p\n",
			    header.header.cmd_type,
			    cmdbuf->buf - sizeof (header));
			ret = EINVAL;
			goto cleanup;
		}
	}

	DRM_DEBUG("END\n");

cleanup:
	/* Close the pacifier bracket opened at the top, on all paths. */
	r300_pacify(dev_priv);

	/*
	 * We emit the vertex buffer age here, outside the pacifier "brackets"
	 * for two reasons:
	 * (1) This may coalesce multiple age emissions into a single one and
	 * (2) more importantly, some chips lock up hard when scratch registers
	 * 		are written inside the pacifier bracket.
	 */
	if (emit_dispatch_age) {
		RING_LOCALS;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);
		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
		ADVANCE_RING();
	}

	COMMIT_RING();

	return (ret);
}
988