xref: /titanic_50/usr/src/uts/intel/io/drm/r300_reg.h (revision ff17c8bf86c3e567734be83f90267edee20f580f)
1 /*
2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 #pragma ident	"%Z%%M%	%I%	%E% SMI"
6 
7 #ifndef	__R300_REG_H_
8 #define	__R300_REG_H_
9 
10 #ifdef	__cplusplus
11 extern "C" {
12 #endif
13 
14 
15 /*
16  * Copyright (C) 2004-2005 Nicolai Haehnle et al.
17  * Permission is hereby granted, free of charge, to any person obtaining a
18  * copy of this software and associated documentation files (the "Software"),
19  * to deal in the Software without restriction, including without limitation
20  * on the rights to use, copy, modify, merge, publish, distribute, sub
21  * license, and/or sell copies of the Software, and to permit persons to whom
22  * the Software is furnished to do so, subject to the following conditions:
23  *
24  * The above copyright notice and this permission notice (including the next
25  * paragraph) shall be included in all copies or substantial portions of the
26  * Software.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
31  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
32  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
33  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
34  * USE OR OTHER DEALINGS IN THE SOFTWARE.
35  */
36 
37 
/*
 * Memory controller latency timer registers.  The *_SHIFT values are the
 * bit offsets of the individual fields; within each register they are
 * spaced 4 bits apart.
 */

/* Miscellaneous latency timer */
#define	R300_MC_INIT_MISC_LAT_TIMER		0x180
#define	R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT	0
#define	R300_MC_MISC__MC_VF_INIT_LAT_SHIFT	4
#define	R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT	8
#define	R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT	12
#define	R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT	16
#define	R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT	20
#define	R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT	24
#define	R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT	28

/* Graphics latency timer */
#define	R300_MC_INIT_GFX_LAT_TIMER		0x154
#define	R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT	0
#define	R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT	4
#define	R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT	8
#define	R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT	12
#define	R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT	16
#define	R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT	20
#define	R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT	24
#define	R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT	28
58 
59 /*
60  * This file contains registers and constants for the R300. They have been
61  * found mostly by examining command buffers captured using glxtest, as well
62  * as by extrapolating some known registers and constants from the R200.
63  *
64  * I am fairly certain that they are correct unless stated otherwise in
65  * comments.
66  */
67 
/* Viewport scale/offset registers, one scale and one offset per axis. */
#define	R300_SE_VPORT_XSCALE	0x1d98
#define	R300_SE_VPORT_XOFFSET	0x1d9c
#define	R300_SE_VPORT_YSCALE	0x1da0
#define	R300_SE_VPORT_YOFFSET	0x1da4
#define	R300_SE_VPORT_ZSCALE	0x1da8
#define	R300_SE_VPORT_ZOFFSET	0x1dac
74 
75 
/*
 * This register is written directly and also starts the data section in
 * many 3D CP_PACKET3 packets.
 */
#define	R300_VAP_VF_CNTL	0x2084

/* Primitive type field */
#define	R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT		0
#define	R300_VAP_VF_CNTL__PRIM_NONE			(0 << 0)
#define	R300_VAP_VF_CNTL__PRIM_POINTS			(1 << 0)
#define	R300_VAP_VF_CNTL__PRIM_LINES			(2 << 0)
#define	R300_VAP_VF_CNTL__PRIM_LINE_STRIP		(3 << 0)
#define	R300_VAP_VF_CNTL__PRIM_TRIANGLES		(4 << 0)
#define	R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN		(5 << 0)
#define	R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP		(6 << 0)
#define	R300_VAP_VF_CNTL__PRIM_LINE_LOOP		(12 << 0)
#define	R300_VAP_VF_CNTL__PRIM_QUADS			(13 << 0)
#define	R300_VAP_VF_CNTL__PRIM_QUAD_STRIP		(14 << 0)
#define	R300_VAP_VF_CNTL__PRIM_POLYGON			(15 << 0)

/* Primitive walk field */
#define	R300_VAP_VF_CNTL__PRIM_WALK__SHIFT		4
/* State based: direct writes to registers trigger vertex generation. */
#define	R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED		(0 << 4)
#define	R300_VAP_VF_CNTL__PRIM_WALK_INDICES		(1 << 4)
#define	R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST		(2 << 4)
#define	R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED	(3 << 4)

/* I don't think I saw these three used. */
#define	R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT		6
#define	R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT	9
#define	R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT	10

/* Index size: when this bit is not set, indices are assumed to be 16 bit. */
#define	R300_VAP_VF_CNTL__INDEX_SIZE_32bit		(1 << 11)
/* Number of vertices */
#define	R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT		16
109 
/* BEGIN: Wild guesses */

/* Output vertex format 0: "present" flags, one bit per attribute. */
#define	R300_VAP_OUTPUT_VTX_FMT_0			0x2090
#define	R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT		(1 << 0)
#define	R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT	(1 << 1)
#define	R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT	(1 << 2)  /* GUESS */
#define	R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT	(1 << 3)  /* GUESS */
#define	R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT	(1 << 4)  /* GUESS */
#define	R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT	(1 << 16) /* GUESS */

/* Output vertex format 1: bit offsets of the per-texture component counts. */
#define	R300_VAP_OUTPUT_VTX_FMT_1			0x2094
#define	R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT	0
#define	R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT	3
#define	R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT	6
#define	R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT	9
#define	R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT	12
#define	R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT	15
#define	R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT	18
#define	R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT	21
/* END */
129 
/* Viewport transform control register and its flag bits. */
#define	R300_SE_VTE_CNTL		0x20b0
#define	R300_VPORT_X_SCALE_ENA		(1 << 0)
#define	R300_VPORT_X_OFFSET_ENA		(1 << 1)
#define	R300_VPORT_Y_SCALE_ENA		(1 << 2)
#define	R300_VPORT_Y_OFFSET_ENA		(1 << 3)
#define	R300_VPORT_Z_SCALE_ENA		(1 << 4)
#define	R300_VPORT_Z_OFFSET_ENA		(1 << 5)
#define	R300_VTX_XY_FMT			(1 << 8)
#define	R300_VTX_Z_FMT			(1 << 9)
#define	R300_VTX_W0_FMT			(1 << 10)
#define	R300_VTX_W0_NORMALIZE		(1 << 11)
#define	R300_VTX_ST_DENORMALIZED	(1 << 12)
142 
143 /* BEGIN: Vertex data assembly - lots of uncertainties */
144 /* gap */
/*
 * Where do we get our vertex data?
 *
 * Vertex data comes either from immediate mode registers or from vertex
 * arrays.  There appears to be no mixed mode (though we can force the
 * pitch of vertex arrays to 0, effectively reusing the same element over
 * and over again).
 *
 * Immediate mode is controlled by the INPUT_CNTL registers.  I am not sure
 * if these registers influence vertex array processing.
 *
 * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3.
 *
 * In both cases, vertex attributes are then passed through INPUT_ROUTE.
 *
 * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data
 * into the vertex processor's input registers.  The first word routes the
 * first input, the second word the second, etc.  The corresponding input
 * is routed into the register with the given index.  The list is ended by
 * a word with INPUT_ROUTE_END set.
 *
 * Always set COMPONENTS_4 in immediate mode.
 */

/* Register addresses of the routing words */
#define	R300_VAP_INPUT_ROUTE_0_0		0x2150
#define	R300_VAP_INPUT_ROUTE_0_1		0x2154
#define	R300_VAP_INPUT_ROUTE_0_2		0x2158
#define	R300_VAP_INPUT_ROUTE_0_3		0x215C
#define	R300_VAP_INPUT_ROUTE_0_4		0x2160
#define	R300_VAP_INPUT_ROUTE_0_5		0x2164
#define	R300_VAP_INPUT_ROUTE_0_6		0x2168
#define	R300_VAP_INPUT_ROUTE_0_7		0x216C

/* Fields of a routing word */
#define	R300_INPUT_ROUTE_COMPONENTS_1		(0 << 0)
#define	R300_INPUT_ROUTE_COMPONENTS_2		(1 << 0)
#define	R300_INPUT_ROUTE_COMPONENTS_3		(2 << 0)
#define	R300_INPUT_ROUTE_COMPONENTS_4		(3 << 0)
#define	R300_INPUT_ROUTE_COMPONENTS_RGBA	(4 << 0)	/* GUESS */
#define	R300_VAP_INPUT_ROUTE_IDX_SHIFT		8
#define	R300_VAP_INPUT_ROUTE_IDX_MASK		(31 << 8)	/* GUESS */
#define	R300_VAP_INPUT_ROUTE_END		(1 << 13)
#define	R300_INPUT_ROUTE_IMMEDIATE_MODE		(0 << 14)	/* GUESS */
#define	R300_INPUT_ROUTE_FLOAT			(1 << 14)	/* GUESS */
#define	R300_INPUT_ROUTE_UNSIGNED_BYTE		(2 << 14)	/* GUESS */
#define	R300_INPUT_ROUTE_FLOAT_COLOR		(3 << 14)	/* GUESS */
188 
189 /* gap */
/*
 * Notes:
 *  - always set up to produce at least two attributes:
 *    if the vertex program uses only position, fglrx will set normal, too
 *  - the INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal
 */
#define	R300_VAP_INPUT_CNTL_0		0x2180
#define	R300_INPUT_CNTL_0_COLOR		(1 << 0)

#define	R300_VAP_INPUT_CNTL_1		0x2184
#define	R300_INPUT_CNTL_POS		(1 << 0)
#define	R300_INPUT_CNTL_NORMAL		(1 << 1)
#define	R300_INPUT_CNTL_COLOR		(1 << 2)
#define	R300_INPUT_CNTL_TC0		(1 << 10)
#define	R300_INPUT_CNTL_TC1		(1 << 11)
#define	R300_INPUT_CNTL_TC2		(1 << 12)	/* GUESS */
#define	R300_INPUT_CNTL_TC3		(1 << 13)	/* GUESS */
#define	R300_INPUT_CNTL_TC4		(1 << 14)	/* GUESS */
#define	R300_INPUT_CNTL_TC5		(1 << 15)	/* GUESS */
#define	R300_INPUT_CNTL_TC6		(1 << 16)	/* GUESS */
#define	R300_INPUT_CNTL_TC7		(1 << 17)	/* GUESS */
208 
209 /* gap */
/*
 * Words parallel to INPUT_ROUTE_0.  All words that are active in
 * INPUT_ROUTE_0 are set to a swizzling bit pattern; other words are 0.
 *
 * In immediate mode, the pattern is always set to xyzw.  In vertex array
 * mode, the swizzling pattern is used e.g. to set the zw components in
 * texture coordinates with only two components.
 */

/* Register addresses of the swizzle words */
#define	R300_VAP_INPUT_ROUTE_1_0	0x21E0
#define	R300_VAP_INPUT_ROUTE_1_1	0x21E4
#define	R300_VAP_INPUT_ROUTE_1_2	0x21E8
#define	R300_VAP_INPUT_ROUTE_1_3	0x21EC
#define	R300_VAP_INPUT_ROUTE_1_4	0x21F0
#define	R300_VAP_INPUT_ROUTE_1_5	0x21F4
#define	R300_VAP_INPUT_ROUTE_1_6	0x21F8
#define	R300_VAP_INPUT_ROUTE_1_7	0x21FC

/* Selector values placed at each of the *_SHIFT offsets below */
#define	R300_INPUT_ROUTE_SELECT_X	0
#define	R300_INPUT_ROUTE_SELECT_Y	1
#define	R300_INPUT_ROUTE_SELECT_Z	2
#define	R300_INPUT_ROUTE_SELECT_W	3
#define	R300_INPUT_ROUTE_SELECT_ZERO	4
#define	R300_INPUT_ROUTE_SELECT_ONE	5
#define	R300_INPUT_ROUTE_SELECT_MASK	7

#define	R300_INPUT_ROUTE_X_SHIFT	0
#define	R300_INPUT_ROUTE_Y_SHIFT	3
#define	R300_INPUT_ROUTE_Z_SHIFT	6
#define	R300_INPUT_ROUTE_W_SHIFT	9
#define	R300_INPUT_ROUTE_ENABLE		(15 << 12)
236 
237 /* END */
238 
239 /* gap */
/*
 * BEGIN: Upload vertex program and data
 *
 * The programmable vertex shader unit has a memory bank of unknown size
 * that can be written to in 16 byte units by writing the address into
 * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs).
 *
 * Pointers into the memory bank are always in multiples of 16 bytes.
 *
 * The memory bank is divided into areas with fixed meaning.
 *
 * Starting at address UPLOAD_PROGRAM: vertex program instructions.
 * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB),
 * whereas the difference between known addresses suggests size 512.
 *
 * Starting at address UPLOAD_PARAMETERS: vertex program parameters.
 * Native reported limits and the VPI layout suggest size 256, whereas
 * the difference between known addresses suggests size 512.
 *
 * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the
 * floating point pointsize.  The exact purpose of this state is uncertain,
 * as there is also the R300_RE_POINTSIZE register.
 *
 * Multiple vertex programs and parameter sets can be loaded at once,
 * which could explain the size discrepancy.
 */
#define	R300_VAP_PVS_UPLOAD_ADDRESS	0x2200
#define	R300_PVS_UPLOAD_PROGRAM		0x000
#define	R300_PVS_UPLOAD_PARAMETERS	0x200
#define	R300_PVS_UPLOAD_POINTSIZE	0x406
/* gap */
#define	R300_VAP_PVS_UPLOAD_DATA	0x2208
/* END */
270 
271 /* gap */
/*
 * The purpose of this register is unknown.  What is known is that it is
 * set to 221C_CLEAR for clear operations and to 221C_NORMAL for normal
 * rendering.
 */
#define	R300_VAP_UNKNOWN_221C		0x221C
#define	R300_221C_NORMAL		0x00000
#define	R300_221C_CLEAR			0x1C000

/* gap */
/*
 * Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
 * rendering commands and overwriting vertex program parameters.
 * Therefore, I suspect that writing zero to 0x2284 synchronizes the engine
 * and avoids bugs caused by still-running shaders reading bad data from
 * memory.
 */
#define	R300_VAP_PVS_WAITIDLE		0x2284	/* GUESS */

/* Absolutely no clue what this register is about. */
#define	R300_VAP_UNKNOWN_2288		0x2288
#define	R300_2288_R300			0x750000	/* -- nh */
#define	R300_2288_RV350			0x00FFFF	/* -- Vladimir */
294 
295 /* gap */
/*
 * Addresses are relative to the vertex program instruction area of the
 * memory bank.  PROGRAM_END points to the last instruction of the active
 * program.
 *
 * The meaning of the two UNKNOWN fields is obviously not known.  However,
 * experiments so far have shown that both *must* point to an instruction
 * inside the vertex program, otherwise the GPU locks up.
 * fglrx usually sets CNTL_3_UNKNOWN to the end of the program, and
 * CNTL_1_UNKNOWN points to the instruction where the last write to
 * position takes place.  Most likely this is used to ignore the rest of
 * the program in cases where a group of verts aren't visible.
 * For some reason this "section" sometimes accepts other instructions
 * that have no relationship with position calculations.
 */
#define	R300_VAP_PVS_CNTL_1			0x22D0
#define	R300_PVS_CNTL_1_PROGRAM_START_SHIFT	0
#define	R300_PVS_CNTL_1_POS_END_SHIFT		10
#define	R300_PVS_CNTL_1_PROGRAM_END_SHIFT	20

/* Addresses are relative to the vertex program parameters area. */
#define	R300_VAP_PVS_CNTL_2			0x22D4
#define	R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT	0
#define	R300_PVS_CNTL_2_PARAM_COUNT_SHIFT	16

#define	R300_VAP_PVS_CNTL_3			0x22D8
#define	R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT	10
#define	R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT	0
322 
/*
 * The entire range from 0x2300 to 0x24AC inclusive seems to be used for
 * immediate vertices.
 * NOTE(review): the original comment gave the upper bound as "0x2AC";
 * 0x24AC (R300_VAP_VTX_END_OF_PKT below) is presumably what was meant.
 */
#define	R300_VAP_VTX_COLOR_R		0x2464
#define	R300_VAP_VTX_COLOR_G		0x2468
#define	R300_VAP_VTX_COLOR_B		0x246c
#define	R300_VAP_VTX_POS_0_X_1		0x2490
#define	R300_VAP_VTX_POS_0_Y_1		0x2494
#define	R300_VAP_VTX_COLOR_PKD		0x249c	/* RGBA */
#define	R300_VAP_VTX_POS_0_X_2		0x24a0
#define	R300_VAP_VTX_POS_0_Y_2		0x24a4
#define	R300_VAP_VTX_POS_0_Z_2		0x24a8
#define	R300_VAP_VTX_END_OF_PKT		0x24ac
335 
336 /* gap */
337 
/*
 * These are values from r300_reg/r300_reg.h.  They are known to be
 * correct and are here so that one register file can be used instead
 * of several.
 *  - Vladimir
 */
#define	R300_GB_VAP_RASTER_VTX_FMT_0			0x4000
#define	R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT	(1 << 0)
#define	R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT	(1 << 1)
#define	R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT	(1 << 2)
#define	R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT	(1 << 3)
#define	R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT	(1 << 4)
#define	R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE	(0xf << 5)
#define	R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT	(0x1 << 16)

#define	R300_GB_VAP_RASTER_VTX_FMT_1			0x4004
/*
 * Each of the following fields is 3 bits wide and specifies a number
 * of components.
 */
#define	R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT	0
#define	R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT	3
#define	R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT	6
#define	R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT	9
#define	R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT	12
#define	R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT	15
#define	R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT	18
#define	R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT	21
364 
/*
 * UNK30 seems to enable point-to-quad transformation on textures (or
 * something closely related to that).  This bit is rather fatal for the
 * time being because of what is still lacking on the pixel shader side.
 *
 * NOTE(review): bit 30 (R300_GB_UNK30) is also the bit offset named by
 * R300_GB_TEX7_SOURCE_SHIFT below.
 */
#define	R300_GB_ENABLE			0x4008
#define	R300_GB_POINT_STUFF_ENABLE	(1 << 0)
#define	R300_GB_LINE_STUFF_ENABLE	(1 << 1)
#define	R300_GB_TRIANGLE_STUFF_ENABLE	(1 << 2)
#define	R300_GB_STENCIL_AUTO_ENABLE	(1 << 4)
#define	R300_GB_UNK30			(1 << 30)

/* Values for the TEXn_SOURCE fields; each field is 2 bits wide. */
#define	R300_GB_TEX_REPLICATE		0
#define	R300_GB_TEX_ST			1
#define	R300_GB_TEX_STR			2
#define	R300_GB_TEX0_SOURCE_SHIFT	16
#define	R300_GB_TEX1_SOURCE_SHIFT	18
#define	R300_GB_TEX2_SOURCE_SHIFT	20
#define	R300_GB_TEX3_SOURCE_SHIFT	22
#define	R300_GB_TEX4_SOURCE_SHIFT	24
#define	R300_GB_TEX5_SOURCE_SHIFT	26
#define	R300_GB_TEX6_SOURCE_SHIFT	28
#define	R300_GB_TEX7_SOURCE_SHIFT	30
389 
/* MSPOS - positions for multisample antialiasing (?) */

/* Bit offsets within MSPOS0; each of the fields is 4 bits. */
#define	R300_GB_MSPOS0			0x4010
#define	R300_GB_MSPOS0__MS_X0_SHIFT	0
#define	R300_GB_MSPOS0__MS_Y0_SHIFT	4
#define	R300_GB_MSPOS0__MS_X1_SHIFT	8
#define	R300_GB_MSPOS0__MS_Y1_SHIFT	12
#define	R300_GB_MSPOS0__MS_X2_SHIFT	16
#define	R300_GB_MSPOS0__MS_Y2_SHIFT	20
#define	R300_GB_MSPOS0__MSBD0_Y		24
#define	R300_GB_MSPOS0__MSBD0_X		28

/* Bit offsets within MSPOS1 */
#define	R300_GB_MSPOS1			0x4014
#define	R300_GB_MSPOS1__MS_X3_SHIFT	0
#define	R300_GB_MSPOS1__MS_Y3_SHIFT	4
#define	R300_GB_MSPOS1__MS_X4_SHIFT	8
#define	R300_GB_MSPOS1__MS_Y4_SHIFT	12
#define	R300_GB_MSPOS1__MS_X5_SHIFT	16
#define	R300_GB_MSPOS1__MS_Y5_SHIFT	20
#define	R300_GB_MSPOS1__MSBD1		24
410 
411 
/* Tile configuration register and the values of its fields. */
#define	R300_GB_TILE_CONFIG		0x4018
#define	R300_GB_TILE_ENABLE		(1 << 0)

/* Pipe count, per chip */
#define	R300_GB_TILE_PIPE_COUNT_RV300	0
#define	R300_GB_TILE_PIPE_COUNT_R300	(3 << 1)
#define	R300_GB_TILE_PIPE_COUNT_R420	(7 << 1)

/* Tile size */
#define	R300_GB_TILE_SIZE_8		0
#define	R300_GB_TILE_SIZE_16		(1 << 4)
#define	R300_GB_TILE_SIZE_32		(2 << 4)

/* Super-tile size */
#define	R300_GB_SUPER_SIZE_1		(0 << 6)
#define	R300_GB_SUPER_SIZE_2		(1 << 6)
#define	R300_GB_SUPER_SIZE_4		(2 << 6)
#define	R300_GB_SUPER_SIZE_8		(3 << 6)
#define	R300_GB_SUPER_SIZE_16		(4 << 6)
#define	R300_GB_SUPER_SIZE_32		(5 << 6)
#define	R300_GB_SUPER_SIZE_64		(6 << 6)
#define	R300_GB_SUPER_SIZE_128		(7 << 6)

#define	R300_GB_SUPER_X_SHIFT		9	/* 3 bits wide */
#define	R300_GB_SUPER_Y_SHIFT		12	/* 3 bits wide */
#define	R300_GB_SUPER_TILE_A		0
#define	R300_GB_SUPER_TILE_B		(1 << 15)
#define	R300_GB_SUBPIXEL_1_12		0
#define	R300_GB_SUBPIXEL_1_16		(1 << 16)
434 
/* FIFO size register */
#define	R300_GB_FIFO_SIZE		0x4024

/* Size values; each of the size fields below is 2 bits wide. */
#define	R300_GB_FIFO_SIZE_32		0
#define	R300_GB_FIFO_SIZE_64		1
#define	R300_GB_FIFO_SIZE_128		2
#define	R300_GB_FIFO_SIZE_256		3

#define	R300_SC_IFIFO_SIZE_SHIFT	0
#define	R300_SC_TZFIFO_SIZE_SHIFT	2
#define	R300_SC_BFIFO_SIZE_SHIFT	4

#define	R300_US_OFIFO_SIZE_SHIFT	12
#define	R300_US_WFIFO_SIZE_SHIFT	14

/*
 * The following use the same constants as above, but the meaning is
 * times 2 (i.e. instead of 32 words it means 64).
 */
#define	R300_RS_TFIFO_SIZE_SHIFT	6
#define	R300_RS_CFIFO_SIZE_SHIFT	8
#define	R300_US_RAM_SIZE_SHIFT		10

/* Watermarks, 3 bits wide */
#define	R300_RS_HIGHWATER_COL_SHIFT	16
#define	R300_RS_HIGHWATER_TEX_SHIFT	19
#define	R300_OFIFO_HIGHWATER_SHIFT	22	/* two bits only */
#define	R300_CUBE_FIFO_HIGHWATER_COL_SHIFT	24
457 
/* Fog / depth / W source selection */
#define	R300_GB_SELECT			0x401C
#define	R300_GB_FOG_SELECT_C0A		0
#define	R300_GB_FOG_SELECT_C1A		1
#define	R300_GB_FOG_SELECT_C2A		2
#define	R300_GB_FOG_SELECT_C3A		3
#define	R300_GB_FOG_SELECT_1_1_W	4
#define	R300_GB_FOG_SELECT_Z		5
#define	R300_GB_DEPTH_SELECT_Z		0
#define	R300_GB_DEPTH_SELECT_1_1_W	(1 << 3)
#define	R300_GB_W_SELECT_1_W		0
#define	R300_GB_W_SELECT_1		(1 << 4)

/* Antialiasing configuration */
#define	R300_GB_AA_CONFIG		0x4020
#define	R300_AA_ENABLE			(1 << 0)
#define	R300_AA_SUBSAMPLES_2		0
#define	R300_AA_SUBSAMPLES_3		(1 << 1)
#define	R300_AA_SUBSAMPLES_4		(2 << 1)
#define	R300_AA_SUBSAMPLES_6		(3 << 1)
476 
477 /* END */
478 
479 /* gap */
/* Write zero to flush caches. */
#define	R300_TX_CNTL		0x4100

/*
 * Texture enable bits.  The upper enable bits are guessed, based on
 * fglrx reported limits.
 */
#define	R300_TX_ENABLE		0x4104
#define	R300_TX_ENABLE_0	0x0001
#define	R300_TX_ENABLE_1	0x0002
#define	R300_TX_ENABLE_2	0x0004
#define	R300_TX_ENABLE_3	0x0008
#define	R300_TX_ENABLE_4	0x0010
#define	R300_TX_ENABLE_5	0x0020
#define	R300_TX_ENABLE_6	0x0040
#define	R300_TX_ENABLE_7	0x0080
#define	R300_TX_ENABLE_8	0x0100
#define	R300_TX_ENABLE_9	0x0200
#define	R300_TX_ENABLE_10	0x0400
#define	R300_TX_ENABLE_11	0x0800
#define	R300_TX_ENABLE_12	0x1000
#define	R300_TX_ENABLE_13	0x2000
#define	R300_TX_ENABLE_14	0x4000
#define	R300_TX_ENABLE_15	0x8000
501 
/*
 * The pointsize is given in multiples of 6.  The pointsize can be
 * enormous: Clear() renders a single point that fills the entire
 * framebuffer.
 *
 * R300_POINTSIZE_X_MASK uses an unsigned constant: 0xFFFF << 16 is not
 * representable in a signed 32-bit int, so the signed form is undefined
 * behaviour in C and comes out negative on common compilers.  The Y mask
 * is left signed so that R300_POINTSIZE_MAX keeps its int type.
 */
#define	R300_RE_POINTSIZE		0x421C
#define	R300_POINTSIZE_Y_SHIFT		0
#define	R300_POINTSIZE_Y_MASK		(0xFFFF << 0)	/* GUESS */
#define	R300_POINTSIZE_X_SHIFT		16
#define	R300_POINTSIZE_X_MASK		(0xFFFFu << 16)	/* GUESS */
#define	R300_POINTSIZE_MAX		(R300_POINTSIZE_Y_MASK / 6)
511 
/*
 * The line width is given in multiples of 6.
 * In default mode, lines are classified as vertical lines.
 *   HO:      horizontal
 *   VE:      vertical or horizontal
 *   HO & VE: no classification
 */
#define	R300_RE_LINE_CNT	0x4234
#define	R300_LINESIZE_SHIFT	0
#define	R300_LINESIZE_MASK	(0xFFFF << 0)	/* GUESS */
#define	R300_LINESIZE_MAX	(R300_LINESIZE_MASK / 6)
#define	R300_LINE_CNT_HO	(1 << 16)
#define	R300_LINE_CNT_VE	(1 << 17)
525 
/* Some sort of scale or clamp value for texcoordless textures. */
#define	R300_RE_UNK4238			0x4238

/* Shade model register and the two values written to it. */
#define	R300_RE_SHADE_MODEL		0x4278
#define	R300_RE_SHADE_MODEL_SMOOTH	0x3AAAA
#define	R300_RE_SHADE_MODEL_FLAT	0x39595

/*
 * Dangerous.
 * Note that both *_POINT values below are zero.
 */
#define	R300_RE_POLYGON_MODE		0x4288
#define	R300_PM_ENABLED			(1 << 0)
#define	R300_PM_FRONT_POINT		(0 << 0)
#define	R300_PM_BACK_POINT		(0 << 0)
#define	R300_PM_FRONT_LINE		(1 << 4)
#define	R300_PM_FRONT_FILL		(1 << 5)
#define	R300_PM_BACK_LINE		(1 << 7)
#define	R300_PM_BACK_FILL		(1 << 8)
542 
/*
 * Not sure why there are duplicates of the factor and constant values.
 * My best guess so far is that there are separate zbiases for test
 * and write.
 * Ordering might be wrong.
 * Some of the tests indicate that fgl has a fallback implementation
 * of zbias via pixel shaders.
 */
#define	R300_RE_ZBIAS_T_FACTOR		0x42A4
#define	R300_RE_ZBIAS_T_CONSTANT	0x42A8
#define	R300_RE_ZBIAS_W_FACTOR		0x42AC
#define	R300_RE_ZBIAS_W_CONSTANT	0x42B0

/*
 * This register needs to be set to (1 << 1) for RV350 to correctly
 * perform the depth test (see --vb-triangles in r300_demo).
 * Don't know about other chips. - Vladimir
 * This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
 * My guess is that there are two bits for each zbias
 * primitive (FILL, LINE, POINT):
 * one to enable depth test and one for depth write.
 * Yet this doesn't explain why depth writes work ...
 */
#define	R300_RE_OCCLUSION_CNTL		0x42B4
#define	R300_OCCLUSION_ON		(1 << 1)

/* Face culling control */
#define	R300_RE_CULL_CNTL		0x42B8
#define	R300_CULL_FRONT			(1 << 0)
#define	R300_CULL_BACK			(1 << 1)
#define	R300_FRONT_FACE_CCW		(0 << 2)
#define	R300_FRONT_FACE_CW		(1 << 2)
574 
575 
/*
 * BEGIN: Rasterization / Interpolators - many guesses
 *
 * 0_UNKNOWN_18 has always been set except for clear operations.
 * TC_CNT is the number of incoming texture coordinate sets (i.e. it
 * depends on the vertex program, *not* the fragment program).
 */
#define	R300_RS_CNTL_0			0x4300
#define	R300_RS_CNTL_TC_CNT_SHIFT	2
#define	R300_RS_CNTL_TC_CNT_MASK	(7 << 2)
/* Number of color interpolators used */
#define	R300_RS_CNTL_CI_CNT_SHIFT	7
#define	R300_RS_CNTL_0_UNKNOWN_18	(1 << 18)

/* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n register. */
#define	R300_RS_CNTL_1			0x4304
588 
589 /* gap */
/*
 * Only used for texture coordinates.
 * Use the source field to route texture coordinate input from the
 * vertex program to the desired interpolator.  Note that the source
 * field is relative to the outputs the vertex program *actually*
 * writes.  If a vertex program only writes texcoord[1], this will
 * be source index 0.  Set INTERP_USED on all interpolators that
 * produce data used by the fragment program.  INTERP_USED looks
 * like a swizzling mask, but I haven't seen it used that way.
 *
 * Note: The _UNKNOWN constants are always set in their respective
 * register.  I don't know if this is necessary.
 */

/* Interpolator register addresses */
#define	R300_RS_INTERP_0		0x4310
#define	R300_RS_INTERP_1		0x4314
#define	R300_RS_INTERP_2		0x4318
#define	R300_RS_INTERP_3		0x431C
#define	R300_RS_INTERP_4		0x4320
#define	R300_RS_INTERP_5		0x4324
#define	R300_RS_INTERP_6		0x4328
#define	R300_RS_INTERP_7		0x432C

/* Per-register constants of unknown meaning */
#define	R300_RS_INTERP_1_UNKNOWN	0x40
#define	R300_RS_INTERP_2_UNKNOWN	0x80
#define	R300_RS_INTERP_3_UNKNOWN	0xC0

/* Fields common to all interpolator registers */
#define	R300_RS_INTERP_SRC_SHIFT	2
#define	R300_RS_INTERP_SRC_MASK		(7 << 2)
#define	R300_RS_INTERP_USED		0x00D10000
615 
/*
 * These DWORDs control how vertex data is routed into fragment program
 * registers, after interpolators.
 */
#define	R300_RS_ROUTE_0				0x4330
#define	R300_RS_ROUTE_1				0x4334
#define	R300_RS_ROUTE_2				0x4338
#define	R300_RS_ROUTE_3				0x433C	/* GUESS */
#define	R300_RS_ROUTE_4				0x4340	/* GUESS */
#define	R300_RS_ROUTE_5				0x4344	/* GUESS */
#define	R300_RS_ROUTE_6				0x4348	/* GUESS */
#define	R300_RS_ROUTE_7				0x434C	/* GUESS */

/* Source interpolator selection */
#define	R300_RS_ROUTE_SOURCE_INTERP_0		0
#define	R300_RS_ROUTE_SOURCE_INTERP_1		1
#define	R300_RS_ROUTE_SOURCE_INTERP_2		2
#define	R300_RS_ROUTE_SOURCE_INTERP_3		3
#define	R300_RS_ROUTE_SOURCE_INTERP_4		4
#define	R300_RS_ROUTE_SOURCE_INTERP_5		5	/* GUESS */
#define	R300_RS_ROUTE_SOURCE_INTERP_6		6	/* GUESS */
#define	R300_RS_ROUTE_SOURCE_INTERP_7		7	/* GUESS */
#define	R300_RS_ROUTE_ENABLE			(1 << 3)	/* GUESS */
#define	R300_RS_ROUTE_DEST_SHIFT		6
#define	R300_RS_ROUTE_DEST_MASK			(31 << 6)	/* GUESS */

/*
 * Special handling for color: when the fragment program uses color,
 * the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the
 * color register index.
 */
#define	R300_RS_ROUTE_0_COLOR			(1 << 14)
#define	R300_RS_ROUTE_0_COLOR_DEST_SHIFT	17
#define	R300_RS_ROUTE_0_COLOR_DEST_MASK		(31 << 17)	/* GUESS */
/* As above, but for secondary color */
#define	R300_RS_ROUTE_1_COLOR1			(1 << 14)
#define	R300_RS_ROUTE_1_COLOR1_DEST_SHIFT	17
#define	R300_RS_ROUTE_1_COLOR1_DEST_MASK	(31 << 17)
#define	R300_RS_ROUTE_1_UNKNOWN11		(1 << 11)
649 /* END */
650 
/*
 * BEGIN: Scissors and cliprects
 *
 * There are four clipping rectangles.  Their corner coordinates are
 * inclusive.  Every pixel is assigned a number from 0 to 15 by setting
 * bits 0-3 depending on whether the pixel is inside cliprects 0-3,
 * respectively.  For example, if a pixel is inside cliprects 0 and 1, but
 * outside 2 and 3, it is assigned the number 3 (binary 0011).
 * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is
 * set, the pixel is rasterized.
 *
 * In addition to this, there is a scissors rectangle.  Only pixels inside
 * the scissors rectangle are drawn (coordinates are inclusive).
 *
 * For some reason, the top-left corner of the framebuffer is at
 * (1440, 1440) for the purpose of clipping and scissors.
 */

/* Top-left / bottom-right corner registers of the four cliprects */
#define	R300_RE_CLIPRECT_TL_0		0x43B0
#define	R300_RE_CLIPRECT_BR_0		0x43B4
#define	R300_RE_CLIPRECT_TL_1		0x43B8
#define	R300_RE_CLIPRECT_BR_1		0x43BC
#define	R300_RE_CLIPRECT_TL_2		0x43C0
#define	R300_RE_CLIPRECT_BR_2		0x43C4
#define	R300_RE_CLIPRECT_TL_3		0x43C8
#define	R300_RE_CLIPRECT_BR_3		0x43CC

/* Layout of a corner register */
#define	R300_CLIPRECT_OFFSET		1440
#define	R300_CLIPRECT_MASK		0x1FFF
#define	R300_CLIPRECT_X_SHIFT		0
#define	R300_CLIPRECT_X_MASK		(0x1FFF << 0)
#define	R300_CLIPRECT_Y_SHIFT		13
#define	R300_CLIPRECT_Y_MASK		(0x1FFF << 13)

/*
 * Control register: one bit per pixel number.  The digits in each name
 * list the cliprects the pixel is inside of.
 */
#define	R300_RE_CLIPRECT_CNTL		0x43D0
#define	R300_CLIP_OUT			(1 << 0)
#define	R300_CLIP_0			(1 << 1)
#define	R300_CLIP_1			(1 << 2)
#define	R300_CLIP_10			(1 << 3)
#define	R300_CLIP_2			(1 << 4)
#define	R300_CLIP_20			(1 << 5)
#define	R300_CLIP_21			(1 << 6)
#define	R300_CLIP_210			(1 << 7)
#define	R300_CLIP_3			(1 << 8)
#define	R300_CLIP_30			(1 << 9)
#define	R300_CLIP_31			(1 << 10)
#define	R300_CLIP_310			(1 << 11)
#define	R300_CLIP_32			(1 << 12)
#define	R300_CLIP_320			(1 << 13)
#define	R300_CLIP_321			(1 << 14)
#define	R300_CLIP_3210			(1 << 15)
696 
697 /* gap */
/* Scissors rectangle: top-left / bottom-right registers and their layout. */
#define	R300_RE_SCISSORS_TL		0x43E0
#define	R300_RE_SCISSORS_BR		0x43E4
#define	R300_SCISSORS_OFFSET		1440
#define	R300_SCISSORS_X_SHIFT		0
#define	R300_SCISSORS_X_MASK		(0x1FFF << 0)
#define	R300_SCISSORS_Y_SHIFT		13
704 #define	 R300_SCISSORS_Y_MASK			  (0x1FFF << 13)
705 /* END */
706 
/*
 * BEGIN: Texture specification
 *
 * The texture specification dwords are grouped by meaning and not
 * by texture unit.  This means that e.g. the offset for texture
 * image unit N is found in register TX_OFFSET_0 + (4*N).
 */
#define	R300_TX_FILTER_0			0x4400

/* Wrap modes, placed at the WRAP_{S,T,Q} shifts below */
#define	R300_TX_REPEAT				0
#define	R300_TX_MIRRORED			1
#define	R300_TX_CLAMP				4
#define	R300_TX_CLAMP_TO_EDGE			2
#define	R300_TX_CLAMP_TO_BORDER			6
#define	R300_TX_WRAP_S_SHIFT			0
#define	R300_TX_WRAP_S_MASK			(7 << 0)
#define	R300_TX_WRAP_T_SHIFT			3
#define	R300_TX_WRAP_T_MASK			(7 << 3)
#define	R300_TX_WRAP_Q_SHIFT			6
#define	R300_TX_WRAP_Q_MASK			(7 << 6)

/* Magnification filter */
#define	R300_TX_MAG_FILTER_NEAREST		(1 << 9)
#define	R300_TX_MAG_FILTER_LINEAR		(2 << 9)
#define	R300_TX_MAG_FILTER_MASK			(3 << 9)

/* Minification filter */
#define	R300_TX_MIN_FILTER_NEAREST		(1 << 11)
#define	R300_TX_MIN_FILTER_LINEAR		(2 << 11)
#define	R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST	(5 << 11)
#define	R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR	(9 << 11)
#define	R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST	(6 << 11)
#define	R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR	(10 << 11)

/*
 * NOTE: NEAREST doesn't seem to exist.
 * MAG_FILTER_MASK and (3 << 11) are deliberately not set for all
 * anisotropy modes, because that would void the selected mag filter.
 *
 * The four ANISO values were previously written as "((N << 13)" with an
 * unbalanced opening parenthesis, which broke compilation wherever one
 * of them was used; the parentheses are now balanced.
 */
#define	R300_TX_MIN_FILTER_ANISO_NEAREST		(0 << 13)
#define	R300_TX_MIN_FILTER_ANISO_LINEAR			(0 << 13)
#define	R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST	(1 << 13)
#define	R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR	(2 << 13)
#define	R300_TX_MIN_FILTER_MASK			((15 << 11) | (3 << 13))

/* Maximum anisotropy ratio */
#define	R300_TX_MAX_ANISO_1_TO_1		(0 << 21)
#define	R300_TX_MAX_ANISO_2_TO_1		(2 << 21)
#define	R300_TX_MAX_ANISO_4_TO_1		(4 << 21)
#define	R300_TX_MAX_ANISO_8_TO_1		(6 << 21)
#define	R300_TX_MAX_ANISO_16_TO_1		(8 << 21)
#define	R300_TX_MAX_ANISO_MASK			(14 << 21)
749 
/*
 * R300_TX_FILTER1_0 and its fields: chroma key mode in bits 0-1, MC
 * rounding in bit 2, LOD bias in bits 0-12 of the mask below, edge
 * anisotropy in bit 13, MC coordinate truncation in bit 14, TRI_PERF in
 * bits 15-16 and anisotropy threshold in bits 17-19.
 * NOTE(review): R300_LOD_BIAS_MASK (0x1fff) overlaps the chroma key and
 * MC_ROUND bit positions as written; confirm the intended LOD bias shift.
 */
#define	R300_TX_FILTER1_0			0x4440
#define	R300_CHROMA_KEY_MODE_DISABLE		0
#define	R300_CHROMA_KEY_FORCE			1
#define	R300_CHROMA_KEY_BLEND			2
#define	R300_MC_ROUND_NORMAL			(0 << 2)
#define	R300_MC_ROUND_MPEG4			(1 << 2)
#define	R300_LOD_BIAS_MASK			0x1fff
#define	R300_EDGE_ANISO_EDGE_DIAG		(0 << 13)
#define	R300_EDGE_ANISO_EDGE_ONLY		(1 << 13)
#define	R300_MC_COORD_TRUNCATE_DISABLE		(0 << 14)
#define	R300_MC_COORD_TRUNCATE_MPEG		(1 << 14)
#define	R300_TX_TRI_PERF_0_8			(0 << 15)
#define	R300_TX_TRI_PERF_1_8			(1 << 15)
#define	R300_TX_TRI_PERF_1_4			(2 << 15)
#define	R300_TX_TRI_PERF_3_8			(3 << 15)
#define	R300_ANISO_THRESHOLD_MASK		(7 << 17)
766 
/*
 * R300_TX_SIZE_0 and its fields.  Width mask is bits 0-10, height mask
 * bits 11-21, size bits 26-29.
 *
 * R300_TX_SIZE_TXPITCH_EN is bit 31 and is built from an unsigned
 * constant: shifting a signed 1 into the sign bit of a 32-bit int is
 * undefined behaviour in C.
 */
#define	R300_TX_SIZE_0				0x4480
#define	 R300_TX_WIDTHMASK_SHIFT		0
#define	 R300_TX_WIDTHMASK_MASK			(2047 << 0)
#define	 R300_TX_HEIGHTMASK_SHIFT		11
#define	 R300_TX_HEIGHTMASK_MASK		(2047 << 11)
#define	 R300_TX_UNK23				(1 << 23)
#define	 R300_TX_SIZE_SHIFT			26 /* largest of width, height */
#define	 R300_TX_SIZE_MASK			(15 << 26)
#define	 R300_TX_SIZE_PROJECTED			(1 << 30)
#define	 R300_TX_SIZE_TXPITCH_EN		(1U << 31)
#define	R300_TX_FORMAT_0			0x44C0
/* The interpretation of the format word by Wladimir van der Laan */
/*
 * The X, Y, Z and W refer to the layout of the components.
 * They are given meanings as R, G, B and Alpha by the swizzle
 * specification
 */
#define	R300_TX_FORMAT_X8			0x0
#define	R300_TX_FORMAT_X16			0x1
#define	R300_TX_FORMAT_Y4X4			0x2
#define	R300_TX_FORMAT_Y8X8			0x3
#define	R300_TX_FORMAT_Y16X16			0x4
#define	R300_TX_FORMAT_Z3Y3X2			0x5
#define	R300_TX_FORMAT_Z5Y6X5			0x6
#define	R300_TX_FORMAT_Z6Y5X5			0x7
#define	R300_TX_FORMAT_Z11Y11X10		0x8
#define	R300_TX_FORMAT_Z10Y11X11		0x9
#define	R300_TX_FORMAT_W4Z4Y4X4			0xA
#define	R300_TX_FORMAT_W1Z5Y5X5			0xB
#define	R300_TX_FORMAT_W8Z8Y8X8			0xC
#define	R300_TX_FORMAT_W2Z10Y10X10		0xD
#define	R300_TX_FORMAT_W16Z16Y16X16		0xE
#define	R300_TX_FORMAT_DXT1			0xF
#define	R300_TX_FORMAT_DXT3			0x10
#define	R300_TX_FORMAT_DXT5			0x11
#define	R300_TX_FORMAT_D3DMFT_CxV8U8		0x12	/* no swizzle */
#define	R300_TX_FORMAT_A8R8G8B8			0x13	/* no swizzle */
#define	R300_TX_FORMAT_B8G8_B8G8		0x14	/* no swizzle */
#define	R300_TX_FORMAT_G8R8_G8B8		0x15	/* no swizzle */
/* 0x16 - some 16 bit green format.. ?? */
#define	R300_TX_FORMAT_UNK25			(1 << 25) /* no swizzle */
#define	R300_TX_FORMAT_CUBIC_MAP		(1 << 26)

/* gap */
/* Floating point formats */
/* Note - hardware supports both 16 and 32 bit floating point */
#define	R300_TX_FORMAT_FL_I16			0x18
#define	R300_TX_FORMAT_FL_I16A16		0x19
#define	R300_TX_FORMAT_FL_R16G16B16A16		0x1A
#define	R300_TX_FORMAT_FL_I32			0x1B
#define	R300_TX_FORMAT_FL_I32A32		0x1C
#define	R300_TX_FORMAT_FL_R32G32B32A32		0x1D
/*
 * Alpha modes, convenience mostly.
 * If you have alpha, pick the constant appropriate to the number of
 * channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc.).
 */
#define	R300_TX_FORMAT_ALPHA_1CH		0x000
#define	R300_TX_FORMAT_ALPHA_2CH		0x200
#define	R300_TX_FORMAT_ALPHA_4CH		0x600
#define	R300_TX_FORMAT_ALPHA_NONE		0xA00
/* Swizzling */
/* constants */
#define	R300_TX_FORMAT_X		0
#define	R300_TX_FORMAT_Y		1
#define	R300_TX_FORMAT_Z		2
#define	R300_TX_FORMAT_W		3
#define	R300_TX_FORMAT_ZERO		4
#define	R300_TX_FORMAT_ONE		5
#define	R300_TX_FORMAT_CUT_Z		6
				/* 2.0*Z, everything above 1.0 is set to 0.0 */
#define	R300_TX_FORMAT_CUT_W		7
				/* 2.0*W, everything above 1.0 is set to 0.0 */

#define	R300_TX_FORMAT_B_SHIFT		18
#define	R300_TX_FORMAT_G_SHIFT		15
#define	R300_TX_FORMAT_R_SHIFT		12
#define	R300_TX_FORMAT_A_SHIFT		9
/*
 * Convenience macro to take care of layout and swizzling.
 *
 * Every argument is a bare name suffix that gets pasted onto
 * R300_TX_FORMAT_ (so pass X, not R300_TX_FORMAT_X):
 *   B, G, R, A - suffix of the constant placed at the B/G/R/A shift;
 *                the 3-bit swizzle constants above (X ... CUT_W) fit here.
 *   FMT        - suffix of the constant ORed in unshifted.
 * The result is the bitwise OR of the four shifted values and FMT.
 */
#define	R300_EASY_TX_FORMAT(B, G, R, A, FMT)	( \
	((R300_TX_FORMAT_##B) << R300_TX_FORMAT_B_SHIFT) | \
	((R300_TX_FORMAT_##G) << R300_TX_FORMAT_G_SHIFT) | \
	((R300_TX_FORMAT_##R) << R300_TX_FORMAT_R_SHIFT) | \
	((R300_TX_FORMAT_##A) << R300_TX_FORMAT_A_SHIFT) | \
	(R300_TX_FORMAT_##FMT))
/* These can be ORed with result of R300_EASY_TX_FORMAT() */
/* We don't really know what they do. Take values from a constant color ? */
#define	R300_TX_FORMAT_CONST_X		(1 << 5)
#define	R300_TX_FORMAT_CONST_Y		(2 << 5)
#define	R300_TX_FORMAT_CONST_Z		(4 << 5)
#define	R300_TX_FORMAT_CONST_W		(8 << 5)

#define	R300_TX_FORMAT_YUV_MODE		0x00800000
858 
#define	R300_TX_PITCH_0				0x4500
					/* obvious missing in gap */
#define	R300_TX_OFFSET_0			0x4540
/* BEGIN: Guess from R200 */
#define	 R300_TXO_ENDIAN_NO_SWAP		(0 << 0)
#define	 R300_TXO_ENDIAN_BYTE_SWAP		(1 << 0)
#define	 R300_TXO_ENDIAN_WORD_SWAP		(2 << 0)
#define	 R300_TXO_ENDIAN_HALFDW_SWAP		(3 << 0)
#define	 R300_TXO_MACRO_TILE			(1 << 2)
#define	 R300_TXO_MICRO_TILE			(1 << 3)
/* The offset field is everything above the low five bits. */
#define	 R300_TXO_OFFSET_MASK			0xffffffe0
#define	 R300_TXO_OFFSET_SHIFT			5
/* END */
#define	R300_TX_CHROMA_KEY_0			0x4580
				/* 32 bit chroma key */
#define	R300_TX_BORDER_COLOR_0			0x45C0
				/* ff00ff00 == { 0, 1.0, 0, 1.0 } */

/* END */
878 
/*
 * BEGIN: Fragment program instruction set
 * Fragment programs are written directly into register space.
 * There are separate instruction streams for texture instructions and ALU
 * instructions.
 * In order to synchronize these streams, the program is divided into up
 * to 4 nodes. Each node begins with a number of TEX operations, followed
 * by a number of ALU operations.
 * The first node can have zero TEX ops, all subsequent nodes must have at
 * least one TEX op.
 * All nodes must have at least one ALU op.
 *
 * The index of the last node is stored in PFS_CNTL_0: A value of 0 means
 * 1 node, a value of 3 means 4 nodes.
 * The total amount of instructions is defined in PFS_CNTL_2. The offsets are
 * offsets into the respective instruction streams, while *_END points to the
 * last instruction relative to this offset.
 */
#define	R300_PFS_CNTL_0				0x4600
#define	 R300_PFS_CNTL_LAST_NODES_SHIFT		0
#define	 R300_PFS_CNTL_LAST_NODES_MASK		(3 << 0)
#define	 R300_PFS_CNTL_FIRST_NODE_HAS_TEX	(1 << 3)
#define	R300_PFS_CNTL_1				0x4604
/*
 * There is an unshifted value here which has so far always been equal to the
 * index of the highest used temporary register.
 */
#define	R300_PFS_CNTL_2				0x4608
#define	 R300_PFS_CNTL_ALU_OFFSET_SHIFT		0
#define	 R300_PFS_CNTL_ALU_OFFSET_MASK		(63 << 0)
#define	 R300_PFS_CNTL_ALU_END_SHIFT		6
#define	 R300_PFS_CNTL_ALU_END_MASK		(63 << 6)
#define	 R300_PFS_CNTL_TEX_OFFSET_SHIFT		12
#define	 R300_PFS_CNTL_TEX_OFFSET_MASK		(31 << 12) /* GUESS */
#define	 R300_PFS_CNTL_TEX_END_SHIFT		18
#define	 R300_PFS_CNTL_TEX_END_MASK		(31 << 18) /* GUESS */
911 
/* gap */
/*
 * Nodes are stored backwards. The last active node is always stored in
 * PFS_NODE_3.
 * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
 * first node is stored in NODE_2, the second node is stored in NODE_3.
 *
 * Offsets are relative to the master offset from PFS_CNTL_2.
 * LAST_NODE is set for the last node, and only for the last node.
 */
#define	R300_PFS_NODE_0				0x4610
#define	R300_PFS_NODE_1				0x4614
#define	R300_PFS_NODE_2				0x4618
#define	R300_PFS_NODE_3				0x461C
#define	R300_PFS_NODE_ALU_OFFSET_SHIFT		0
#define	R300_PFS_NODE_ALU_OFFSET_MASK		(63 << 0)
#define	R300_PFS_NODE_ALU_END_SHIFT		6
#define	R300_PFS_NODE_ALU_END_MASK		(63 << 6)
#define	R300_PFS_NODE_TEX_OFFSET_SHIFT		12
#define	R300_PFS_NODE_TEX_OFFSET_MASK		(31 << 12)
#define	R300_PFS_NODE_TEX_END_SHIFT		17
#define	R300_PFS_NODE_TEX_END_MASK		(31 << 17)
/*
 * The disabled R300_PFS_NODE_LAST_NODE below names the same bit (22) that
 * R300_PFS_NODE_OUTPUT_COLOR uses.
 */
/* #define	R300_PFS_NODE_LAST_NODE		   (1 << 22) */
#define	R300_PFS_NODE_OUTPUT_COLOR		(1 << 22)
#define	R300_PFS_NODE_OUTPUT_DEPTH		(1 << 23)
935 
/*
 * TEX
 * As far as I can tell, texture instructions cannot write into output
 * registers directly. A subsequent ALU instruction is always necessary,
 * even if it's just MAD o0, r0, 1, 0
 */
#define	R300_PFS_TEXI_0				0x4620
#define	R300_FPITX_SRC_SHIFT			0
#define	R300_FPITX_SRC_MASK			(31 << 0)
#define	R300_FPITX_SRC_CONST			(1 << 5) /* GUESS */
#define	R300_FPITX_DST_SHIFT			6
#define	R300_FPITX_DST_MASK			(31 << 6)
#define	R300_FPITX_IMAGE_SHIFT			11
#define	R300_FPITX_IMAGE_MASK			(15 << 11)
				/* GUESS based on layout and native limits */
/*
 * Unsure if these are opcodes, or some kind of bitfield, but this is how
 * they were set when I checked
 */
#define	R300_FPITX_OPCODE_SHIFT			15
#define	R300_FPITX_OP_TEX			1
#define	R300_FPITX_OP_KIL			2
#define	R300_FPITX_OP_TXP			3
#define	R300_FPITX_OP_TXB			4
958 
/*
 * ALU
 * The ALU instructions register blocks are enumerated according to the order
 * in which fglrx (presumably) emits them. I assume there is space for 64
 * instructions, since each block has space for a maximum of 64 DWORDs, and
 * this matches reported native limits.
 *
 * The basic functional block seems to be one MAD for each color and alpha,
 * and an adder that adds all components after the MUL.
 *  - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
 *  - DP4: Use OUTC_DP4, OUTA_DP4
 *  - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
 *  - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
 *  - CMP: If ARG2 < 0, return ARG1, else return ARG0
 *  - FLR: use FRC+MAD
 *  - XPD: use MAD+MAD
 *  - SGE, SLT: use MAD+CMP
 *  - RSQ: use ABS modifier for argument
 *  - Use OUTC_REPL_ALPHA to write results of an alpha-only operation
 *    (e.g. RCP) into color register
 *  - apparently, there's no quick DST operation
 *  - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
 *  - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
 *  - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
 *
 * Operand selection
 * First stage selects three sources from the available registers and
 * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
 * fglrx sorts the three source fields: Registers before constants,
 * lower indices before higher indices; I do not know whether this is
 * necessary.
 * fglrx fills unused sources with "read constant 0"
 * According to specs, you cannot select more than two different constants.
 *
 * Second stage selects the operands from the sources. This is defined in
 * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
 * zero and one.
 * Swizzling and negation happens in this stage, as well.
 *
 * Important: Color and alpha seem to be mostly separate, i.e. their sources
 * selection appears to be fully independent (the register storage is probably
 * physically split into a color and an alpha section).
 * However (because of the apparent physical split), there is some interaction
 * WRT swizzling. If, for example, you want to load an R component into an
 * Alpha operand, this R component is taken from a *color* source, not from
 * an alpha source. The corresponding register doesn't even have to appear in
 * the alpha sources list. (I hope this all makes sense to you)
 *
 * Destination selection
 * The destination register index is in FPI1 (color) and FPI3 (alpha) together
 * with enable bits.
 * There are separate enable bits for writing into temporary registers
 * (DSTC_REG_* /DSTA_REG) and program output registers
 * (DSTC_OUTPUT_* /DSTA_OUTPUT).
 * You can write to both at once, or not write at all (the same index
 * must be used for both).
 *
 * Note: There is a special form for LRP
 *  - Argument order is the same as in ARB_fragment_program.
 *  - Operation is MAD
 *  - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP
 *  - Set FPI0/FPI2_SPECIAL_LRP
 * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
 */
#define	R300_PFS_INSTR1_0			0x46C0
#define	R300_FPI1_SRC0C_SHIFT			0
#define	R300_FPI1_SRC0C_MASK			(31 << 0)
#define	R300_FPI1_SRC0C_CONST			(1 << 5)
#define	R300_FPI1_SRC1C_SHIFT			6
#define	R300_FPI1_SRC1C_MASK			(31 << 6)
#define	R300_FPI1_SRC1C_CONST			(1 << 11)
#define	R300_FPI1_SRC2C_SHIFT			12
#define	R300_FPI1_SRC2C_MASK			(31 << 12)
#define	R300_FPI1_SRC2C_CONST			(1 << 17)
#define	R300_FPI1_DSTC_SHIFT			18
#define	R300_FPI1_DSTC_MASK			(31 << 18)
#define	R300_FPI1_DSTC_REG_MASK_SHIFT		23
#define	R300_FPI1_DSTC_REG_X			(1 << 23)
#define	R300_FPI1_DSTC_REG_Y			(1 << 24)
#define	R300_FPI1_DSTC_REG_Z			(1 << 25)
#define	R300_FPI1_DSTC_OUTPUT_MASK_SHIFT	26
#define	R300_FPI1_DSTC_OUTPUT_X			(1 << 26)
#define	R300_FPI1_DSTC_OUTPUT_Y			(1 << 27)
#define	R300_FPI1_DSTC_OUTPUT_Z			(1 << 28)
1040 
/*
 * R300_PFS_INSTR3_0 fields.  The three source fields and the destination
 * index use the same shifts and 5-bit masks as the R300_FPI1_* fields of
 * R300_PFS_INSTR1_0; the write-enable bits differ.
 */
#define	R300_PFS_INSTR3_0			0x47C0
#define	R300_FPI3_SRC0A_SHIFT			0
#define	R300_FPI3_SRC0A_MASK			(31 << 0)
#define	R300_FPI3_SRC0A_CONST			(1 << 5)
#define	R300_FPI3_SRC1A_SHIFT			6
#define	R300_FPI3_SRC1A_MASK			(31 << 6)
#define	R300_FPI3_SRC1A_CONST			(1 << 11)
#define	R300_FPI3_SRC2A_SHIFT			12
#define	R300_FPI3_SRC2A_MASK			(31 << 12)
#define	R300_FPI3_SRC2A_CONST			(1 << 17)
#define	R300_FPI3_DSTA_SHIFT			18
#define	R300_FPI3_DSTA_MASK			(31 << 18)
#define	R300_FPI3_DSTA_REG			(1 << 23)
#define	R300_FPI3_DSTA_OUTPUT			(1 << 24)
#define	R300_FPI3_DSTA_DEPTH			(1 << 27)
1056 
#define	R300_PFS_INSTR0_0			0x48C0
/*
 * Operand selector values (0-31).  Presumably these are the values
 * placed in the 5-bit ARG0C/ARG1C/ARG2C fields defined further down.
 */
#define	R300_FPI0_ARGC_SRC0C_XYZ		0
#define	R300_FPI0_ARGC_SRC0C_XXX		1
#define	R300_FPI0_ARGC_SRC0C_YYY		2
#define	R300_FPI0_ARGC_SRC0C_ZZZ		3
#define	R300_FPI0_ARGC_SRC1C_XYZ		4
#define	R300_FPI0_ARGC_SRC1C_XXX		5
#define	R300_FPI0_ARGC_SRC1C_YYY		6
#define	R300_FPI0_ARGC_SRC1C_ZZZ		7
#define	R300_FPI0_ARGC_SRC2C_XYZ		8
#define	R300_FPI0_ARGC_SRC2C_XXX		9
#define	R300_FPI0_ARGC_SRC2C_YYY		10
#define	R300_FPI0_ARGC_SRC2C_ZZZ		11
#define	R300_FPI0_ARGC_SRC0A			12
#define	R300_FPI0_ARGC_SRC1A			13
#define	R300_FPI0_ARGC_SRC2A			14
#define	R300_FPI0_ARGC_SRC1C_LRP		15
#define	R300_FPI0_ARGC_ZERO			20
#define	R300_FPI0_ARGC_ONE			21
#define	R300_FPI0_ARGC_HALF			22 /* GUESS */
#define	R300_FPI0_ARGC_SRC0C_YZX		23
#define	R300_FPI0_ARGC_SRC1C_YZX		24
#define	R300_FPI0_ARGC_SRC2C_YZX		25
#define	R300_FPI0_ARGC_SRC0C_ZXY		26
#define	R300_FPI0_ARGC_SRC1C_ZXY		27
#define	R300_FPI0_ARGC_SRC2C_ZXY		28
#define	R300_FPI0_ARGC_SRC0CA_WZY		29
#define	R300_FPI0_ARGC_SRC1CA_WZY		30
#define	R300_FPI0_ARGC_SRC2CA_WZY		31

/* Per-argument fields: 5-bit selector followed by NEG and ABS bits. */
#define	R300_FPI0_ARG0C_SHIFT			0
#define	R300_FPI0_ARG0C_MASK			(31 << 0)
#define	R300_FPI0_ARG0C_NEG			(1 << 5)
#define	R300_FPI0_ARG0C_ABS			(1 << 6)
#define	R300_FPI0_ARG1C_SHIFT			7
#define	R300_FPI0_ARG1C_MASK			(31 << 7)
#define	R300_FPI0_ARG1C_NEG			(1 << 12)
#define	R300_FPI0_ARG1C_ABS			(1 << 13)
#define	R300_FPI0_ARG2C_SHIFT			14
#define	R300_FPI0_ARG2C_MASK			(31 << 14)
#define	R300_FPI0_ARG2C_NEG			(1 << 19)
#define	R300_FPI0_ARG2C_ABS			(1 << 20)
#define	R300_FPI0_SPECIAL_LRP			(1 << 21)
#define	R300_FPI0_OUTC_MAD			(0 << 23)
#define	R300_FPI0_OUTC_DP3			(1 << 23)
#define	R300_FPI0_OUTC_DP4			(2 << 23)
#define	R300_FPI0_OUTC_MIN			(4 << 23)
#define	R300_FPI0_OUTC_MAX			(5 << 23)
#define	R300_FPI0_OUTC_CMP			(8 << 23)
#define	R300_FPI0_OUTC_FRC			(9 << 23)
#define	R300_FPI0_OUTC_REPL_ALPHA		(10 << 23)
#define	R300_FPI0_OUTC_SAT			(1 << 30)
/*
 * Bit 31 uses an unsigned constant: shifting a signed 1 into the sign
 * bit of a 32-bit int is undefined behaviour in C.
 */
#define	R300_FPI0_INSERT_NOP			(1U << 31)
1110 
#define	R300_PFS_INSTR2_0			0x49C0
/*
 * Operand selector values.  Presumably these are the values placed in
 * the 5-bit ARG0A/ARG1A/ARG2A fields defined further down.
 */
#define	R300_FPI2_ARGA_SRC0C_X			0
#define	R300_FPI2_ARGA_SRC0C_Y			1
#define	R300_FPI2_ARGA_SRC0C_Z			2
#define	R300_FPI2_ARGA_SRC1C_X			3
#define	R300_FPI2_ARGA_SRC1C_Y			4
#define	R300_FPI2_ARGA_SRC1C_Z			5
#define	R300_FPI2_ARGA_SRC2C_X			6
#define	R300_FPI2_ARGA_SRC2C_Y			7
#define	R300_FPI2_ARGA_SRC2C_Z			8
#define	R300_FPI2_ARGA_SRC0A			9
#define	R300_FPI2_ARGA_SRC1A			10
#define	R300_FPI2_ARGA_SRC2A			11
#define	R300_FPI2_ARGA_SRC1A_LRP		15
#define	R300_FPI2_ARGA_ZERO			16
#define	R300_FPI2_ARGA_ONE			17
#define	R300_FPI2_ARGA_HALF			18 /* GUESS */

/* Per-argument fields: 5-bit selector followed by NEG and ABS bits. */
#define	R300_FPI2_ARG0A_SHIFT			0
#define	R300_FPI2_ARG0A_MASK			(31 << 0)
#define	R300_FPI2_ARG0A_NEG			(1 << 5)
#define	R300_FPI2_ARG0A_ABS			(1 << 6) /* GUESS */
#define	R300_FPI2_ARG1A_SHIFT			7
#define	R300_FPI2_ARG1A_MASK			(31 << 7)
#define	R300_FPI2_ARG1A_NEG			(1 << 12)
#define	R300_FPI2_ARG1A_ABS			(1 << 13) /* GUESS */
#define	R300_FPI2_ARG2A_SHIFT			14
#define	R300_FPI2_ARG2A_MASK			(31 << 14)
#define	R300_FPI2_ARG2A_NEG			(1 << 19)
#define	R300_FPI2_ARG2A_ABS			(1 << 20) /* GUESS */
#define	R300_FPI2_SPECIAL_LRP			(1 << 21)
#define	R300_FPI2_OUTA_MAD			(0 << 23)
#define	R300_FPI2_OUTA_DP4			(1 << 23)
#define	R300_FPI2_OUTA_MIN			(2 << 23)
#define	R300_FPI2_OUTA_MAX			(3 << 23)
#define	R300_FPI2_OUTA_CMP			(6 << 23)
#define	R300_FPI2_OUTA_FRC			(7 << 23)
#define	R300_FPI2_OUTA_EX2			(8 << 23)
#define	R300_FPI2_OUTA_LG2			(9 << 23)
#define	R300_FPI2_OUTA_RCP			(10 << 23)
#define	R300_FPI2_OUTA_RSQ			(11 << 23)
#define	R300_FPI2_OUTA_SAT			(1 << 30)
/*
 * Bit 31 uses an unsigned constant: shifting a signed 1 into the sign
 * bit of a 32-bit int is undefined behaviour in C.
 */
#define	R300_FPI2_UNKNOWN_31			(1U << 31)
/* END */
1155 
/* gap */
/*
 * R300_PP_ALPHA_TEST fields: reference value in bits 0-7, comparison
 * operator in bits 8-10 (R300_ALPHA_TEST_OP_MASK), enable flag in bit 11.
 */
#define	R300_PP_ALPHA_TEST			0x4BD4
#define	 R300_REF_ALPHA_MASK			0x000000ff
#define	 R300_ALPHA_TEST_FAIL			(0 << 8)
#define	 R300_ALPHA_TEST_LESS			(1 << 8)
#define	 R300_ALPHA_TEST_LEQUAL			(3 << 8)
#define	 R300_ALPHA_TEST_EQUAL			(2 << 8)
#define	 R300_ALPHA_TEST_GEQUAL			(6 << 8)
#define	 R300_ALPHA_TEST_GREATER		(4 << 8)
#define	 R300_ALPHA_TEST_NEQUAL			(5 << 8)
#define	 R300_ALPHA_TEST_PASS			(7 << 8)
#define	 R300_ALPHA_TEST_OP_MASK		(7 << 8)
#define	 R300_ALPHA_TEST_ENABLE			(1 << 11)
1169 
/* gap */
/*
 * Fragment program parameters in 7.16 floating point.
 * The X/Y/Z/W components of one parameter are 4 bytes apart, and
 * PARAM_31 sits 31 * 16 bytes above PARAM_0.
 */
#define	R300_PFS_PARAM_0_X			0x4C00
#define	R300_PFS_PARAM_0_Y			0x4C04
#define	R300_PFS_PARAM_0_Z			0x4C08
#define	R300_PFS_PARAM_0_W			0x4C0C
/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */
#define	R300_PFS_PARAM_31_X			0x4DF0
#define	R300_PFS_PARAM_31_Y			0x4DF4
#define	R300_PFS_PARAM_31_Z			0x4DF8
#define	R300_PFS_PARAM_31_W			0x4DFC
1181 
/*
 * Notes:
 * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used
 *   in the application
 * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and
 *   ABLEND are set to the same
 *   function (both registers are always set up completely in any case)
 * - Most blend flags are simply copied from R200 and not tested yet
 */
#define	R300_RB3D_CBLEND			0x4E04
#define	R300_RB3D_ABLEND			0x4E08
/* the following only appear in CBLEND */
#define	 R300_BLEND_ENABLE			(1 << 0)
#define	 R300_BLEND_UNKNOWN			(3 << 1)
#define	 R300_BLEND_NO_SEPARATE			(1 << 3)
/* the following are shared between CBLEND and ABLEND */
#define	 R300_FCN_MASK				(3 << 12)
#define	 R300_COMB_FCN_ADD_CLAMP		(0 << 12)
#define	 R300_COMB_FCN_ADD_NOCLAMP		(1 << 12)
#define	 R300_COMB_FCN_SUB_CLAMP		(2 << 12)
#define	 R300_COMB_FCN_SUB_NOCLAMP		(3 << 12)
/* Source blend factor: 6-bit field at bit 16. */
#define	 R300_SRC_BLEND_GL_ZERO			(32 << 16)
#define	 R300_SRC_BLEND_GL_ONE			(33 << 16)
#define	 R300_SRC_BLEND_GL_SRC_COLOR		(34 << 16)
#define	 R300_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR	(35 << 16)
#define	 R300_SRC_BLEND_GL_DST_COLOR		(36 << 16)
#define	 R300_SRC_BLEND_GL_ONE_MINUS_DST_COLOR	(37 << 16)
#define	 R300_SRC_BLEND_GL_SRC_ALPHA		(38 << 16)
#define	 R300_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA	(39 << 16)
#define	 R300_SRC_BLEND_GL_DST_ALPHA		(40 << 16)
#define	 R300_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA	(41 << 16)
#define	 R300_SRC_BLEND_GL_SRC_ALPHA_SATURATE	(42 << 16)
#define	 R300_SRC_BLEND_MASK			(63 << 16)
/* Destination blend factor: 6-bit field at bit 24. */
#define	 R300_DST_BLEND_GL_ZERO			(32 << 24)
#define	 R300_DST_BLEND_GL_ONE			(33 << 24)
#define	 R300_DST_BLEND_GL_SRC_COLOR		(34 << 24)
#define	 R300_DST_BLEND_GL_ONE_MINUS_SRC_COLOR	(35 << 24)
#define	 R300_DST_BLEND_GL_DST_COLOR		(36 << 24)
#define	 R300_DST_BLEND_GL_ONE_MINUS_DST_COLOR	(37 << 24)
#define	 R300_DST_BLEND_GL_SRC_ALPHA		(38 << 24)
#define	 R300_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA	(39 << 24)
#define	 R300_DST_BLEND_GL_DST_ALPHA		(40 << 24)
#define	 R300_DST_BLEND_GL_ONE_MINUS_DST_ALPHA	(41 << 24)
#define	 R300_DST_BLEND_MASK			(63 << 24)
/* R300_RB3D_COLORMASK flags: one bit per channel, B/G/R/A in bits 0-3. */
#define	R300_RB3D_COLORMASK			0x4E0C
#define	 R300_COLORMASK0_B			(1 << 0)
#define	 R300_COLORMASK0_G			(1 << 1)
#define	 R300_COLORMASK0_R			(1 << 2)
#define	 R300_COLORMASK0_A			(1 << 3)
1229 
/* gap */
#define	R300_RB3D_COLOROFFSET0			0x4E28
#define	 R300_COLOROFFSET_MASK			0xFFFFFFF0 /* GUESS */
#define	R300_RB3D_COLOROFFSET1			0x4E2C /* GUESS */
#define	R300_RB3D_COLOROFFSET2			0x4E30 /* GUESS */
#define	R300_RB3D_COLOROFFSET3			0x4E34 /* GUESS */
/* gap */
/*
 * Bit 16: Larger tiles
 * Bit 17: 4x2 tiles
 * Bit 18: Extremely weird tile like, but some pixels duplicated?
 */
#define	R300_RB3D_COLORPITCH0			0x4E38
#define	 R300_COLORPITCH_MASK			0x00001FF8 /* GUESS */
#define	 R300_COLOR_TILE_ENABLE			(1 << 16) /* GUESS */
#define	 R300_COLOR_MICROTILE_ENABLE		(1 << 17) /* GUESS */
#define	 R300_COLOR_ENDIAN_NO_SWAP		(0 << 18) /* GUESS */
#define	 R300_COLOR_ENDIAN_WORD_SWAP		(1 << 18) /* GUESS */
#define	 R300_COLOR_ENDIAN_DWORD_SWAP		(2 << 18) /* GUESS */
#define	 R300_COLOR_FORMAT_RGB565		(2 << 22)
#define	 R300_COLOR_FORMAT_ARGB8888		(3 << 22)
#define	R300_RB3D_COLORPITCH1			0x4E3C /* GUESS */
#define	R300_RB3D_COLORPITCH2			0x4E40 /* GUESS */
#define	R300_RB3D_COLORPITCH3			0x4E44 /* GUESS */
1252 
/* gap */
/*
 * Guess by Vladimir.
 * Set to 0A before 3D operations, set to 02 afterwards.
 */
#define	R300_RB3D_DSTCACHE_CTLSTAT		0x4E4C
#define	 R300_RB3D_DSTCACHE_02			0x00000002
#define	 R300_RB3D_DSTCACHE_0A			0x0000000A
1261 
/* gap */
/*
 * There seems to be no "write only" setting, so use
 * Z-test = ALWAYS for this. Bit (1<<8) is the "test"
 * bit. so plain write is 6  - vd
 *
 * NOTE(review): R300_RB3D_Z_WRITE_ONLY (0x06) and
 * R300_RB3D_Z_TEST_AND_WRITE (0x16) differ only in bit 4 (0x10), so the
 * "(1<<8)" above may be a typo - confirm against hardware documentation.
 *
 * Each constant is defined exactly once here; the Z_TEST,
 * Z_TEST_AND_WRITE and Z_WRITE_ONLY definitions used to be repeated.
 */
#define	R300_RB3D_ZSTENCIL_CNTL_0		0x4F00
#define	 R300_RB3D_Z_DISABLED_1			0x00000010 /* GUESS */
#define	 R300_RB3D_Z_DISABLED_2			0x00000014 /* GUESS */
#define	 R300_RB3D_Z_TEST			0x00000012
#define	 R300_RB3D_Z_TEST_AND_WRITE		0x00000016
#define	 R300_RB3D_Z_WRITE_ONLY			0x00000006
#define	R300_RB3D_STENCIL_ENABLE		0x00000001
1279 
#define	R300_RB3D_ZSTENCIL_CNTL_1		0x4F04
/* functions */
#define	R300_ZS_NEVER				0
#define	R300_ZS_LESS				1
#define	R300_ZS_LEQUAL				2
#define	R300_ZS_EQUAL				3
#define	R300_ZS_GEQUAL				4
#define	R300_ZS_GREATER				5
#define	R300_ZS_NOTEQUAL			6
#define	R300_ZS_ALWAYS				7
#define	R300_ZS_MASK				7
/* operations */
#define	R300_ZS_KEEP				0
#define	R300_ZS_ZERO				1
#define	R300_ZS_REPLACE				2
#define	R300_ZS_INCR				3
#define	R300_ZS_DECR				4
#define	R300_ZS_INVERT				5
#define	R300_ZS_INCR_WRAP			6
#define	R300_ZS_DECR_WRAP			7

/*
 * front and back refer to operations done for front
 * and back faces, i.e. separate stencil function support
 *
 * The shifts below are spaced three bits apart, matching the 3-bit
 * function/operation codes above (R300_ZS_MASK is 7).
 */
#define	R300_RB3D_ZS1_DEPTH_FUNC_SHIFT		0
#define	R300_RB3D_ZS1_FRONT_FUNC_SHIFT		3
#define	R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT	6
#define	R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT	9
#define	R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT	12
#define	R300_RB3D_ZS1_BACK_FUNC_SHIFT		15
#define	R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT	18
#define	R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT	21
#define	R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT	24
1314 
1315 
1316 
/*
 * R300_RB3D_ZSTENCIL_CNTL_2: three fields at shifts 0, 8 and 16.
 * R300_RB3D_ZS2_STENCIL_MASK (0xFF) is an unshifted 8-bit value mask.
 */
#define	R300_RB3D_ZSTENCIL_CNTL_2		0x4F08
#define	R300_RB3D_ZS2_STENCIL_REF_SHIFT		0
#define	R300_RB3D_ZS2_STENCIL_MASK		0xFF
#define	R300_RB3D_ZS2_STENCIL_MASK_SHIFT	8
#define	R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT	16
1322 
/* gap */

#define	R300_RB3D_ZSTENCIL_FORMAT		0x4F10
#define	R300_DEPTH_FORMAT_16BIT_INT_Z		(0 << 0)
#define	R300_DEPTH_FORMAT_24BIT_INT_Z		(2 << 0)
1328 
/* gap */
/*
 * The R300_DEPTH* values below equal their R300_COLOR* counterparts for
 * R300_RB3D_COLORPITCH0 (same mask, same bit positions).
 */
#define	R300_RB3D_DEPTHOFFSET			0x4F20
#define	R300_RB3D_DEPTHPITCH			0x4F24
#define	R300_DEPTHPITCH_MASK			0x00001FF8 /* GUESS */
#define	R300_DEPTH_TILE_ENABLE			(1 << 16) /* GUESS */
#define	R300_DEPTH_MICROTILE_ENABLE		(1 << 17) /* GUESS */
#define	R300_DEPTH_ENDIAN_NO_SWAP		(0 << 18) /* GUESS */
#define	R300_DEPTH_ENDIAN_WORD_SWAP		(1 << 18) /* GUESS */
#define	R300_DEPTH_ENDIAN_DWORD_SWAP		(2 << 18) /* GUESS */
1338 
/*
 * BEGIN: Vertex program instruction set
 * Every instruction is four dwords long:
 * 		DWORD 0: output and opcode
 * 		DWORD 1: first argument
 * 		DWORD 2: second argument
 * 		DWORD 3: third argument
 *
 * Notes:
 * 	- ABS r, a is implemented as MAX r, a, -a
 * 	- MOV is implemented as ADD to zero
 * 	- XPD is implemented as MUL + MAD
 * 	- FLR is implemented as FRC + ADD
 * 	- apparently, fglrx tries to schedule instructions so that there
 * 		is at least one instruction between the write to a temporary
 * 		and the first read from said temporary; however, violations
 * 		of this scheduling are allowed
 * 	- register indices seem to be unrelated with OpenGL aliasing to
 * 		conventional state
 * 	- only one attribute and one parameter can be loaded at a time;
 * 		however, the same attribute/parameter can be used for more
 * 		than one argument
 * 	- the second software argument for POW is the third hardware
 * 		argument (no idea why)
 * 	- MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2
 *
 * There is some magic surrounding LIT:
 * The single argument is replicated across all three inputs, but swizzled:
 * 		First argument: xyzy
 * 		Second argument: xyzx
 * 		Third argument: xyzw
 * Whenever the result is used later in the fragment program, fglrx forces
 * x and w to be 1.0 in the input selection; I don't know whether this is
 * strictly necessary
 */
/* Opcodes; all of them sit at bit 0 of DWORD 0. */
#define	R300_VPI_OUT_OP_DOT			(1 << 0)
#define	R300_VPI_OUT_OP_MUL			(2 << 0)
#define	R300_VPI_OUT_OP_ADD			(3 << 0)
#define	R300_VPI_OUT_OP_MAD			(4 << 0)
#define	R300_VPI_OUT_OP_DST			(5 << 0)
#define	R300_VPI_OUT_OP_FRC			(6 << 0)
#define	R300_VPI_OUT_OP_MAX			(7 << 0)
#define	R300_VPI_OUT_OP_MIN			(8 << 0)
#define	R300_VPI_OUT_OP_SGE			(9 << 0)
#define	R300_VPI_OUT_OP_SLT			(10 << 0)
#define	R300_VPI_OUT_OP_UNK12			(12 << 0)
				/*
				 * Used in GL_POINT_DISTANCE_ATTENUATION_ARB,
				 * vector(scalar, vector)
				 */
#define	R300_VPI_OUT_OP_EXP			(65 << 0)
#define	R300_VPI_OUT_OP_LOG			(66 << 0)
#define	R300_VPI_OUT_OP_UNK67			(67 << 0)
				/* Used in fog computations, scalar(scalar) */
#define	R300_VPI_OUT_OP_LIT			(68 << 0)
#define	R300_VPI_OUT_OP_POW			(69 << 0)
#define	R300_VPI_OUT_OP_RCP			(70 << 0)
#define	R300_VPI_OUT_OP_RSQ			(72 << 0)
#define	R300_VPI_OUT_OP_UNK73			(73 << 0)
				/*
				 * Used in GL_POINT_DISTANCE_ATTENUATION_ARB,
				 * scalar(scalar)
				 */
#define	R300_VPI_OUT_OP_EX2			(75 << 0)
#define	R300_VPI_OUT_OP_LG2			(76 << 0)
#define	R300_VPI_OUT_OP_MAD_2			(128 << 0)
#define	R300_VPI_OUT_OP_UNK129			(129 << 0)
				/* all temps, vector(scalar, vector, vector) */
1407 
/* Output register class: 5-bit field at bit 8. */
#define	R300_VPI_OUT_REG_CLASS_TEMPORARY	(0 << 8)
#define	R300_VPI_OUT_REG_CLASS_RESULT		(2 << 8)
#define	R300_VPI_OUT_REG_CLASS_MASK		(31 << 8)

/* Output register index: 5-bit field at bit 13. */
#define	R300_VPI_OUT_REG_INDEX_SHIFT		13
#define	R300_VPI_OUT_REG_INDEX_MASK		(31 << 13)
				/* GUESS based on fglrx native limits */

/* Per-component write flags, bits 20-23. */
#define	R300_VPI_OUT_WRITE_X			(1 << 20)
#define	R300_VPI_OUT_WRITE_Y			(1 << 21)
#define	R300_VPI_OUT_WRITE_Z			(1 << 22)
#define	R300_VPI_OUT_WRITE_W			(1 << 23)
1420 
/* Input register class: 5-bit field at bit 0. */
#define	R300_VPI_IN_REG_CLASS_TEMPORARY		(0 << 0)
#define	R300_VPI_IN_REG_CLASS_ATTRIBUTE		(1 << 0)
#define	R300_VPI_IN_REG_CLASS_PARAMETER		(2 << 0)
#define	R300_VPI_IN_REG_CLASS_NONE		(9 << 0)
#define	R300_VPI_IN_REG_CLASS_MASK		(31 << 0) /* GUESS */

/* Input register index: 8-bit field at bit 5. */
#define	R300_VPI_IN_REG_INDEX_SHIFT		5
#define	R300_VPI_IN_REG_INDEX_MASK		(255 << 5)
				/* GUESS based on fglrx native limits */

/*
 * The R300 can select components from the input register arbitrarily.
 * Use the following constants, shifted by the component shift you
 * want to select
 */
#define	R300_VPI_IN_SELECT_X			0
#define	R300_VPI_IN_SELECT_Y			1
#define	R300_VPI_IN_SELECT_Z			2
#define	R300_VPI_IN_SELECT_W			3
#define	R300_VPI_IN_SELECT_ZERO			4
#define	R300_VPI_IN_SELECT_ONE			5
#define	R300_VPI_IN_SELECT_MASK			7

#define	R300_VPI_IN_X_SHIFT			13
#define	R300_VPI_IN_Y_SHIFT			16
#define	R300_VPI_IN_Z_SHIFT			19
#define	R300_VPI_IN_W_SHIFT			22

#define	R300_VPI_IN_NEG_X			(1 << 25)
#define	R300_VPI_IN_NEG_Y			(1 << 26)
#define	R300_VPI_IN_NEG_Z			(1 << 27)
#define	R300_VPI_IN_NEG_W			(1 << 28)
/* END */
1454 
/* BEGIN: Packet 3 commands */

/*
 * A primitive emission dword.
 *
 * Layout as given by the constants below: primitive type in bits 0-3,
 * walk mode in bits 4-5, colour order in bit 6, and the vertex count
 * starting at bit 16.
 */
#define	R300_PRIM_TYPE_NONE			(0 << 0)
#define	R300_PRIM_TYPE_POINT			(1 << 0)
#define	R300_PRIM_TYPE_LINE			(2 << 0)
#define	R300_PRIM_TYPE_LINE_STRIP		(3 << 0)
#define	R300_PRIM_TYPE_TRI_LIST			(4 << 0)
#define	R300_PRIM_TYPE_TRI_FAN			(5 << 0)
#define	R300_PRIM_TYPE_TRI_STRIP		(6 << 0)
#define	R300_PRIM_TYPE_TRI_TYPE2		(7 << 0)
#define	R300_PRIM_TYPE_RECT_LIST		(8 << 0)
#define	R300_PRIM_TYPE_3VRT_POINT_LIST		(9 << 0)
#define	R300_PRIM_TYPE_3VRT_LINE_LIST		(10 << 0)
/* R300_PRIM_TYPE_POINT_SPRITES is a GUESS (based on r200) */
#define	R300_PRIM_TYPE_POINT_SPRITES		(11 << 0)
#define	R300_PRIM_TYPE_LINE_LOOP		(12 << 0)
#define	R300_PRIM_TYPE_QUADS			(13 << 0)
#define	R300_PRIM_TYPE_QUAD_STRIP		(14 << 0)
#define	R300_PRIM_TYPE_POLYGON			(15 << 0)
#define	R300_PRIM_TYPE_MASK			0xF
#define	R300_PRIM_WALK_IND			(1 << 4)
#define	R300_PRIM_WALK_LIST			(2 << 4)
#define	R300_PRIM_WALK_RING			(3 << 4)
#define	R300_PRIM_WALK_MASK			(3 << 4)
/* R300_PRIM_COLOR_ORDER_BGRA is a GUESS (based on r200) */
#define	R300_PRIM_COLOR_ORDER_BGRA		(0 << 6)
/* R300_PRIM_COLOR_ORDER_RGBA is a GUESS */
#define	R300_PRIM_COLOR_ORDER_RGBA		(1 << 6)
#define	R300_PRIM_NUM_VERTICES_SHIFT		16

/*
 * Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
 * Two parameter dwords:
 * 0. The first parameter appears to be always 0
 * 1. The second parameter is a standard primitive emission dword.
 */
#define	R300_PACKET3_3D_DRAW_VBUF		0x00002800

/*
 * Specify the full set of vertex arrays as (address, stride).
 * The first parameter is the number of vertex arrays specified.
 * The rest of the command is a variable length list of blocks, where
 * each block is three dwords long and specifies two arrays.
 * The first dword of a block is split into two words, the less significant
 * word refers to the first array, the more significant word to the second
 * array in the block.
 * The low byte of each word contains the size of an array entry in dwords,
 * the high byte contains the stride of the array.
 * The second dword of a block contains the pointer to the first array,
 * the third dword of a block contains the pointer to the second array.
 * Note that if the total number of arrays is odd, the third dword of
 * the last block is omitted.
 */
#define	R300_PACKET3_3D_LOAD_VBPNTR		0x00002F00

#define	R300_PACKET3_INDX_BUFFER		0x00003300
#define	R300_EB_UNK1_SHIFT			24
/*
 * The constant is unsigned because 0x80 << 24 sets bit 31: shifting a
 * signed int that far is undefined behaviour and typically yields a
 * negative value that sign-extends when widened.
 */
#define	R300_EB_UNK1				(0x80U << 24)
#define	R300_EB_UNK2				0x0810
#define	R300_PACKET3_3D_DRAW_INDX_2		0x00003600

1512 #ifdef	__cplusplus
1513 }
1514 #endif
1515 
1516 #endif /* __R300_REG_H_ */
1517