xref: /linux/drivers/gpu/drm/radeon/r300.c (revision 4b2a108cd0d34880fe9d932258ca5b2ccebcd05e)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/seq_file.h>
29 #include "drmP.h"
30 #include "drm.h"
31 #include "radeon_reg.h"
32 #include "radeon.h"
33 #include "radeon_drm.h"
34 #include "radeon_share.h"
35 
36 /* r300,r350,rv350,rv370,rv380 depends on : */
37 void r100_hdp_reset(struct radeon_device *rdev);
38 int r100_cp_reset(struct radeon_device *rdev);
39 int r100_rb2d_reset(struct radeon_device *rdev);
40 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
41 int r100_pci_gart_enable(struct radeon_device *rdev);
42 void r100_pci_gart_disable(struct radeon_device *rdev);
43 void r100_mc_setup(struct radeon_device *rdev);
44 void r100_mc_disable_clients(struct radeon_device *rdev);
45 int r100_gui_wait_for_idle(struct radeon_device *rdev);
46 int r100_cs_packet_parse(struct radeon_cs_parser *p,
47 			 struct radeon_cs_packet *pkt,
48 			 unsigned idx);
49 int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
50 int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
51 			      struct radeon_cs_reloc **cs_reloc);
52 int r100_cs_parse_packet0(struct radeon_cs_parser *p,
53 			  struct radeon_cs_packet *pkt,
54 			  const unsigned *auth, unsigned n,
55 			  radeon_packet0_check_t check);
56 void r100_cs_dump_packet(struct radeon_cs_parser *p,
57 			 struct radeon_cs_packet *pkt);
58 int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
59 					 struct radeon_cs_packet *pkt,
60 					 struct radeon_object *robj);
61 
62 /* This files gather functions specifics to:
63  * r300,r350,rv350,rv370,rv380
64  *
65  * Some of these functions might be used by newer ASICs.
66  */
67 void r300_gpu_init(struct radeon_device *rdev);
68 int r300_mc_wait_for_idle(struct radeon_device *rdev);
69 int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
70 
71 
72 /*
73  * rv370,rv380 PCIE GART
74  */
75 void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
76 {
77 	uint32_t tmp;
78 	int i;
79 
80 	/* Workaround HW bug do flush 2 times */
81 	for (i = 0; i < 2; i++) {
82 		tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
83 		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
84 		(void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
85 		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
86 		mb();
87 	}
88 }
89 
/* Allocate the GART page table in VRAM and program/enable the rv370/rv380
 * PCIE GART aperture.  Returns 0 on success or a negative errno. */
int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
	uint32_t table_addr;
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	/* debugfs registration failure is not fatal; just report it */
	r = rv370_debugfs_pcie_gart_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
	}
	/* one 32-bit table entry per GPU page */
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	r = radeon_gart_table_vram_alloc(rdev);
	if (r) {
		return r;
	}
	/* discard memory request outside of configured range */
	tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	/* aperture covers [gtt_location, gtt_location + gtt_size - 4096] */
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
	/* 32-bit aperture: high dwords stay zero */
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
	table_addr = rdev->gart.table_addr;
	WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
	/* FIXME: setup default page */
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
	/* Clear error */
	WREG32_PCIE(0x18, 0);
	/* finally enable the GART, keeping unmapped-access discard set */
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_EN;
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	rv370_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
		 rdev->mc.gtt_size >> 20, table_addr);
	rdev->gart.ready = true;
	return 0;
}
135 
136 void rv370_pcie_gart_disable(struct radeon_device *rdev)
137 {
138 	uint32_t tmp;
139 
140 	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
141 	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
142 	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
143 	if (rdev->gart.table.vram.robj) {
144 		radeon_object_kunmap(rdev->gart.table.vram.robj);
145 		radeon_object_unpin(rdev->gart.table.vram.robj);
146 	}
147 }
148 
149 int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
150 {
151 	void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
152 
153 	if (i < 0 || i > rdev->gart.num_gpu_pages) {
154 		return -EINVAL;
155 	}
156 	addr = (lower_32_bits(addr) >> 8) |
157 	       ((upper_32_bits(addr) & 0xff) << 24) |
158 	       0xc;
159 	/* on x86 we want this to be CPU endian, on powerpc
160 	 * on powerpc without HW swappers, it'll get swapped on way
161 	 * into VRAM - so no need for cpu_to_le32 on VRAM tables */
162 	writel(addr, ((void __iomem *)ptr) + (i * 4));
163 	return 0;
164 }
165 
166 int r300_gart_enable(struct radeon_device *rdev)
167 {
168 #if __OS_HAS_AGP
169 	if (rdev->flags & RADEON_IS_AGP) {
170 		if (rdev->family > CHIP_RV350) {
171 			rv370_pcie_gart_disable(rdev);
172 		} else {
173 			r100_pci_gart_disable(rdev);
174 		}
175 		return 0;
176 	}
177 #endif
178 	if (rdev->flags & RADEON_IS_PCIE) {
179 		rdev->asic->gart_disable = &rv370_pcie_gart_disable;
180 		rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
181 		rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
182 		return rv370_pcie_gart_enable(rdev);
183 	}
184 	return r100_pci_gart_enable(rdev);
185 }
186 
187 
188 /*
189  * MC
190  */
/* Initialize the memory controller: basic GPU setup, GARTs off, decide the
 * VRAM/GTT layout (optionally via AGP) and program it into the MC. */
int r300_mc_init(struct radeon_device *rdev)
{
	int r;

	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
	}

	r300_gpu_init(rdev);
	/* both GARTs must be off while the MC is reprogrammed */
	r100_pci_gart_disable(rdev);
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
	}

	/* Setup GPU memory space */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			/* AGP bring-up failed: fall back to a plain GART of
			 * the module-configured size */
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	/* Program GPU memory space */
	r100_mc_disable_clients(rdev);
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	r100_mc_setup(rdev);
	return 0;
}
232 
233 void r300_mc_fini(struct radeon_device *rdev)
234 {
235 	if (rdev->flags & RADEON_IS_PCIE) {
236 		rv370_pcie_gart_disable(rdev);
237 		radeon_gart_table_vram_free(rdev);
238 	} else {
239 		r100_pci_gart_disable(rdev);
240 		radeon_gart_table_ram_free(rdev);
241 	}
242 	radeon_gart_fini(rdev);
243 }
244 
245 
246 /*
247  * Fence emission
248  */
/* Emit the ring packets that flush caches, wait for idle, write the fence
 * sequence number to the fence scratch register and fire the SW IRQ. */
void r300_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Who ever call radeon_fence_emit should call ring_lock and ask
	 * for enough space (today caller are ib schedule and buffer move) */
	/* Write SC register so SC & US assert idle */
	radeon_ring_write(rdev, PACKET0(0x43E0, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(0x43E4, 0));
	radeon_ring_write(rdev, 0);
	/* Flush 3D cache */
	radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
	radeon_ring_write(rdev, (2 << 0));
	radeon_ring_write(rdev, PACKET0(0x4F18, 0));
	radeon_ring_write(rdev, (1 << 0));
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}
273 
274 
275 /*
276  * Global GPU functions
277  */
278 int r300_copy_dma(struct radeon_device *rdev,
279 		  uint64_t src_offset,
280 		  uint64_t dst_offset,
281 		  unsigned num_pages,
282 		  struct radeon_fence *fence)
283 {
284 	uint32_t size;
285 	uint32_t cur_size;
286 	int i, num_loops;
287 	int r = 0;
288 
289 	/* radeon pitch is /64 */
290 	size = num_pages << PAGE_SHIFT;
291 	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
292 	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
293 	if (r) {
294 		DRM_ERROR("radeon: moving bo (%d).\n", r);
295 		return r;
296 	}
297 	/* Must wait for 2D idle & clean before DMA or hangs might happen */
298 	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0 ));
299 	radeon_ring_write(rdev, (1 << 16));
300 	for (i = 0; i < num_loops; i++) {
301 		cur_size = size;
302 		if (cur_size > 0x1FFFFF) {
303 			cur_size = 0x1FFFFF;
304 		}
305 		size -= cur_size;
306 		radeon_ring_write(rdev, PACKET0(0x720, 2));
307 		radeon_ring_write(rdev, src_offset);
308 		radeon_ring_write(rdev, dst_offset);
309 		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
310 		src_offset += cur_size;
311 		dst_offset += cur_size;
312 	}
313 	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
314 	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
315 	if (fence) {
316 		r = radeon_fence_emit(rdev, fence);
317 	}
318 	radeon_ring_unlock_commit(rdev);
319 	return r;
320 }
321 
/* Emit the initial ring state for r300-class chips: pipe/tile config,
 * cache flushes, multisample positions and rasterizer defaults.
 * Silently returns if the ring cannot be locked. */
void r300_ring_start(struct radeon_device *rdev)
{
	unsigned gb_tile_config;
	int r;

	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch(rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	case 1:
	default:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}

	r = radeon_ring_lock(rdev, 64);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
	radeon_ring_write(rdev, gb_tile_config);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	/* 0x170C bit 31: same write as in r300_gpu_init() */
	radeon_ring_write(rdev, PACKET0(0x170C, 0));
	radeon_ring_write(rdev, 1 << 31);
	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
	radeon_ring_write(rdev, 0);
	/* flush & free destination and Z caches, then wait for idle */
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	/* default multisample positions */
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X0_SHIFT) |
			   (6 << R300_MS_Y0_SHIFT) |
			   (6 << R300_MS_X1_SHIFT) |
			   (6 << R300_MS_Y1_SHIFT) |
			   (6 << R300_MS_X2_SHIFT) |
			   (6 << R300_MS_Y2_SHIFT) |
			   (6 << R300_MSBD0_Y_SHIFT) |
			   (6 << R300_MSBD0_X_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X3_SHIFT) |
			   (6 << R300_MS_Y3_SHIFT) |
			   (6 << R300_MS_X4_SHIFT) |
			   (6 << R300_MS_Y4_SHIFT) |
			   (6 << R300_MS_X5_SHIFT) |
			   (6 << R300_MS_Y5_SHIFT) |
			   (6 << R300_MSBD1_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
	radeon_ring_write(rdev,
			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
	radeon_ring_write(rdev,
			  R300_GEOMETRY_ROUND_NEAREST |
			  R300_COLOR_ROUND_NEAREST);
	radeon_ring_unlock_commit(rdev);
}
411 
412 void r300_errata(struct radeon_device *rdev)
413 {
414 	rdev->pll_errata = 0;
415 
416 	if (rdev->family == CHIP_R300 &&
417 	    (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
418 		rdev->pll_errata |= CHIP_ERRATA_R300_CG;
419 	}
420 }
421 
422 int r300_mc_wait_for_idle(struct radeon_device *rdev)
423 {
424 	unsigned i;
425 	uint32_t tmp;
426 
427 	for (i = 0; i < rdev->usec_timeout; i++) {
428 		/* read MC_STATUS */
429 		tmp = RREG32(0x0150);
430 		if (tmp & (1 << 4)) {
431 			return 0;
432 		}
433 		DRM_UDELAY(1);
434 	}
435 	return -1;
436 }
437 
/* Basic engine setup for r300-class chips: pick the GB pipe count from
 * the chip family, program the tile config and flush the 2D caches. */
void r300_gpu_init(struct radeon_device *rdev)
{
	uint32_t gb_tile_config, tmp;

	r100_hdp_reset(rdev);
	/* FIXME: rv380 one pipes ? */
	if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
		/* r300,r350 */
		rdev->num_gb_pipes = 2;
	} else {
		/* rv350,rv370,rv380 */
		rdev->num_gb_pipes = 1;
	}
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	default:
	case 1:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}
	WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	/* set bit 31 of 0x170C -- NOTE(review): register meaning not visible
	 * in this file; same write is emitted through the ring in
	 * r300_ring_start() */
	tmp = RREG32(0x170C);
	WREG32(0x170C, tmp | (1 << 31));

	WREG32(R300_RB2D_DSTCACHE_MODE,
	       R300_DC_AUTOFLUSH_ENABLE |
	       R300_DC_DC_DISABLE_IGNORE_PE);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
}
491 
/* Soft-reset the GA (and VAP/CP path) after a hang.  The CP is stopped
 * first and re-initialized afterwards if it was running.
 * Returns 0 on success (or the r100_cp_init() result), -1 on timeout. */
int r300_ga_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	/* remember whether the CP must be brought back up after the reset */
	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	for (i = 0; i < rdev->usec_timeout; i++) {
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
		(void)RREG32(RADEON_RBBM_SOFT_RESET);
		udelay(200);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		/* bits 20 and 26 of RBBM_STATUS flag VAP/CP busy -- NOTE(review):
		 * bit meanings taken from the error message below */
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (tmp & ((1 << 20) | (1 << 26))) {
			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
			/* GA still busy soft reset it */
			WREG32(0x429C, 0x200);
			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
			WREG32(0x43E0, 0);
			WREG32(0x43E4, 0);
			WREG32(0x24AC, 0);
		}
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			break;
		}
	}
	/* second loop: confirm the busy bits stay clear before declaring success */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
	return -1;
}
542 
/* Full GPU reset: reset HDP, RB2D, GA and CP in sequence, each block only
 * if RBBM_STATUS shows it busy.  Returns 0 if the GPU ends up idle,
 * -1 otherwise. */
int r300_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matter */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* reset GA */
	if (status & ((1 << 20) | (1 << 26))) {
		r300_ga_reset(rdev);
	}
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	/* bit 31 of RBBM_STATUS: GPU still active */
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}
573 
574 
575 /*
576  * r300,r350,rv350,rv380 VRAM info
577  */
578 void r300_vram_info(struct radeon_device *rdev)
579 {
580 	uint32_t tmp;
581 
582 	/* DDR for all card after R300 & IGP */
583 	rdev->mc.vram_is_ddr = true;
584 	tmp = RREG32(RADEON_MEM_CNTL);
585 	if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
586 		rdev->mc.vram_width = 128;
587 	} else {
588 		rdev->mc.vram_width = 64;
589 	}
590 
591 	r100_vram_init_sizes(rdev);
592 }
593 
594 
595 /*
596  * Indirect registers accessor
597  */
598 uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
599 {
600 	uint32_t r;
601 
602 	WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
603 	(void)RREG32(RADEON_PCIE_INDEX);
604 	r = RREG32(RADEON_PCIE_DATA);
605 	return r;
606 }
607 
608 void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
609 {
610 	WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
611 	(void)RREG32(RADEON_PCIE_INDEX);
612 	WREG32(RADEON_PCIE_DATA, (v));
613 	(void)RREG32(RADEON_PCIE_DATA);
614 }
615 
616 /*
617  * PCIE Lanes
618  */
619 
/* Reconfigure the PCIE link to use @lanes lanes (0/1/2/4/8/12/16; any
 * other value falls back to x16).  No-op on IGP and non-PCIE boards,
 * and when the link already runs at the requested width. */
void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
	uint32_t link_width_cntl, mask;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* FIXME wait for idle */

	switch (lanes) {
	case 0:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
		break;
	case 1:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
		break;
	case 2:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
		break;
	case 4:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
		break;
	case 8:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
		break;
	case 12:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
		break;
	case 16:
	default:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
		break;
	}

	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

	/* nothing to do if the current (read-back) width already matches */
	if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
	    (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
		return;

	link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
			     RADEON_PCIE_LC_RECONFIG_NOW |
			     RADEON_PCIE_LC_RECONFIG_LATER |
			     RADEON_PCIE_LC_SHORT_RECONFIG_EN);
	link_width_cntl |= mask;
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	/* second write triggers the actual reconfiguration */
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
						     RADEON_PCIE_LC_RECONFIG_NOW));

	/* wait for lane set to complete */
	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
	while (link_width_cntl == 0xffffffff)
		link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

}
678 
679 
680 /*
681  * Debugfs info
682  */
683 #if defined(CONFIG_DEBUG_FS)
/* debugfs dump of the PCIE GART configuration registers. */
static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;

	seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n",
		   RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL));
	seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n",
		   RREG32_PCIE(RADEON_PCIE_TX_GART_BASE));
	seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n",
		   RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO));
	seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n",
		   RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI));
	seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n",
		   RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO));
	seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n",
		   RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI));
	seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n",
		   RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR));
	return 0;
}
707 
/* Single debugfs entry backed by rv370_debugfs_pcie_gart_info(). */
static struct drm_info_list rv370_pcie_gart_info_list[] = {
	{"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
711 #endif
712 
/* Register the PCIE GART debugfs file; succeeds trivially when debugfs
 * support is compiled out. */
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
	return 0;
#endif
}
721 
722 
723 /*
724  * CS functions
725  */
/* Color (or depth) buffer state tracked while checking a command stream. */
struct r300_cs_track_cb {
	struct radeon_object	*robj;		/* bound buffer object */
	unsigned		pitch;		/* pitch -- in pixels, per r300_cs_track_check size math */
	unsigned		cpp;		/* bytes per pixel */
	unsigned		offset;		/* byte offset into robj */
};
732 
/* Vertex array binding tracked while checking a command stream. */
struct r300_cs_track_array {
	struct radeon_object	*robj;		/* bound vertex buffer object */
	unsigned		esize;		/* element size -- in dwords, per the size*4 math in r300_cs_track_check */
};
737 
/* Per-unit texture state tracked while checking a command stream. */
struct r300_cs_track_texture {
	struct radeon_object	*robj;		/* bound texture buffer object */
	unsigned		pitch;		/* row pitch, used instead of width when use_pitch */
	unsigned		width;		/* base level width */
	unsigned		height;		/* base level height */
	unsigned		num_levels;	/* mip levels; level num_levels itself is counted in size checks */
	unsigned		cpp;		/* bytes per texel */
	unsigned		tex_coord_type;	/* 0=2D, 1=3D (depth slices), 2=cube -- per size math in texture_check */
	unsigned		txdepth;	/* log2 depth used for 3D textures */
	unsigned		width_11;	/* extra 11th width bit (>= RV515 only) */
	unsigned		height_11;	/* extra 11th height bit (>= RV515 only) */
	bool			use_pitch;	/* size from pitch rather than width */
	bool			enabled;	/* unit active; disabled units are skipped */
	bool			roundup_w;	/* round width up to a power of two per level */
	bool			roundup_h;	/* round height up to a power of two per level */
};
754 
/* Aggregate state accumulated while parsing a CS, validated by
 * r300_cs_track_check() before the commands are accepted. */
struct r300_cs_track {
	unsigned			num_cb;		/* active color buffers (<= 4) */
	unsigned			maxy;		/* max scissor/render height */
	unsigned			vtx_size;	/* vertex size in dwords (immediate draws) */
	unsigned			vap_vf_cntl;	/* last VAP_VF_CNTL value (prim walk, vertex count) */
	unsigned			immd_dwords;	/* dwords supplied by immediate-mode draw */
	unsigned			num_arrays;	/* active vertex arrays (<= 11) */
	unsigned			max_indx;	/* highest index referenced by index buffers */
	struct r300_cs_track_array	arrays[11];
	struct r300_cs_track_cb 	cb[4];
	struct r300_cs_track_cb 	zb;		/* depth buffer, reuses the cb layout */
	struct r300_cs_track_texture	textures[16];
	bool				z_enabled;	/* depth buffer accesses enabled */
};
769 
/* Dump the tracked state of one texture unit; used when a texture fails
 * the size validation in r300_cs_track_texture_check(). */
static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}
782 
783 static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
784 					      struct r300_cs_track *track)
785 {
786 	struct radeon_object *robj;
787 	unsigned long size;
788 	unsigned u, i, w, h;
789 
790 	for (u = 0; u < 16; u++) {
791 		if (!track->textures[u].enabled)
792 			continue;
793 		robj = track->textures[u].robj;
794 		if (robj == NULL) {
795 			DRM_ERROR("No texture bound to unit %u\n", u);
796 			return -EINVAL;
797 		}
798 		size = 0;
799 		for (i = 0; i <= track->textures[u].num_levels; i++) {
800 			if (track->textures[u].use_pitch) {
801 				w = track->textures[u].pitch / (1 << i);
802 			} else {
803 				w = track->textures[u].width / (1 << i);
804 				if (rdev->family >= CHIP_RV515)
805 					w |= track->textures[u].width_11;
806 				if (track->textures[u].roundup_w)
807 					w = roundup_pow_of_two(w);
808 			}
809 			h = track->textures[u].height / (1 << i);
810 			if (rdev->family >= CHIP_RV515)
811 				h |= track->textures[u].height_11;
812 			if (track->textures[u].roundup_h)
813 				h = roundup_pow_of_two(h);
814 			size += w * h;
815 		}
816 		size *= track->textures[u].cpp;
817 		switch (track->textures[u].tex_coord_type) {
818 		case 0:
819 			break;
820 		case 1:
821 			size *= (1 << track->textures[u].txdepth);
822 			break;
823 		case 2:
824 			size *= 6;
825 			break;
826 		default:
827 			DRM_ERROR("Invalid texture coordinate type %u for unit "
828 				  "%u\n", track->textures[u].tex_coord_type, u);
829 			return -EINVAL;
830 		}
831 		if (size > radeon_object_size(robj)) {
832 			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
833 				  "%lu\n", u, size, radeon_object_size(robj));
834 			r300_cs_track_texture_print(&track->textures[u]);
835 			return -EINVAL;
836 		}
837 	}
838 	return 0;
839 }
840 
841 int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
842 {
843 	unsigned i;
844 	unsigned long size;
845 	unsigned prim_walk;
846 	unsigned nverts;
847 
848 	for (i = 0; i < track->num_cb; i++) {
849 		if (track->cb[i].robj == NULL) {
850 			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
851 			return -EINVAL;
852 		}
853 		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
854 		size += track->cb[i].offset;
855 		if (size > radeon_object_size(track->cb[i].robj)) {
856 			DRM_ERROR("[drm] Buffer too small for color buffer %d "
857 				  "(need %lu have %lu) !\n", i, size,
858 				  radeon_object_size(track->cb[i].robj));
859 			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
860 				  i, track->cb[i].pitch, track->cb[i].cpp,
861 				  track->cb[i].offset, track->maxy);
862 			return -EINVAL;
863 		}
864 	}
865 	if (track->z_enabled) {
866 		if (track->zb.robj == NULL) {
867 			DRM_ERROR("[drm] No buffer for z buffer !\n");
868 			return -EINVAL;
869 		}
870 		size = track->zb.pitch * track->zb.cpp * track->maxy;
871 		size += track->zb.offset;
872 		if (size > radeon_object_size(track->zb.robj)) {
873 			DRM_ERROR("[drm] Buffer too small for z buffer "
874 				  "(need %lu have %lu) !\n", size,
875 				  radeon_object_size(track->zb.robj));
876 			return -EINVAL;
877 		}
878 	}
879 	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
880 	nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
881 	switch (prim_walk) {
882 	case 1:
883 		for (i = 0; i < track->num_arrays; i++) {
884 			size = track->arrays[i].esize * track->max_indx * 4;
885 			if (track->arrays[i].robj == NULL) {
886 				DRM_ERROR("(PW %u) Vertex array %u no buffer "
887 					  "bound\n", prim_walk, i);
888 				return -EINVAL;
889 			}
890 			if (size > radeon_object_size(track->arrays[i].robj)) {
891 				DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
892 					   "have %lu dwords\n", prim_walk, i,
893 					   size >> 2,
894 					   radeon_object_size(track->arrays[i].robj) >> 2);
895 				DRM_ERROR("Max indices %u\n", track->max_indx);
896 				return -EINVAL;
897 			}
898 		}
899 		break;
900 	case 2:
901 		for (i = 0; i < track->num_arrays; i++) {
902 			size = track->arrays[i].esize * (nverts - 1) * 4;
903 			if (track->arrays[i].robj == NULL) {
904 				DRM_ERROR("(PW %u) Vertex array %u no buffer "
905 					  "bound\n", prim_walk, i);
906 				return -EINVAL;
907 			}
908 			if (size > radeon_object_size(track->arrays[i].robj)) {
909 				DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
910 					   "have %lu dwords\n", prim_walk, i, size >> 2,
911 					   radeon_object_size(track->arrays[i].robj) >> 2);
912 				return -EINVAL;
913 			}
914 		}
915 		break;
916 	case 3:
917 		size = track->vtx_size * nverts;
918 		if (size != track->immd_dwords) {
919 			DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
920 				  track->immd_dwords, size);
921 			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
922 				  nverts, track->vtx_size);
923 			return -EINVAL;
924 		}
925 		break;
926 	default:
927 		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
928 			  prim_walk);
929 		return -EINVAL;
930 	}
931 	return r300_cs_track_texture_check(rdev, track);
932 }
933 
934 static inline void r300_cs_track_clear(struct r300_cs_track *track)
935 {
936 	unsigned i;
937 
938 	track->num_cb = 4;
939 	track->maxy = 4096;
940 	for (i = 0; i < track->num_cb; i++) {
941 		track->cb[i].robj = NULL;
942 		track->cb[i].pitch = 8192;
943 		track->cb[i].cpp = 16;
944 		track->cb[i].offset = 0;
945 	}
946 	track->z_enabled = true;
947 	track->zb.robj = NULL;
948 	track->zb.pitch = 8192;
949 	track->zb.cpp = 4;
950 	track->zb.offset = 0;
951 	track->vtx_size = 0x7F;
952 	track->immd_dwords = 0xFFFFFFFFUL;
953 	track->num_arrays = 11;
954 	track->max_indx = 0x00FFFFFFUL;
955 	for (i = 0; i < track->num_arrays; i++) {
956 		track->arrays[i].robj = NULL;
957 		track->arrays[i].esize = 0x7F;
958 	}
959 	for (i = 0; i < 16; i++) {
960 		track->textures[i].pitch = 16536;
961 		track->textures[i].width = 16536;
962 		track->textures[i].height = 16536;
963 		track->textures[i].width_11 = 1 << 11;
964 		track->textures[i].height_11 = 1 << 11;
965 		track->textures[i].num_levels = 12;
966 		track->textures[i].txdepth = 16;
967 		track->textures[i].cpp = 64;
968 		track->textures[i].tex_coord_type = 1;
969 		track->textures[i].robj = NULL;
970 		/* CS IB emission code makes sure texture unit are disabled */
971 		track->textures[i].enabled = false;
972 		track->textures[i].roundup_w = true;
973 		track->textures[i].roundup_h = true;
974 	}
975 }
976 
/* Per-register permission bitmap used when checking PACKET0 writes from a
 * command stream: one bit per dword register, consulted via the `auth`
 * table of r100_cs_parse_packet0().  NOTE(review): exact bit semantics
 * (set == writable without a reloc) inferred from the parser interface --
 * verify against r100_cs_parse_packet0. */
static const unsigned r300_reg_safe_bm[159] = {
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
	0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
	0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
	0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
	0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
	0x00000000, 0x0000C100, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};
1019 
1020 static int r300_packet0_check(struct radeon_cs_parser *p,
1021 		struct radeon_cs_packet *pkt,
1022 		unsigned idx, unsigned reg)
1023 {
1024 	struct radeon_cs_chunk *ib_chunk;
1025 	struct radeon_cs_reloc *reloc;
1026 	struct r300_cs_track *track;
1027 	volatile uint32_t *ib;
1028 	uint32_t tmp, tile_flags = 0;
1029 	unsigned i;
1030 	int r;
1031 
1032 	ib = p->ib->ptr;
1033 	ib_chunk = &p->chunks[p->chunk_ib_idx];
1034 	track = (struct r300_cs_track*)p->track;
1035 	switch(reg) {
1036 	case AVIVO_D1MODE_VLINE_START_END:
1037 	case RADEON_CRTC_GUI_TRIG_VLINE:
1038 		r = r100_cs_packet_parse_vline(p);
1039 		if (r) {
1040 			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1041 					idx, reg);
1042 			r100_cs_dump_packet(p, pkt);
1043 			return r;
1044 		}
1045 		break;
1046 	case RADEON_DST_PITCH_OFFSET:
1047 	case RADEON_SRC_PITCH_OFFSET:
1048 		r = r100_cs_packet_next_reloc(p, &reloc);
1049 		if (r) {
1050 			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1051 					idx, reg);
1052 			r100_cs_dump_packet(p, pkt);
1053 			return r;
1054 		}
1055 		tmp = ib_chunk->kdata[idx] & 0x003fffff;
1056 		tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
1057 
1058 		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1059 			tile_flags |= RADEON_DST_TILE_MACRO;
1060 		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
1061 			if (reg == RADEON_SRC_PITCH_OFFSET) {
1062 				DRM_ERROR("Cannot src blit from microtiled surface\n");
1063 				r100_cs_dump_packet(p, pkt);
1064 				return -EINVAL;
1065 			}
1066 			tile_flags |= RADEON_DST_TILE_MICRO;
1067 		}
1068 		tmp |= tile_flags;
1069 		ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
1070 		break;
1071 	case R300_RB3D_COLOROFFSET0:
1072 	case R300_RB3D_COLOROFFSET1:
1073 	case R300_RB3D_COLOROFFSET2:
1074 	case R300_RB3D_COLOROFFSET3:
1075 		i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
1076 		r = r100_cs_packet_next_reloc(p, &reloc);
1077 		if (r) {
1078 			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1079 					idx, reg);
1080 			r100_cs_dump_packet(p, pkt);
1081 			return r;
1082 		}
1083 		track->cb[i].robj = reloc->robj;
1084 		track->cb[i].offset = ib_chunk->kdata[idx];
1085 		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1086 		break;
1087 	case R300_ZB_DEPTHOFFSET:
1088 		r = r100_cs_packet_next_reloc(p, &reloc);
1089 		if (r) {
1090 			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1091 					idx, reg);
1092 			r100_cs_dump_packet(p, pkt);
1093 			return r;
1094 		}
1095 		track->zb.robj = reloc->robj;
1096 		track->zb.offset = ib_chunk->kdata[idx];
1097 		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1098 		break;
1099 	case R300_TX_OFFSET_0:
1100 	case R300_TX_OFFSET_0+4:
1101 	case R300_TX_OFFSET_0+8:
1102 	case R300_TX_OFFSET_0+12:
1103 	case R300_TX_OFFSET_0+16:
1104 	case R300_TX_OFFSET_0+20:
1105 	case R300_TX_OFFSET_0+24:
1106 	case R300_TX_OFFSET_0+28:
1107 	case R300_TX_OFFSET_0+32:
1108 	case R300_TX_OFFSET_0+36:
1109 	case R300_TX_OFFSET_0+40:
1110 	case R300_TX_OFFSET_0+44:
1111 	case R300_TX_OFFSET_0+48:
1112 	case R300_TX_OFFSET_0+52:
1113 	case R300_TX_OFFSET_0+56:
1114 	case R300_TX_OFFSET_0+60:
1115 		i = (reg - R300_TX_OFFSET_0) >> 2;
1116 		r = r100_cs_packet_next_reloc(p, &reloc);
1117 		if (r) {
1118 			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1119 					idx, reg);
1120 			r100_cs_dump_packet(p, pkt);
1121 			return r;
1122 		}
1123 		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1124 		track->textures[i].robj = reloc->robj;
1125 		break;
1126 	/* Tracked registers */
1127 	case 0x2084:
1128 		/* VAP_VF_CNTL */
1129 		track->vap_vf_cntl = ib_chunk->kdata[idx];
1130 		break;
1131 	case 0x20B4:
1132 		/* VAP_VTX_SIZE */
1133 		track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
1134 		break;
1135 	case 0x2134:
1136 		/* VAP_VF_MAX_VTX_INDX */
1137 		track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
1138 		break;
1139 	case 0x43E4:
1140 		/* SC_SCISSOR1 */
1141 		track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
1142 		if (p->rdev->family < CHIP_RV515) {
1143 			track->maxy -= 1440;
1144 		}
1145 		break;
1146 	case 0x4E00:
1147 		/* RB3D_CCTL */
1148 		track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
1149 		break;
1150 	case 0x4E38:
1151 	case 0x4E3C:
1152 	case 0x4E40:
1153 	case 0x4E44:
1154 		/* RB3D_COLORPITCH0 */
1155 		/* RB3D_COLORPITCH1 */
1156 		/* RB3D_COLORPITCH2 */
1157 		/* RB3D_COLORPITCH3 */
1158 		r = r100_cs_packet_next_reloc(p, &reloc);
1159 		if (r) {
1160 			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1161 				  idx, reg);
1162 			r100_cs_dump_packet(p, pkt);
1163 			return r;
1164 		}
1165 
1166 		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1167 			tile_flags |= R300_COLOR_TILE_ENABLE;
1168 		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1169 			tile_flags |= R300_COLOR_MICROTILE_ENABLE;
1170 
1171 		tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1172 		tmp |= tile_flags;
1173 		ib[idx] = tmp;
1174 
1175 		i = (reg - 0x4E38) >> 2;
1176 		track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
1177 		switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
1178 		case 9:
1179 		case 11:
1180 		case 12:
1181 			track->cb[i].cpp = 1;
1182 			break;
1183 		case 3:
1184 		case 4:
1185 		case 13:
1186 		case 15:
1187 			track->cb[i].cpp = 2;
1188 			break;
1189 		case 6:
1190 			track->cb[i].cpp = 4;
1191 			break;
1192 		case 10:
1193 			track->cb[i].cpp = 8;
1194 			break;
1195 		case 7:
1196 			track->cb[i].cpp = 16;
1197 			break;
1198 		default:
1199 			DRM_ERROR("Invalid color buffer format (%d) !\n",
1200 				  ((ib_chunk->kdata[idx] >> 21) & 0xF));
1201 			return -EINVAL;
1202 		}
1203 		break;
1204 	case 0x4F00:
1205 		/* ZB_CNTL */
1206 		if (ib_chunk->kdata[idx] & 2) {
1207 			track->z_enabled = true;
1208 		} else {
1209 			track->z_enabled = false;
1210 		}
1211 		break;
1212 	case 0x4F10:
1213 		/* ZB_FORMAT */
1214 		switch ((ib_chunk->kdata[idx] & 0xF)) {
1215 		case 0:
1216 		case 1:
1217 			track->zb.cpp = 2;
1218 			break;
1219 		case 2:
1220 			track->zb.cpp = 4;
1221 			break;
1222 		default:
1223 			DRM_ERROR("Invalid z buffer format (%d) !\n",
1224 				  (ib_chunk->kdata[idx] & 0xF));
1225 			return -EINVAL;
1226 		}
1227 		break;
1228 	case 0x4F24:
1229 		/* ZB_DEPTHPITCH */
1230 		r = r100_cs_packet_next_reloc(p, &reloc);
1231 		if (r) {
1232 			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1233 				  idx, reg);
1234 			r100_cs_dump_packet(p, pkt);
1235 			return r;
1236 		}
1237 
1238 		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1239 			tile_flags |= R300_DEPTHMACROTILE_ENABLE;
1240 		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1241 			tile_flags |= R300_DEPTHMICROTILE_TILED;;
1242 
1243 		tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1244 		tmp |= tile_flags;
1245 		ib[idx] = tmp;
1246 
1247 		track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
1248 		break;
1249 	case 0x4104:
1250 		for (i = 0; i < 16; i++) {
1251 			bool enabled;
1252 
1253 			enabled = !!(ib_chunk->kdata[idx] & (1 << i));
1254 			track->textures[i].enabled = enabled;
1255 		}
1256 		break;
1257 	case 0x44C0:
1258 	case 0x44C4:
1259 	case 0x44C8:
1260 	case 0x44CC:
1261 	case 0x44D0:
1262 	case 0x44D4:
1263 	case 0x44D8:
1264 	case 0x44DC:
1265 	case 0x44E0:
1266 	case 0x44E4:
1267 	case 0x44E8:
1268 	case 0x44EC:
1269 	case 0x44F0:
1270 	case 0x44F4:
1271 	case 0x44F8:
1272 	case 0x44FC:
1273 		/* TX_FORMAT1_[0-15] */
1274 		i = (reg - 0x44C0) >> 2;
1275 		tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
1276 		track->textures[i].tex_coord_type = tmp;
1277 		switch ((ib_chunk->kdata[idx] & 0x1F)) {
1278 		case 0:
1279 		case 2:
1280 		case 5:
1281 		case 18:
1282 		case 20:
1283 		case 21:
1284 			track->textures[i].cpp = 1;
1285 			break;
1286 		case 1:
1287 		case 3:
1288 		case 6:
1289 		case 7:
1290 		case 10:
1291 		case 11:
1292 		case 19:
1293 		case 22:
1294 		case 24:
1295 			track->textures[i].cpp = 2;
1296 			break;
1297 		case 4:
1298 		case 8:
1299 		case 9:
1300 		case 12:
1301 		case 13:
1302 		case 23:
1303 		case 25:
1304 		case 27:
1305 		case 30:
1306 			track->textures[i].cpp = 4;
1307 			break;
1308 		case 14:
1309 		case 26:
1310 		case 28:
1311 			track->textures[i].cpp = 8;
1312 			break;
1313 		case 29:
1314 			track->textures[i].cpp = 16;
1315 			break;
1316 		default:
1317 			DRM_ERROR("Invalid texture format %u\n",
1318 				  (ib_chunk->kdata[idx] & 0x1F));
1319 			return -EINVAL;
1320 			break;
1321 		}
1322 		break;
1323 	case 0x4400:
1324 	case 0x4404:
1325 	case 0x4408:
1326 	case 0x440C:
1327 	case 0x4410:
1328 	case 0x4414:
1329 	case 0x4418:
1330 	case 0x441C:
1331 	case 0x4420:
1332 	case 0x4424:
1333 	case 0x4428:
1334 	case 0x442C:
1335 	case 0x4430:
1336 	case 0x4434:
1337 	case 0x4438:
1338 	case 0x443C:
1339 		/* TX_FILTER0_[0-15] */
1340 		i = (reg - 0x4400) >> 2;
1341 		tmp = ib_chunk->kdata[idx] & 0x7;;
1342 		if (tmp == 2 || tmp == 4 || tmp == 6) {
1343 			track->textures[i].roundup_w = false;
1344 		}
1345 		tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;;
1346 		if (tmp == 2 || tmp == 4 || tmp == 6) {
1347 			track->textures[i].roundup_h = false;
1348 		}
1349 		break;
1350 	case 0x4500:
1351 	case 0x4504:
1352 	case 0x4508:
1353 	case 0x450C:
1354 	case 0x4510:
1355 	case 0x4514:
1356 	case 0x4518:
1357 	case 0x451C:
1358 	case 0x4520:
1359 	case 0x4524:
1360 	case 0x4528:
1361 	case 0x452C:
1362 	case 0x4530:
1363 	case 0x4534:
1364 	case 0x4538:
1365 	case 0x453C:
1366 		/* TX_FORMAT2_[0-15] */
1367 		i = (reg - 0x4500) >> 2;
1368 		tmp = ib_chunk->kdata[idx] & 0x3FFF;
1369 		track->textures[i].pitch = tmp + 1;
1370 		if (p->rdev->family >= CHIP_RV515) {
1371 			tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
1372 			track->textures[i].width_11 = tmp;
1373 			tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
1374 			track->textures[i].height_11 = tmp;
1375 		}
1376 		break;
1377 	case 0x4480:
1378 	case 0x4484:
1379 	case 0x4488:
1380 	case 0x448C:
1381 	case 0x4490:
1382 	case 0x4494:
1383 	case 0x4498:
1384 	case 0x449C:
1385 	case 0x44A0:
1386 	case 0x44A4:
1387 	case 0x44A8:
1388 	case 0x44AC:
1389 	case 0x44B0:
1390 	case 0x44B4:
1391 	case 0x44B8:
1392 	case 0x44BC:
1393 		/* TX_FORMAT0_[0-15] */
1394 		i = (reg - 0x4480) >> 2;
1395 		tmp = ib_chunk->kdata[idx] & 0x7FF;
1396 		track->textures[i].width = tmp + 1;
1397 		tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
1398 		track->textures[i].height = tmp + 1;
1399 		tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
1400 		track->textures[i].num_levels = tmp;
1401 		tmp = ib_chunk->kdata[idx] & (1 << 31);
1402 		track->textures[i].use_pitch = !!tmp;
1403 		tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
1404 		track->textures[i].txdepth = tmp;
1405 		break;
1406 	default:
1407 		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1408 		       reg, idx);
1409 		return -EINVAL;
1410 	}
1411 	return 0;
1412 }
1413 
1414 static int r300_packet3_check(struct radeon_cs_parser *p,
1415 			      struct radeon_cs_packet *pkt)
1416 {
1417 	struct radeon_cs_chunk *ib_chunk;
1418 	struct radeon_cs_reloc *reloc;
1419 	struct r300_cs_track *track;
1420 	volatile uint32_t *ib;
1421 	unsigned idx;
1422 	unsigned i, c;
1423 	int r;
1424 
1425 	ib = p->ib->ptr;
1426 	ib_chunk = &p->chunks[p->chunk_ib_idx];
1427 	idx = pkt->idx + 1;
1428 	track = (struct r300_cs_track*)p->track;
1429 	switch(pkt->opcode) {
1430 	case PACKET3_3D_LOAD_VBPNTR:
1431 		c = ib_chunk->kdata[idx++] & 0x1F;
1432 		track->num_arrays = c;
1433 		for (i = 0; i < (c - 1); i+=2, idx+=3) {
1434 			r = r100_cs_packet_next_reloc(p, &reloc);
1435 			if (r) {
1436 				DRM_ERROR("No reloc for packet3 %d\n",
1437 					  pkt->opcode);
1438 				r100_cs_dump_packet(p, pkt);
1439 				return r;
1440 			}
1441 			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1442 			track->arrays[i + 0].robj = reloc->robj;
1443 			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1444 			track->arrays[i + 0].esize &= 0x7F;
1445 			r = r100_cs_packet_next_reloc(p, &reloc);
1446 			if (r) {
1447 				DRM_ERROR("No reloc for packet3 %d\n",
1448 					  pkt->opcode);
1449 				r100_cs_dump_packet(p, pkt);
1450 				return r;
1451 			}
1452 			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
1453 			track->arrays[i + 1].robj = reloc->robj;
1454 			track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
1455 			track->arrays[i + 1].esize &= 0x7F;
1456 		}
1457 		if (c & 1) {
1458 			r = r100_cs_packet_next_reloc(p, &reloc);
1459 			if (r) {
1460 				DRM_ERROR("No reloc for packet3 %d\n",
1461 					  pkt->opcode);
1462 				r100_cs_dump_packet(p, pkt);
1463 				return r;
1464 			}
1465 			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1466 			track->arrays[i + 0].robj = reloc->robj;
1467 			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1468 			track->arrays[i + 0].esize &= 0x7F;
1469 		}
1470 		break;
1471 	case PACKET3_INDX_BUFFER:
1472 		r = r100_cs_packet_next_reloc(p, &reloc);
1473 		if (r) {
1474 			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1475 			r100_cs_dump_packet(p, pkt);
1476 			return r;
1477 		}
1478 		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1479 		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1480 		if (r) {
1481 			return r;
1482 		}
1483 		break;
1484 	/* Draw packet */
1485 	case PACKET3_3D_DRAW_IMMD:
1486 		/* Number of dwords is vtx_size * (num_vertices - 1)
1487 		 * PRIM_WALK must be equal to 3 vertex data in embedded
1488 		 * in cmd stream */
1489 		if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
1490 			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1491 			return -EINVAL;
1492 		}
1493 		track->vap_vf_cntl = ib_chunk->kdata[idx+1];
1494 		track->immd_dwords = pkt->count - 1;
1495 		r = r300_cs_track_check(p->rdev, track);
1496 		if (r) {
1497 			return r;
1498 		}
1499 		break;
1500 	case PACKET3_3D_DRAW_IMMD_2:
1501 		/* Number of dwords is vtx_size * (num_vertices - 1)
1502 		 * PRIM_WALK must be equal to 3 vertex data in embedded
1503 		 * in cmd stream */
1504 		if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
1505 			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1506 			return -EINVAL;
1507 		}
1508 		track->vap_vf_cntl = ib_chunk->kdata[idx];
1509 		track->immd_dwords = pkt->count;
1510 		r = r300_cs_track_check(p->rdev, track);
1511 		if (r) {
1512 			return r;
1513 		}
1514 		break;
1515 	case PACKET3_3D_DRAW_VBUF:
1516 		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1517 		r = r300_cs_track_check(p->rdev, track);
1518 		if (r) {
1519 			return r;
1520 		}
1521 		break;
1522 	case PACKET3_3D_DRAW_VBUF_2:
1523 		track->vap_vf_cntl = ib_chunk->kdata[idx];
1524 		r = r300_cs_track_check(p->rdev, track);
1525 		if (r) {
1526 			return r;
1527 		}
1528 		break;
1529 	case PACKET3_3D_DRAW_INDX:
1530 		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1531 		r = r300_cs_track_check(p->rdev, track);
1532 		if (r) {
1533 			return r;
1534 		}
1535 		break;
1536 	case PACKET3_3D_DRAW_INDX_2:
1537 		track->vap_vf_cntl = ib_chunk->kdata[idx];
1538 		r = r300_cs_track_check(p->rdev, track);
1539 		if (r) {
1540 			return r;
1541 		}
1542 		break;
1543 	case PACKET3_NOP:
1544 		break;
1545 	default:
1546 		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
1547 		return -EINVAL;
1548 	}
1549 	return 0;
1550 }
1551 
1552 int r300_cs_parse(struct radeon_cs_parser *p)
1553 {
1554 	struct radeon_cs_packet pkt;
1555 	struct r300_cs_track track;
1556 	int r;
1557 
1558 	r300_cs_track_clear(&track);
1559 	p->track = &track;
1560 	do {
1561 		r = r100_cs_packet_parse(p, &pkt, p->idx);
1562 		if (r) {
1563 			return r;
1564 		}
1565 		p->idx += pkt.count + 2;
1566 		switch (pkt.type) {
1567 		case PACKET_TYPE0:
1568 			r = r100_cs_parse_packet0(p, &pkt,
1569 						  p->rdev->config.r300.reg_safe_bm,
1570 						  p->rdev->config.r300.reg_safe_bm_size,
1571 						  &r300_packet0_check);
1572 			break;
1573 		case PACKET_TYPE2:
1574 			break;
1575 		case PACKET_TYPE3:
1576 			r = r300_packet3_check(p, &pkt);
1577 			break;
1578 		default:
1579 			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
1580 			return -EINVAL;
1581 		}
1582 		if (r) {
1583 			return r;
1584 		}
1585 	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1586 	return 0;
1587 }
1588 
1589 int r300_init(struct radeon_device *rdev)
1590 {
1591 	rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
1592 	rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
1593 	return 0;
1594 }
1595