/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_reg.h"
#include "radeon.h"

/* r300,r350,rv350,rv370,rv380 depend on: */
void r100_hdp_reset(struct radeon_device *rdev);
int r100_cp_reset(struct radeon_device *rdev);
int r100_rb2d_reset(struct radeon_device *rdev);
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
int r100_pci_gart_enable(struct radeon_device *rdev);
void r100_pci_gart_disable(struct radeon_device *rdev);
void r100_mc_setup(struct radeon_device *rdev);
void r100_mc_disable_clients(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx);
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc);
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check);
void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt);
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj);

/* This file gathers functions specific to:
 * r300,r350,rv350,rv370,rv380
 *
 * Some of these functions might be used by newer ASICs.
 */
void r300_gpu_init(struct radeon_device *rdev);
int r300_mc_wait_for_idle(struct radeon_device *rdev);
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);


/*
 * rv370,rv380 PCIE GART
 */
void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	uint32_t tmp;
	int i;

	/* Workaround for a HW bug: do the flush twice */
	for (i = 0; i < 2; i++) {
		tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
		(void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
		mb();
	}
}

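/*
 * Enable the PCIE GART: the page table itself lives in VRAM, one 32-bit
 * entry per GPU page, the aperture is bounded by the START/END registers,
 * and accesses falling outside the mapped range are discarded (reads are
 * redirected to a scratch location at the start of VRAM).
 */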
int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
	uint32_t table_addr;
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	r = rv370_debugfs_pcie_gart_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for PCIE GART!\n");
	}
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	r = radeon_gart_table_vram_alloc(rdev);
	if (r) {
		return r;
	}
	/* discard memory requests outside of the configured range */
	tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
	table_addr = rdev->gart.table_addr;
	WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
	/* FIXME: setup default page */
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
	/* Clear error */
	WREG32_PCIE(0x18, 0);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_EN;
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	rv370_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
		 rdev->mc.gtt_size >> 20, table_addr);
	rdev->gart.ready = true;
	return 0;
}

void rv370_pcie_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
	if (rdev->gart.table.vram.robj) {
		radeon_object_kunmap(rdev->gart.table.vram.robj);
		radeon_object_unpin(rdev->gart.table.vram.robj);
	}
}

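/*
 * Write one 32-bit GART page table entry. The layout appears to be the
 * page address shifted right by 8, with the upper 8 bits of a 40-bit
 * address placed in entry bits 31:24; since pages are 4KB aligned the
 * low nibble is free to carry the 0xC read/write enable flags. writel()
 * already performs the CPU to little-endian conversion, so the value is
 * written directly, and index num_gpu_pages itself is out of range.
 */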
int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	void __iomem *ptr = (void __iomem *)rdev->gart.table.vram.ptr;

	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	addr = (((u32)addr) >> 8) | ((upper_32_bits(addr) & 0xff) << 24) | 0xC;
	writel((u32)addr, ptr + (i * 4));
	return 0;
}

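/*
 * Pick a GART backend: AGP boards just use the AGP aperture (so whatever
 * on-chip GART the ASIC has is turned off), PCIE boards get the RV370
 * PCIE GART hooked into the asic table, and everything else falls back
 * to the common PCI GART.
 */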
int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP) {
		if (rdev->family > CHIP_RV350) {
			rv370_pcie_gart_disable(rdev);
		} else {
			r100_pci_gart_disable(rdev);
		}
		return 0;
	}
#endif
	if (rdev->flags & RADEON_IS_PCIE) {
		rdev->asic->gart_disable = &rv370_pcie_gart_disable;
		rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
		rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
		return rv370_pcie_gart_enable(rdev);
	}
	return r100_pci_gart_enable(rdev);
}


/*
 * MC
 */
int r300_mc_init(struct radeon_device *rdev)
{
	int r;

	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM!\n");
	}

	r300_gpu_init(rdev);
	r100_pci_gart_disable(rdev);
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
	}

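	/*
	 * 0xFFFFFFFF is a "not placed yet" sentinel: for AGP boards the
	 * GTT is pinned at the AGP aperture base, and anything still left
	 * at the sentinel is placed by radeon_mc_setup() below.
	 */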
	/* Setup GPU memory space */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	/* Program GPU memory space */
	r100_mc_disable_clients(rdev);
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	r100_mc_setup(rdev);
	return 0;
}

void r300_mc_fini(struct radeon_device *rdev)
{
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
		radeon_gart_table_vram_free(rdev);
	} else {
		r100_pci_gart_disable(rdev);
		radeon_gart_table_ram_free(rdev);
	}
	radeon_gart_fini(rdev);
}


/*
 * Fence emission
 */
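/*
 * A fence is emitted as a scratch register write followed by a software
 * interrupt, after the 3D pipe has been flushed and drained, so that the
 * sequence number only lands once all prior rendering has finished.
 */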
void r300_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Whoever calls radeon_fence_emit should call ring_lock and ask
	 * for enough space (today the callers are ib schedule and buffer move) */
	/* Write SC register so SC & US assert idle */
	radeon_ring_write(rdev, PACKET0(0x43E0, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(0x43E4, 0));
	radeon_ring_write(rdev, 0);
	/* Flush 3D cache */
	radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
	radeon_ring_write(rdev, (2 << 0));
	radeon_ring_write(rdev, PACKET0(0x4F18, 0));
	radeon_ring_write(rdev, (1 << 0));
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 17) | (1 << 16) | (1 << 9));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}


/*
 * Global GPU functions
 */
int r300_copy_dma(struct radeon_device *rdev,
		  uint64_t src_offset,
		  uint64_t dst_offset,
		  unsigned num_pages,
		  struct radeon_fence *fence)
{
	uint32_t size;
	uint32_t cur_size;
	int i, num_loops;
	int r = 0;

	/* radeon pitch is /64 */
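	/* the DMA engine moves at most 0x1FFFFF bytes per request, so
	 * larger copies are split into up-to-~2MB chunks below */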
	size = num_pages << PAGE_SHIFT;
	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}
	/* Must wait for 2D idle & clean before DMA or hangs might happen */
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev, (1 << 16));
	for (i = 0; i < num_loops; i++) {
		cur_size = size;
		if (cur_size > 0x1FFFFF) {
			cur_size = 0x1FFFFF;
		}
		size -= cur_size;
		radeon_ring_write(rdev, PACKET0(0x720, 2));
		radeon_ring_write(rdev, src_offset);
		radeon_ring_write(rdev, dst_offset);
		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
		src_offset += cur_size;
		dst_offset += cur_size;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence);
	}
	radeon_ring_unlock_commit(rdev);
	return r;
}

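/*
 * Load the initial 3D engine state into the ring: tiling configuration,
 * 2D/3D idle synchronization, cache flushes, multisample positions and
 * rounding modes.
 */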
void r300_ring_start(struct radeon_device *rdev)
{
	unsigned gb_tile_config;
	int r;

	/* Sub pixel 1/12 so we can have 4K rendering according to the docs */
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	case 1:
	default:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}

	r = radeon_ring_lock(rdev, 64);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
	radeon_ring_write(rdev, gb_tile_config);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(0x170C, 0));
	radeon_ring_write(rdev, 1 << 31);
	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X0_SHIFT) |
			   (6 << R300_MS_Y0_SHIFT) |
			   (6 << R300_MS_X1_SHIFT) |
			   (6 << R300_MS_Y1_SHIFT) |
			   (6 << R300_MS_X2_SHIFT) |
			   (6 << R300_MS_Y2_SHIFT) |
			   (6 << R300_MSBD0_Y_SHIFT) |
			   (6 << R300_MSBD0_X_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X3_SHIFT) |
			   (6 << R300_MS_Y3_SHIFT) |
			   (6 << R300_MS_X4_SHIFT) |
			   (6 << R300_MS_Y4_SHIFT) |
			   (6 << R300_MS_X5_SHIFT) |
			   (6 << R300_MS_Y5_SHIFT) |
			   (6 << R300_MSBD1_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
	radeon_ring_write(rdev,
			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
	radeon_ring_write(rdev,
			  R300_GEOMETRY_ROUND_NEAREST |
			  R300_COLOR_ROUND_NEAREST);
	radeon_ring_unlock_commit(rdev);
}

void r300_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_R300 &&
	    (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
		rdev->pll_errata |= CHIP_ERRATA_R300_CG;
	}
}

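/*
 * Poll MC_STATUS (0x0150) until bit 4, presumably the MC idle flag,
 * is set, giving up after rdev->usec_timeout microseconds.
 */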
int r300_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(0x0150);
		if (tmp & (1 << 4)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

void r300_gpu_init(struct radeon_device *rdev)
{
	uint32_t gb_tile_config, tmp;

	r100_hdp_reset(rdev);
	/* FIXME: is rv380 a one pipe part? */
	if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
		/* r300,r350 */
		rdev->num_gb_pipes = 2;
	} else {
		/* rv350,rv370,rv380 */
		rdev->num_gb_pipes = 1;
	}
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	default:
	case 1:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}
	WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	tmp = RREG32(0x170C);
	WREG32(0x170C, tmp | (1 << 31));

	WREG32(R300_RB2D_DSTCACHE_MODE,
	       R300_DC_AUTOFLUSH_ENABLE |
	       R300_DC_DC_DISABLE_IGNORE_PE);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
}

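/*
 * Soft reset the geometry engines. RBBM_STATUS bits 20 and 26 presumably
 * report VAP/GA activity: the CP queues are quiesced first, the soft
 * reset register is pulsed, and the CP is reinitialized afterwards if it
 * was running when we entered.
 */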
int r300_ga_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	for (i = 0; i < rdev->usec_timeout; i++) {
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
		(void)RREG32(RADEON_RBBM_SOFT_RESET);
		udelay(200);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (tmp & ((1 << 20) | (1 << 26))) {
			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)\n", tmp);
			/* GA still busy, soft reset it */
			WREG32(0x429C, 0x200);
			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
			WREG32(0x43E0, 0);
			WREG32(0x43E4, 0);
			WREG32(0x24AC, 0);
		}
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			break;
		}
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			DRM_INFO("GA reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset GA! (RBBM_STATUS=0x%08X)\n", tmp);
	return -1;
}

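/*
 * Full GPU reset: HDP first, then the 2D, GA and CP blocks, each only if
 * RBBM_STATUS reports it busy, and finally bit 31 (presumably the global
 * GUI-active flag) tells us whether the reset actually took.
 */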
int r300_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matters */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* reset GA */
	if (status & ((1 << 20) | (1 << 26))) {
		r300_ga_reset(rdev);
	}
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}


/*
 * r300,r350,rv350,rv380 VRAM info
 */
void r300_vram_info(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* DDR for all cards after R300 & IGP */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(RADEON_MEM_CNTL);
	if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
		rdev->mc.vram_width = 128;
	} else {
		rdev->mc.vram_width = 64;
	}
	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);

	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
}

/*
 * Indirect registers accessor
 */
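/*
 * PCIE registers sit behind an index/data pair: write the register index,
 * read it back so the write is posted, then access the data port.
 */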
uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t r;

	WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
	(void)RREG32(RADEON_PCIE_INDEX);
	r = RREG32(RADEON_PCIE_DATA);
	return r;
}

void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
	(void)RREG32(RADEON_PCIE_INDEX);
	WREG32(RADEON_PCIE_DATA, (v));
	(void)RREG32(RADEON_PCIE_DATA);
}

/*
 * PCIE Lanes
 */

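/*
 * Reconfigure the PCIE link to the requested lane count. The trailing
 * read loop presumably spins while the link retrains, during which the
 * register reads back as all ones.
 */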
void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
	uint32_t link_width_cntl, mask;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* FIXME: wait for idle */

	switch (lanes) {
	case 0:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
		break;
	case 1:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
		break;
	case 2:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
		break;
	case 4:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
		break;
	case 8:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
		break;
	case 12:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
		break;
	case 16:
	default:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
		break;
	}

	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

	if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
	    (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
		return;

	link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
			     RADEON_PCIE_LC_RECONFIG_NOW |
			     RADEON_PCIE_LC_RECONFIG_LATER |
			     RADEON_PCIE_LC_SHORT_RECONFIG_EN);
	link_width_cntl |= mask;
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
						     RADEON_PCIE_LC_RECONFIG_NOW));

	/* wait for lane set to complete */
	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
	while (link_width_cntl == 0xffffffff)
		link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
}


/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
	seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
	seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
	seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
	seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
	seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
	seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list rv370_pcie_gart_info_list[] = {
	{"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
#endif

int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
	return 0;
#endif
}


/*
 * CS functions
 */
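/*
 * The r300_cs_track structures mirror the state a command stream programs
 * (color/z buffers, vertex arrays, textures) so the sizes of the bound
 * radeon_objects can be validated before the GPU ever dereferences them.
 */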
struct r300_cs_track_cb {
	struct radeon_object	*robj;
	unsigned		pitch;
	unsigned		cpp;
	unsigned		offset;
};

struct r300_cs_track_array {
	struct radeon_object	*robj;
	unsigned		esize;
};

struct r300_cs_track_texture {
	struct radeon_object	*robj;
	unsigned		pitch;
	unsigned		width;
	unsigned		height;
	unsigned		num_levels;
	unsigned		cpp;
	unsigned		tex_coord_type;
	unsigned		txdepth;
	unsigned		width_11;
	unsigned		height_11;
	bool			use_pitch;
	bool			enabled;
	bool			roundup_w;
	bool			roundup_h;
};

struct r300_cs_track {
	unsigned			num_cb;
	unsigned			maxy;
	unsigned			vtx_size;
	unsigned			vap_vf_cntl;
	unsigned			immd_dwords;
	unsigned			num_arrays;
	unsigned			max_indx;
	struct r300_cs_track_array	arrays[11];
	struct r300_cs_track_cb		cb[4];
	struct r300_cs_track_cb		zb;
	struct r300_cs_track_texture	textures[16];
	bool				z_enabled;
};

static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}

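/*
 * Walk every enabled texture unit and sum the footprint of all mipmap
 * levels (width times height per level, times bytes per texel, times
 * depth for 3D textures or the six faces of a cube map), then check the
 * bound object is at least that large.
 */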
static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
					      struct r300_cs_track *track)
{
	struct radeon_object *robj;
	unsigned long size;
	unsigned u, i, w, h;

	for (u = 0; u < 16; u++) {
		if (!track->textures[u].enabled)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width / (1 << i);
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height / (1 << i);
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			size += w * h;
		}
		size *= track->textures[u].cpp;
		switch (track->textures[u].tex_coord_type) {
		case 0:
			break;
		case 1:
			size *= (1 << track->textures[u].txdepth);
			break;
		case 2:
			size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_object_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_object_size(robj));
			r300_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}

int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
{
	unsigned i;
	unsigned long size;
	unsigned prim_walk;
	unsigned nverts;

	for (i = 0; i < track->num_cb; i++) {
		if (track->cb[i].robj == NULL) {
			DRM_ERROR("[drm] No buffer for color buffer %d!\n", i);
			return -EINVAL;
		}
		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
		size += track->cb[i].offset;
		if (size > radeon_object_size(track->cb[i].robj)) {
			DRM_ERROR("[drm] Buffer too small for color buffer %d "
				  "(need %lu have %lu)!\n", i, size,
				  radeon_object_size(track->cb[i].robj));
			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
				  i, track->cb[i].pitch, track->cb[i].cpp,
				  track->cb[i].offset, track->maxy);
			return -EINVAL;
		}
	}
	if (track->z_enabled) {
		if (track->zb.robj == NULL) {
			DRM_ERROR("[drm] No buffer for z buffer!\n");
			return -EINVAL;
		}
		size = track->zb.pitch * track->zb.cpp * track->maxy;
		size += track->zb.offset;
		if (size > radeon_object_size(track->zb.robj)) {
			DRM_ERROR("[drm] Buffer too small for z buffer "
				  "(need %lu have %lu)!\n", size,
				  radeon_object_size(track->zb.robj));
			return -EINVAL;
		}
	}
	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
	nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
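	/*
	 * PRIM_WALK selects how vertices are fetched: 1 appears to be
	 * indexed (bounded by the maximum index), 2 a linear vertex list
	 * (bounded by the vertex count) and 3 immediate mode with the
	 * vertex data embedded in the command stream.
	 */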
	switch (prim_walk) {
	case 1:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * track->max_indx * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_object_size(track->arrays[i].robj)) {
				DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
					   "have %lu dwords\n", prim_walk, i,
					   size >> 2,
					   radeon_object_size(track->arrays[i].robj) >> 2);
				DRM_ERROR("Max indices %u\n", track->max_indx);
				return -EINVAL;
			}
		}
		break;
	case 2:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * (nverts - 1) * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_object_size(track->arrays[i].robj)) {
				DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
					   "have %lu dwords\n", prim_walk, i, size >> 2,
					   radeon_object_size(track->arrays[i].robj) >> 2);
				return -EINVAL;
			}
		}
		break;
	case 3:
		size = track->vtx_size * nverts;
		if (size != track->immd_dwords) {
			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
				  track->immd_dwords, size);
			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
				  nverts, track->vtx_size);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
			  prim_walk);
		return -EINVAL;
	}
	return r300_cs_track_texture_check(rdev, track);
}

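/*
 * Reset the tracker to worst-case defaults (maximum pitches, sizes and
 * counts), so anything the command stream leaves unprogrammed is
 * validated against the most pessimistic assumption.
 */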
static inline void r300_cs_track_clear(struct r300_cs_track *track)
{
	unsigned i;

	track->num_cb = 4;
	track->maxy = 4096;
	for (i = 0; i < track->num_cb; i++) {
		track->cb[i].robj = NULL;
		track->cb[i].pitch = 8192;
		track->cb[i].cpp = 16;
		track->cb[i].offset = 0;
	}
	track->z_enabled = true;
	track->zb.robj = NULL;
	track->zb.pitch = 8192;
	track->zb.cpp = 4;
	track->zb.offset = 0;
	track->vtx_size = 0x7F;
	track->immd_dwords = 0xFFFFFFFFUL;
	track->num_arrays = 11;
	track->max_indx = 0x00FFFFFFUL;
	for (i = 0; i < track->num_arrays; i++) {
		track->arrays[i].robj = NULL;
		track->arrays[i].esize = 0x7F;
	}
	for (i = 0; i < 16; i++) {
		track->textures[i].pitch = 16536;
		track->textures[i].width = 16536;
		track->textures[i].height = 16536;
		track->textures[i].width_11 = 1 << 11;
		track->textures[i].height_11 = 1 << 11;
		track->textures[i].num_levels = 12;
		track->textures[i].txdepth = 16;
		track->textures[i].cpp = 64;
		track->textures[i].tex_coord_type = 1;
		track->textures[i].robj = NULL;
		/* CS IB emission code makes sure texture units are disabled */
		track->textures[i].enabled = false;
		track->textures[i].roundup_w = true;
		track->textures[i].roundup_h = true;
	}
}

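/*
 * Bitmap of user-writable registers, one bit per register dword: a set
 * bit means the register may be written from a command stream without
 * further checking, a cleared bit means it is forbidden or needs the
 * special handling in r300_packet0_check().
 */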
static const unsigned r300_reg_safe_bm[159] = {
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFBF, 0xFFFFFFFF, 0xFFFFFFBF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
	0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
	0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
	0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
	0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
	0x00000000, 0x0000C100, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};

static int r300_packet0_check(struct radeon_cs_parser *p,
		struct radeon_cs_packet *pkt,
		unsigned idx, unsigned reg)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	struct r300_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	unsigned i;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	track = (struct r300_cs_track *)p->track;
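	/*
	 * Registers that carry GPU addresses get patched with the
	 * relocated buffer offset; most other cases below only record
	 * state for r300_cs_track_check() to validate at draw time.
	 */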
	switch (reg) {
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		tmp = ib_chunk->kdata[idx] & 0x003fffff;
		tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
		ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
		break;
	case R300_RB3D_COLOROFFSET0:
	case R300_RB3D_COLOROFFSET1:
	case R300_RB3D_COLOROFFSET2:
	case R300_RB3D_COLOROFFSET3:
		i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[i].robj = reloc->robj;
		track->cb[i].offset = ib_chunk->kdata[idx];
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case R300_ZB_DEPTHOFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = ib_chunk->kdata[idx];
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case R300_TX_OFFSET_0:
	case R300_TX_OFFSET_0+4:
	case R300_TX_OFFSET_0+8:
	case R300_TX_OFFSET_0+12:
	case R300_TX_OFFSET_0+16:
	case R300_TX_OFFSET_0+20:
	case R300_TX_OFFSET_0+24:
	case R300_TX_OFFSET_0+28:
	case R300_TX_OFFSET_0+32:
	case R300_TX_OFFSET_0+36:
	case R300_TX_OFFSET_0+40:
	case R300_TX_OFFSET_0+44:
	case R300_TX_OFFSET_0+48:
	case R300_TX_OFFSET_0+52:
	case R300_TX_OFFSET_0+56:
	case R300_TX_OFFSET_0+60:
		i = (reg - R300_TX_OFFSET_0) >> 2;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		track->textures[i].robj = reloc->robj;
		break;
	/* Tracked registers */
	case 0x2084:
		/* VAP_VF_CNTL */
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		break;
	case 0x20B4:
		/* VAP_VTX_SIZE */
		track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
		break;
	case 0x2134:
		/* VAP_VF_MAX_VTX_INDX */
		track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
		break;
	case 0x43E4:
		/* SC_SCISSOR1 */
		track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
		if (p->rdev->family < CHIP_RV515) {
			track->maxy -= 1440;
		}
		break;
	case 0x4E00:
		/* RB3D_CCTL */
		track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
		break;
	case 0x4E38:
	case 0x4E3C:
	case 0x4E40:
	case 0x4E44:
		/* RB3D_COLORPITCH0 */
		/* RB3D_COLORPITCH1 */
		/* RB3D_COLORPITCH2 */
		/* RB3D_COLORPITCH3 */
		i = (reg - 0x4E38) >> 2;
		track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
		switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
		case 9:
		case 11:
		case 12:
			track->cb[i].cpp = 1;
			break;
		case 3:
		case 4:
		case 13:
		case 15:
			track->cb[i].cpp = 2;
			break;
		case 6:
			track->cb[i].cpp = 4;
			break;
		case 10:
			track->cb[i].cpp = 8;
			break;
		case 7:
			track->cb[i].cpp = 16;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d)!\n",
				  ((ib_chunk->kdata[idx] >> 21) & 0xF));
			return -EINVAL;
		}
		break;
	case 0x4F00:
		/* ZB_CNTL */
		if (ib_chunk->kdata[idx] & 2) {
			track->z_enabled = true;
		} else {
			track->z_enabled = false;
		}
		break;
	case 0x4F10:
		/* ZB_FORMAT */
		switch ((ib_chunk->kdata[idx] & 0xF)) {
		case 0:
		case 1:
			track->zb.cpp = 2;
			break;
		case 2:
			track->zb.cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid z buffer format (%d)!\n",
				  (ib_chunk->kdata[idx] & 0xF));
			return -EINVAL;
		}
		break;
	case 0x4F24:
		/* ZB_DEPTHPITCH */
		track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
		break;
	case 0x4104:
		for (i = 0; i < 16; i++) {
			bool enabled;

			enabled = !!(ib_chunk->kdata[idx] & (1 << i));
			track->textures[i].enabled = enabled;
		}
		break;
	case 0x44C0:
	case 0x44C4:
	case 0x44C8:
	case 0x44CC:
	case 0x44D0:
	case 0x44D4:
	case 0x44D8:
	case 0x44DC:
	case 0x44E0:
	case 0x44E4:
	case 0x44E8:
	case 0x44EC:
	case 0x44F0:
	case 0x44F4:
	case 0x44F8:
	case 0x44FC:
		/* TX_FORMAT1_[0-15] */
		i = (reg - 0x44C0) >> 2;
		tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
		track->textures[i].tex_coord_type = tmp;
		switch ((ib_chunk->kdata[idx] & 0x1F)) {
		case 0:
		case 2:
		case 5:
		case 18:
		case 20:
		case 21:
			track->textures[i].cpp = 1;
			break;
		case 1:
		case 3:
		case 6:
		case 7:
		case 10:
		case 11:
		case 19:
		case 22:
		case 24:
			track->textures[i].cpp = 2;
			break;
		case 4:
		case 8:
		case 9:
		case 12:
		case 13:
		case 23:
		case 25:
		case 27:
		case 30:
			track->textures[i].cpp = 4;
			break;
		case 14:
		case 26:
		case 28:
			track->textures[i].cpp = 8;
			break;
		case 29:
			track->textures[i].cpp = 16;
			break;
		default:
			DRM_ERROR("Invalid texture format %u\n",
				  (ib_chunk->kdata[idx] & 0x1F));
			return -EINVAL;
		}
		break;
	case 0x4400:
	case 0x4404:
	case 0x4408:
	case 0x440C:
	case 0x4410:
	case 0x4414:
	case 0x4418:
	case 0x441C:
	case 0x4420:
	case 0x4424:
	case 0x4428:
	case 0x442C:
	case 0x4430:
	case 0x4434:
	case 0x4438:
	case 0x443C:
		/* TX_FILTER0_[0-15] */
		i = (reg - 0x4400) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x7;
		if (tmp == 2 || tmp == 4 || tmp == 6) {
			track->textures[i].roundup_w = false;
		}
		tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;
		if (tmp == 2 || tmp == 4 || tmp == 6) {
			track->textures[i].roundup_h = false;
		}
		break;
	case 0x4500:
	case 0x4504:
	case 0x4508:
	case 0x450C:
	case 0x4510:
	case 0x4514:
	case 0x4518:
	case 0x451C:
	case 0x4520:
	case 0x4524:
	case 0x4528:
	case 0x452C:
	case 0x4530:
	case 0x4534:
	case 0x4538:
	case 0x453C:
		/* TX_FORMAT2_[0-15] */
		i = (reg - 0x4500) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x3FFF;
		track->textures[i].pitch = tmp + 1;
		if (p->rdev->family >= CHIP_RV515) {
			tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
			track->textures[i].width_11 = tmp;
			tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
			track->textures[i].height_11 = tmp;
		}
		break;
	case 0x4480:
	case 0x4484:
	case 0x4488:
	case 0x448C:
	case 0x4490:
	case 0x4494:
	case 0x4498:
	case 0x449C:
	case 0x44A0:
	case 0x44A4:
	case 0x44A8:
	case 0x44AC:
	case 0x44B0:
	case 0x44B4:
	case 0x44B8:
	case 0x44BC:
		/* TX_FORMAT0_[0-15] */
		i = (reg - 0x4480) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x7FF;
		track->textures[i].width = tmp + 1;
		tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
		track->textures[i].height = tmp + 1;
		tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
		track->textures[i].num_levels = tmp;
		tmp = ib_chunk->kdata[idx] & (1 << 31);
		track->textures[i].use_pitch = !!tmp;
		tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
		track->textures[i].txdepth = tmp;
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

static int r300_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	struct r300_cs_track *track;
	volatile uint32_t *ib;
	unsigned idx;
	unsigned i, c;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	track = (struct r300_cs_track *)p->track;
	switch (pkt->opcode) {
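	/*
	 * 3D_LOAD_VBPNTR: the first dword holds the array count in its
	 * low 5 bits, then arrays come in pairs of three dwords (both
	 * element sizes packed in one dword, followed by the two buffer
	 * addresses), with a final two-dword entry when the count is odd.
	 */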
	case PACKET3_3D_LOAD_VBPNTR:
		c = ib_chunk->kdata[idx++] & 0x1F;
		track->num_arrays = c;
		for (i = 0; i < (c - 1); i += 2, idx += 3) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 0].robj = reloc->robj;
			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
			track->arrays[i + 0].esize &= 0x7F;
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 1].robj = reloc->robj;
			track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
			track->arrays[i + 1].esize &= 0x7F;
		}
		if (c & 1) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 0].robj = reloc->robj;
			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
			track->arrays[i + 0].esize &= 0x7F;
		}
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	/* Draw packet */
	case PACKET3_3D_DRAW_IMMD:
		/* Number of dwords is vtx_size * (num_vertices - 1)
		 * PRIM_WALK must be equal to 3, vertex data is embedded
		 * in the cmd stream */
		if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = ib_chunk->kdata[idx+1];
		track->immd_dwords = pkt->count - 1;
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_IMMD_2:
		/* Number of dwords is vtx_size * (num_vertices - 1)
		 * PRIM_WALK must be equal to 3, vertex data is embedded
		 * in the cmd stream */
		if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		track->immd_dwords = pkt->count;
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

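/*
 * Main checker entry point: walk the indirect buffer packet by packet,
 * vetting type-0 register writes against the safe-register bitmap and
 * type-3 draw packets against the state tracker.
 */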
int r300_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r300_cs_track track;
	int r;

	r300_cs_track_clear(&track);
	p->track = &track;
	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			r = r100_cs_parse_packet0(p, &pkt,
						  p->rdev->config.r300.reg_safe_bm,
						  p->rdev->config.r300.reg_safe_bm_size,
						  &r300_packet0_check);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = r300_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
			return -EINVAL;
		}
		if (r) {
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}

int r300_init(struct radeon_device *rdev)
{
	rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
	rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
	return 0;
}