/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_microcode.h"
#include "radeon_reg.h"
#include "radeon.h"

/* This file gathers functions specific to:
 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
 *
 * Some of these functions might be used by newer ASICs.
 */
void r100_hdp_reset(struct radeon_device *rdev);
void r100_gpu_init(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_mc_wait_for_idle(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
int r100_debugfs_mc_info_init(struct radeon_device *rdev);


/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here? */
	/* The hw seems to cache only one entry, so we should discard
	 * that entry; otherwise, if the first GPU GART read hits it,
	 * the access could end up at a wrong address. */
}

int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	if (rdev->gart.table.ram.ptr == NULL) {
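		/* One 32-bit entry per GPU page, hence 4 bytes each
		 * (see r100_pci_gart_set_page() below) */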
		rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
		r = radeon_gart_table_ram_alloc(rdev);
		if (r) {
			return r;
		}
	}
	/* discard memory requests outside of the configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translation */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
	WREG32(RADEON_AIC_HI_ADDR, tmp);
	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	rdev->gart.ready = true;
	return 0;
}

void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory requests outside of the configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}

int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	/* i == num_gpu_pages is already one past the end of the table */
	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
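	/* PCI GART entries are plain little-endian 32-bit bus addresses;
	 * there are no valid or permission bits. */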
	rdev->gart.table.ram.ptr[i] = cpu_to_le32((uint32_t)addr);
	return 0;
}

int r100_gart_enable(struct radeon_device *rdev)
{
	if (rdev->flags & RADEON_IS_AGP) {
		r100_pci_gart_disable(rdev);
		return 0;
	}
	return r100_pci_gart_enable(rdev);
}


/*
 * MC
 */
void r100_mc_disable_clients(struct radeon_device *rdev)
{
	uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;

	/* FIXME: is this function correct for rs100,rs200,rs300? */
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	/* stop display and memory access */
	ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
	WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
	crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
	WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);

	r100_gpu_wait_for_vsync(rdev);

	WREG32(RADEON_CRTC_GEN_CNTL,
	       (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
	       RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);

	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);

		r100_gpu_wait_for_vsync2(rdev);
		WREG32(RADEON_CRTC2_GEN_CNTL,
		       (crtc2_gen_cntl &
		        ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
		       RADEON_CRTC2_DISP_REQ_EN_B);
	}

	udelay(500);
}

void r100_mc_setup(struct radeon_device *rdev)
{
	uint32_t tmp;
	int r;

	r = r100_debugfs_mc_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for R100 MC!\n");
	}
	/* Write VRAM size in case we are limiting it */
	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
	tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
	tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
	WREG32(RADEON_MC_FB_LOCATION, tmp);

	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);

	if (rdev->flags & RADEON_IS_AGP) {
		tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
		tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
		tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
		WREG32(RADEON_MC_AGP_LOCATION, tmp);
		WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
	} else {
		WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(RADEON_AGP_BASE, 0);
	}

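	/* Flush the host data path: this is the same HDP soft-reset and
	 * read-buffer-invalidate sequence used by r100_hdp_reset(),
	 * presumably needed here because the aperture layout changed. */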
	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}

int r100_mc_init(struct radeon_device *rdev)
{
	int r;

	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM!\n");
	}

	r100_gpu_init(rdev);
	/* Disable the GART; this also disables out-of-GART accesses */
	r100_pci_gart_disable(rdev);

	/* Setup GPU memory space */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	r100_mc_disable_clients(rdev);
	if (r100_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	r100_mc_setup(rdev);
	return 0;
}

void r100_mc_fini(struct radeon_device *rdev)
{
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
	radeon_gart_fini(rdev);
}


/*
 * Fence emission
 */
void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Whoever calls radeon_fence_emit should call ring_lock and ask
	 * for enough space (today callers are ib schedule and buffer move) */
	/* Wait until IDLE & CLEAN */
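	/* 0x1720 is WAIT_UNTIL; bits 16 and 17 should be the 2D and 3D
	 * idle-and-clean waits, going by radeon_reg.h */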
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}


/*
 * Writeback
 */
int r100_wb_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->wb.wb_obj == NULL) {
		r = radeon_object_create(rdev, NULL, 4096,
					 true,
					 RADEON_GEM_DOMAIN_GTT,
					 false, &rdev->wb.wb_obj);
		if (r) {
			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_pin(rdev->wb.wb_obj,
				      RADEON_GEM_DOMAIN_GTT,
				      &rdev->wb.gpu_addr);
		if (r) {
			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
		if (r) {
			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
			return r;
		}
	}
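	/* Point the write-back machinery at the WB buffer and unmask all
	 * scratch registers.  Going by radeon_reg.h, the raw offsets are
	 * SCRATCH_ADDR (0x774), CP_RB_RPTR_ADDR (0x70C) and
	 * SCRATCH_UMSK (0x770). */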
	WREG32(0x774, rdev->wb.gpu_addr);
	WREG32(0x70C, rdev->wb.gpu_addr + 1024);
	WREG32(0x770, 0xff);
	return 0;
}

void r100_wb_fini(struct radeon_device *rdev)
{
	if (rdev->wb.wb_obj) {
		radeon_object_kunmap(rdev->wb.wb_obj);
		radeon_object_unpin(rdev->wb.wb_obj);
		radeon_object_unref(&rdev->wb.wb_obj);
		rdev->wb.wb = NULL;
		rdev->wb.wb_obj = NULL;
	}
}

int r100_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset,
		   uint64_t dst_offset,
		   unsigned num_pages,
		   struct radeon_fence *fence)
{
	uint32_t cur_pages;
	uint32_t stride_bytes = PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;

	/* radeon is limited to a 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is expressed in units of 64 bytes */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
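	/* A single BITBLT_MULTI rectangle is at most 8191 rows tall (the
	 * height field is apparently 13 bits wide), and each page is copied
	 * as one row, so split the copy into that many loops. */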
	num_loops = DIV_ROUND_UP(num_pages, 8191);

	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return -EINVAL;
	}
	while (num_pages > 0) {
		cur_pages = num_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_pages -= cur_pages;

		/* Pages stack in the Y direction (number of pages = height);
		 * the page width in pixels spans the X direction. */
		radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(rdev,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, 0);
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence);
	}
	radeon_ring_unlock_commit(rdev);
	return r;
}


/*
 * CP
 */
void r100_ring_start(struct radeon_device *rdev)
{
	int r;

	r = radeon_ring_lock(rdev, 2);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev);
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

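	/* Each microcode image is 256 (high, low) dword pairs streamed
	 * through CP_ME_RAM_DATAH/DATAL; the ME RAM address register is
	 * set once here and presumably auto-increments per pair. */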
	WREG32(RADEON_CP_ME_RAM_ADDR, 0);
	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		DRM_INFO("Loading R100 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R100_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R100_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R200) ||
		   (rdev->family == CHIP_RV250) ||
		   (rdev->family == CHIP_RV280) ||
		   (rdev->family == CHIP_RS300)) {
		DRM_INFO("Loading R200 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R200_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R200_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R300) ||
		   (rdev->family == CHIP_R350) ||
		   (rdev->family == CHIP_RV350) ||
		   (rdev->family == CHIP_RV380) ||
		   (rdev->family == CHIP_RS400) ||
		   (rdev->family == CHIP_RS480)) {
		DRM_INFO("Loading R300 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R300_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R300_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R420) ||
		   (rdev->family == CHIP_R423) ||
		   (rdev->family == CHIP_RV410)) {
		DRM_INFO("Loading R400 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R420_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R420_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_RS690) ||
		   (rdev->family == CHIP_RS740)) {
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, RS690_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, RS690_cp_microcode[i][0]);
		}
	} else if (rdev->family == CHIP_RS600) {
		DRM_INFO("Loading RS600 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, RS600_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, RS600_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_RV515) ||
		   (rdev->family == CHIP_R520) ||
		   (rdev->family == CHIP_RV530) ||
		   (rdev->family == CHIP_R580) ||
		   (rdev->family == CHIP_RV560) ||
		   (rdev->family == CHIP_RV570)) {
		DRM_INFO("Loading R500 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R520_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R520_cp_microcode[i][0]);
		}
	}
}

int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP!\n");
	}
	/* Reset CP */
	tmp = RREG32(RADEON_CP_CSQ_STAT);
	if ((tmp & (1 << 31))) {
		DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		tmp = RREG32(RADEON_CP_CSQ_STAT);
		if ((tmp & (1 << 31))) {
			DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
		}
	} else {
		DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
	}
	/* Align ring size */
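	/* RB_BUFSZ is log2 of the ring size counted in 8-byte quadwords,
	 * so the requested size gets rounded up to a power of two of
	 * quadwords here. */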
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring_size);
	if (r) {
		return r;
	}
	/* Each time the cp reads 1024 bytes (16 dword/quadword), update
	 * the rptr copy in system ram */
	rb_blksz = 9;
	/* cp will read 128 bytes at a time (4 dwords) */
	max_fetch = 1;
	rdev->cp.align_mask = 16 - 1;
	/* Writes to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force a CP_RB_WPTR write if written more than one time before the
	 * delay expires
	 */
	pre_write_limit = 0;
	/* Set up the cp cache like this (cache size is 96 dwords):
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So ring cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 *    indirect1 cache size is 64 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 *    indirect2 cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)).
	 * The idea is that most of the gpu commands go through the indirect1
	 * buffer, so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	WREG32(RADEON_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
	       RADEON_BUF_SWAP_32BIT |
#endif
	       REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch) |
	       RADEON_RB_NO_UPDATE);
	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
	WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
	/* Force read & write ptr to 0 */
	tmp = RREG32(RADEON_CP_RB_CNTL);
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	WREG32(RADEON_CP_RB_WPTR, 0);
	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
	rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
	/* Set cp mode to bus mastering & enable cp */
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(0x718, 0);
	WREG32(0x744, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
	radeon_ring_start(rdev);
	r = radeon_ring_test(rdev);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	rdev->cp.ready = true;
	return 0;
}

void r100_cp_fini(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	radeon_ring_fini(rdev);
	DRM_INFO("radeon: cp finalized\n");
}

void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
}

int r100_cp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 16))) {
			DRM_INFO("CP reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}


/*
 * CS functions
 */
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	/* Check that the registers fall into the register range covered
	 * by the number of entries (n) in the safe-register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
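	/* reg is a byte offset; each 32-bit word of the auth bitmap covers
	 * 32 dword registers (128 bytes of register space), and a set bit
	 * marks a register that must go through the check callback. */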
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
	}
}

/**
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser:	parser structure holding parsing context.
 * @pkt:	where to store packet information
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * type is unknown.
 **/
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d!\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	/* Only read the header once idx is known to be in bounds */
	header = ib_chunk->kdata[idx];
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d!\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) ends after CS buffer (%d)!\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * r100_cs_packet_next_reloc() - parse next packet which should be a reloc packet3
 * @parser:		parser structure holding parsing context.
 * @data:		pointer to relocation data
 * @offset_start:	starting offset
 * @offset_mask:	offset mask (to align start offset on)
 * @reloc:		reloc information
 *
 * Check that the next packet is a relocation packet3, do bo validation
 * and compute the GPU offset using the provided start.
 **/
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk!\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = ib_chunk->kdata[p3reloc.idx + 1];
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
			  idx, relocs_chunk->length_dw);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}

static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	volatile uint32_t *ib;
	uint32_t tmp;
	unsigned reg;
	unsigned i;
	unsigned idx;
	bool onereg;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	reg = pkt->reg;
	onereg = false;
	if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) {
		onereg = true;
	}
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		switch (reg) {
		/* FIXME: only allow PACKET3 blit? easier to check for out of
		 * range access */
		case RADEON_DST_PITCH_OFFSET:
		case RADEON_SRC_PITCH_OFFSET:
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
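			/* The low 22 bits hold the offset in 1KB units and
			 * the upper bits the pitch; rebase only the offset
			 * against the relocated bo. */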
			tmp = ib_chunk->kdata[idx] & 0x003fffff;
			tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
			ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
			break;
		case RADEON_RB3D_DEPTHOFFSET:
		case RADEON_RB3D_COLOROFFSET:
		case R300_RB3D_COLOROFFSET0:
		case R300_ZB_DEPTHOFFSET:
		case R200_PP_TXOFFSET_0:
		case R200_PP_TXOFFSET_1:
		case R200_PP_TXOFFSET_2:
		case R200_PP_TXOFFSET_3:
		case R200_PP_TXOFFSET_4:
		case R200_PP_TXOFFSET_5:
		case RADEON_PP_TXOFFSET_0:
		case RADEON_PP_TXOFFSET_1:
		case RADEON_PP_TXOFFSET_2:
		case R300_TX_OFFSET_0:
		case R300_TX_OFFSET_0+4:
		case R300_TX_OFFSET_0+8:
		case R300_TX_OFFSET_0+12:
		case R300_TX_OFFSET_0+16:
		case R300_TX_OFFSET_0+20:
		case R300_TX_OFFSET_0+24:
		case R300_TX_OFFSET_0+28:
		case R300_TX_OFFSET_0+32:
		case R300_TX_OFFSET_0+36:
		case R300_TX_OFFSET_0+40:
		case R300_TX_OFFSET_0+44:
		case R300_TX_OFFSET_0+48:
		case R300_TX_OFFSET_0+52:
		case R300_TX_OFFSET_0+56:
		case R300_TX_OFFSET_0+60:
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
			break;
		default:
			/* FIXME: we don't want to allow any other packets */
			break;
		}
		if (onereg) {
			/* FIXME: forbid onereg writes to registers that need
			 * relocation */
			break;
		}
	}
	return 0;
}

int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj)
{
	struct radeon_cs_chunk *ib_chunk;
	unsigned idx;

	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) {
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
			  "(need %u have %lu)!\n",
			  ib_chunk->kdata[idx+2] + 1,
			  radeon_object_size(robj));
		return -EINVAL;
	}
	return 0;
}

static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	unsigned idx;
	unsigned i, c;
	volatile uint32_t *ib;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		c = ib_chunk->kdata[idx++];
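		/* Vertex arrays come two per group: one control dword
		 * followed by two buffer addresses; an odd trailing array
		 * has a single address.  Only the addresses get relocated. */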
		for (i = 0; i < (c - 1); i += 2, idx += 3) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
		}
		if (c & 1) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		}
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* FIXME: cleanup */
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case PACKET3_3D_DRAW_IMMD:
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_VBUF_2:
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX_2:
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_VBUF:
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX:
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r;

	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			r = r100_packet0_check(p, &pkt);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = r100_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n",
				  pkt.type);
			return -EINVAL;
		}
		if (r) {
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}


/*
 * Global GPU functions
 */
void r100_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
	}

	if (rdev->family == CHIP_RV100 ||
	    rdev->family == CHIP_RS100 ||
	    rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
	}
}

/* Wait for vertical sync on primary CRTC */
void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
{
	uint32_t crtc_gen_cntl, tmp;
	int i;

	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
	if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
	    !(crtc_gen_cntl & RADEON_CRTC_EN)) {
		return;
	}
	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC_STATUS);
		if (tmp & RADEON_CRTC_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}

/* Wait for vertical sync on secondary CRTC */
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
{
	uint32_t crtc2_gen_cntl, tmp;
	int i;

	crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
	if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
	    !(crtc2_gen_cntl & RADEON_CRTC2_EN))
		return;

	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC2_STATUS);
		if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}

int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
		if (tmp >= n) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_gui_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
		printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed!"
		       " Bad things might happen.\n");
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
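		/* Bit 31 should be the global GPU-active flag (RBBM_ACTIVE
		 * in radeon_reg.h) */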
		if (!(tmp & (1 << 31))) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(0x0150);
		if (tmp & (1 << 2)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

void r100_gpu_init(struct radeon_device *rdev)
{
	/* TODO: anything to do here? pipes? */
	r100_hdp_reset(rdev);
}

void r100_hdp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;

	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}

int r100_rb2d_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	int i;

	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 26))) {
			DRM_INFO("RB2D reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}

int r100_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matters */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* TODO: reset 3D engine */
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}


/*
 * VRAM info
 */
static void r100_vram_get_type(struct radeon_device *rdev)
{
	uint32_t tmp;

	rdev->mc.vram_is_ddr = false;
	if (rdev->flags & RADEON_IS_IGP)
		rdev->mc.vram_is_ddr = true;
	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
		rdev->mc.vram_is_ddr = true;
	if ((rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RV100_HALF_MODE) {
			rdev->mc.vram_width = 32;
		} else {
			rdev->mc.vram_width = 64;
		}
		if (rdev->flags & RADEON_SINGLE_CRTC) {
			rdev->mc.vram_width /= 4;
			rdev->mc.vram_is_ddr = true;
		}
	} else if (rdev->family <= CHIP_RV280) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
			rdev->mc.vram_width = 128;
		} else {
			rdev->mc.vram_width = 64;
		}
	} else {
		/* newer IGPs */
		rdev->mc.vram_width = 128;
	}
}

void r100_vram_info(struct radeon_device *rdev)
{
	r100_vram_get_type(rdev);

	if (rdev->flags & RADEON_IS_IGP) {
		uint32_t tom;
		/* read NB_TOM to get the amount of ram stolen for the GPU */
		tom = RREG32(RADEON_NB_TOM);
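		/* Bits 31:16 hold the top and bits 15:0 the bottom of the
		 * stolen range, both in 64KB units */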
		rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
	} else {
		rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
		/* Some M6 production boards report 0 when 8 MB is
		 * actually installed
		 */
		if (rdev->mc.vram_size == 0) {
			rdev->mc.vram_size = 8192 * 1024;
			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
		}
	}

	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
}


/*
 * Indirect registers accessors
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
	if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
		return;
	}
	(void)RREG32(RADEON_CLOCK_CNTL_DATA);
	(void)RREG32(RADEON_CRTC_GEN_CNTL);
}

static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
	/* This workaround is necessary on RV100, RS100 and RS200 chips,
	 * or the chip could hang on a subsequent access
	 */
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
		udelay(5000);
	}

	/* This function is required to work around a hardware bug in some
	 * (all?) revisions of the R300.  This workaround should be called
	 * after every CLOCK_CNTL_INDEX register access.  If not, register
	 * reads afterward may not be correct.
	 */
	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
		uint32_t save, tmp;

		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
	}
}

uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t data;

	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
	r100_pll_errata_after_index(rdev);
	data = RREG32(RADEON_CLOCK_CNTL_DATA);
	r100_pll_errata_after_data(rdev);
	return data;
}

void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
	r100_pll_errata_after_index(rdev);
	WREG32(RADEON_CLOCK_CNTL_DATA, v);
	r100_pll_errata_after_data(rdev);
}

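/* Registers above the 64KB direct-access aperture are reached through the
 * MM_INDEX/MM_DATA indirection pair */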
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
{
	if (reg < 0x10000) {
		return readl(((void __iomem *)rdev->rmmio) + reg);
	} else {
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	}
}

void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	if (reg < 0x10000) {
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
	} else {
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	}
}

int r100_init(struct radeon_device *rdev)
{
	return 0;
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}

static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & rdev->cp.ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
	}
	return 0;
}


static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t csq_stat, csq2_stat, tmp;
	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
	unsigned i;

	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
	r_rptr = (csq_stat >> 0) & 0x3ff;
	r_wptr = (csq_stat >> 10) & 0x3ff;
	ib1_rptr = (csq_stat >> 20) & 0x3ff;
	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
	seq_printf(m, "Ring rptr %u\n", r_rptr);
	seq_printf(m, "Ring wptr %u\n", r_wptr);
	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
	/* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
	seq_printf(m, "Ring fifo:\n");
	for (i = 0; i < 256; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect1 fifo:\n");
	for (i = 256; i <= 512; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect2 fifo:\n");
	for (i = 640; i < ib1_wptr; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
	}
	return 0;
}

static int r100_debugfs_mc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_FB_LOCATION);
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_BUS_CNTL);
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AGP_BASE);
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
	tmp = RREG32(0x01D0);
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_LO_ADDR);
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_HI_ADDR);
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
	tmp = RREG32(0x01E4);
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
#endif

int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}

int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}

int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}