1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include <linux/seq_file.h> 29 #include "drmP.h" 30 #include "drm.h" 31 #include "radeon_drm.h" 32 #include "radeon_microcode.h" 33 #include "radeon_reg.h" 34 #include "radeon.h" 35 36 /* This files gather functions specifics to: 37 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 38 * 39 * Some of these functions might be used by newer ASICs. 40 */ 41 void r100_hdp_reset(struct radeon_device *rdev); 42 void r100_gpu_init(struct radeon_device *rdev); 43 int r100_gui_wait_for_idle(struct radeon_device *rdev); 44 int r100_mc_wait_for_idle(struct radeon_device *rdev); 45 void r100_gpu_wait_for_vsync(struct radeon_device *rdev); 46 void r100_gpu_wait_for_vsync2(struct radeon_device *rdev); 47 int r100_debugfs_mc_info_init(struct radeon_device *rdev); 48 49 50 /* 51 * PCI GART 52 */ 53 void r100_pci_gart_tlb_flush(struct radeon_device *rdev) 54 { 55 /* TODO: can we do somethings here ? */ 56 /* It seems hw only cache one entry so we should discard this 57 * entry otherwise if first GPU GART read hit this entry it 58 * could end up in wrong address. */ 59 } 60 61 int r100_pci_gart_enable(struct radeon_device *rdev) 62 { 63 uint32_t tmp; 64 int r; 65 66 /* Initialize common gart structure */ 67 r = radeon_gart_init(rdev); 68 if (r) { 69 return r; 70 } 71 if (rdev->gart.table.ram.ptr == NULL) { 72 rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; 73 r = radeon_gart_table_ram_alloc(rdev); 74 if (r) { 75 return r; 76 } 77 } 78 /* discard memory request outside of configured range */ 79 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS; 80 WREG32(RADEON_AIC_CNTL, tmp); 81 /* set address range for PCI address translate */ 82 WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location); 83 tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; 84 WREG32(RADEON_AIC_HI_ADDR, tmp); 85 /* Enable bus mastering */ 86 tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; 87 WREG32(RADEON_BUS_CNTL, tmp); 88 /* set PCI GART page-table base address */ 89 WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr); 90 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN; 91 WREG32(RADEON_AIC_CNTL, tmp); 92 r100_pci_gart_tlb_flush(rdev); 93 rdev->gart.ready = true; 94 return 0; 95 } 96 97 void r100_pci_gart_disable(struct radeon_device *rdev) 98 { 99 uint32_t tmp; 100 101 /* discard memory request outside of configured range */ 102 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS; 103 WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN); 104 WREG32(RADEON_AIC_LO_ADDR, 0); 105 WREG32(RADEON_AIC_HI_ADDR, 0); 106 } 107 108 int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) 109 { 110 if (i < 0 || i > rdev->gart.num_gpu_pages) { 111 return -EINVAL; 112 } 113 rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr)); 114 return 0; 115 } 116 117 int r100_gart_enable(struct radeon_device *rdev) 118 { 119 if (rdev->flags & RADEON_IS_AGP) { 120 r100_pci_gart_disable(rdev); 121 return 0; 122 } 123 return r100_pci_gart_enable(rdev); 124 } 125 126 127 /* 128 * MC 129 */ 130 void r100_mc_disable_clients(struct radeon_device *rdev) 131 { 132 uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl; 133 134 /* FIXME: is this function correct for rs100,rs200,rs300 ? */ 135 if (r100_gui_wait_for_idle(rdev)) { 136 printk(KERN_WARNING "Failed to wait GUI idle while " 137 "programming pipes. Bad things might happen.\n"); 138 } 139 140 /* stop display and memory access */ 141 ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL); 142 WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE); 143 crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL); 144 WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS); 145 crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); 146 147 r100_gpu_wait_for_vsync(rdev); 148 149 WREG32(RADEON_CRTC_GEN_CNTL, 150 (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) | 151 RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN); 152 153 if (!(rdev->flags & RADEON_SINGLE_CRTC)) { 154 crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); 155 156 r100_gpu_wait_for_vsync2(rdev); 157 WREG32(RADEON_CRTC2_GEN_CNTL, 158 (crtc2_gen_cntl & 159 ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) | 160 RADEON_CRTC2_DISP_REQ_EN_B); 161 } 162 163 udelay(500); 164 } 165 166 void r100_mc_setup(struct radeon_device *rdev) 167 { 168 uint32_t tmp; 169 int r; 170 171 r = r100_debugfs_mc_info_init(rdev); 172 if (r) { 173 DRM_ERROR("Failed to register debugfs file for R100 MC !\n"); 174 } 175 /* Write VRAM size in case we are limiting it */ 176 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 177 /* Novell bug 204882 for RN50/M6/M7 with 8/16/32MB VRAM, 178 * if the aperture is 64MB but we have 32MB VRAM 179 * we report only 32MB VRAM but we have to set MC_FB_LOCATION 180 * to 64MB, otherwise the gpu accidentially dies */ 181 tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; 182 tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16); 183 tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16); 184 WREG32(RADEON_MC_FB_LOCATION, tmp); 185 186 /* Enable bus mastering */ 187 tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; 188 WREG32(RADEON_BUS_CNTL, tmp); 189 190 if (rdev->flags & RADEON_IS_AGP) { 191 tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; 192 tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16); 193 tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16); 194 WREG32(RADEON_MC_AGP_LOCATION, tmp); 195 WREG32(RADEON_AGP_BASE, rdev->mc.agp_base); 196 } else { 197 WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF); 198 WREG32(RADEON_AGP_BASE, 0); 199 } 200 201 tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL; 202 tmp |= (7 << 28); 203 WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE); 204 (void)RREG32(RADEON_HOST_PATH_CNTL); 205 WREG32(RADEON_HOST_PATH_CNTL, tmp); 206 (void)RREG32(RADEON_HOST_PATH_CNTL); 207 } 208 209 int r100_mc_init(struct radeon_device *rdev) 210 { 211 int r; 212 213 if (r100_debugfs_rbbm_init(rdev)) { 214 DRM_ERROR("Failed to register debugfs file for RBBM !\n"); 215 } 216 217 r100_gpu_init(rdev); 218 /* Disable gart which also disable out of gart access */ 219 r100_pci_gart_disable(rdev); 220 221 /* Setup GPU memory space */ 222 rdev->mc.gtt_location = 0xFFFFFFFFUL; 223 if (rdev->flags & RADEON_IS_AGP) { 224 r = radeon_agp_init(rdev); 225 if (r) { 226 printk(KERN_WARNING "[drm] Disabling AGP\n"); 227 rdev->flags &= ~RADEON_IS_AGP; 228 rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; 229 } else { 230 rdev->mc.gtt_location = rdev->mc.agp_base; 231 } 232 } 233 r = radeon_mc_setup(rdev); 234 if (r) { 235 return r; 236 } 237 238 r100_mc_disable_clients(rdev); 239 if (r100_mc_wait_for_idle(rdev)) { 240 printk(KERN_WARNING "Failed to wait MC idle while " 241 "programming pipes. Bad things might happen.\n"); 242 } 243 244 r100_mc_setup(rdev); 245 return 0; 246 } 247 248 void r100_mc_fini(struct radeon_device *rdev) 249 { 250 r100_pci_gart_disable(rdev); 251 radeon_gart_table_ram_free(rdev); 252 radeon_gart_fini(rdev); 253 } 254 255 256 /* 257 * Fence emission 258 */ 259 void r100_fence_ring_emit(struct radeon_device *rdev, 260 struct radeon_fence *fence) 261 { 262 /* Who ever call radeon_fence_emit should call ring_lock and ask 263 * for enough space (today caller are ib schedule and buffer move) */ 264 /* Wait until IDLE & CLEAN */ 265 radeon_ring_write(rdev, PACKET0(0x1720, 0)); 266 radeon_ring_write(rdev, (1 << 16) | (1 << 17)); 267 /* Emit fence sequence & fire IRQ */ 268 radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0)); 269 radeon_ring_write(rdev, fence->seq); 270 radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0)); 271 radeon_ring_write(rdev, RADEON_SW_INT_FIRE); 272 } 273 274 275 /* 276 * Writeback 277 */ 278 int r100_wb_init(struct radeon_device *rdev) 279 { 280 int r; 281 282 if (rdev->wb.wb_obj == NULL) { 283 r = radeon_object_create(rdev, NULL, 4096, 284 true, 285 RADEON_GEM_DOMAIN_GTT, 286 false, &rdev->wb.wb_obj); 287 if (r) { 288 DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r); 289 return r; 290 } 291 r = radeon_object_pin(rdev->wb.wb_obj, 292 RADEON_GEM_DOMAIN_GTT, 293 &rdev->wb.gpu_addr); 294 if (r) { 295 DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r); 296 return r; 297 } 298 r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb); 299 if (r) { 300 DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r); 301 return r; 302 } 303 } 304 WREG32(0x774, rdev->wb.gpu_addr); 305 WREG32(0x70C, rdev->wb.gpu_addr + 1024); 306 WREG32(0x770, 0xff); 307 return 0; 308 } 309 310 void r100_wb_fini(struct radeon_device *rdev) 311 { 312 if (rdev->wb.wb_obj) { 313 radeon_object_kunmap(rdev->wb.wb_obj); 314 radeon_object_unpin(rdev->wb.wb_obj); 315 radeon_object_unref(&rdev->wb.wb_obj); 316 rdev->wb.wb = NULL; 317 rdev->wb.wb_obj = NULL; 318 } 319 } 320 321 int r100_copy_blit(struct radeon_device *rdev, 322 uint64_t src_offset, 323 uint64_t dst_offset, 324 unsigned num_pages, 325 struct radeon_fence *fence) 326 { 327 uint32_t cur_pages; 328 uint32_t stride_bytes = PAGE_SIZE; 329 uint32_t pitch; 330 uint32_t stride_pixels; 331 unsigned ndw; 332 int num_loops; 333 int r = 0; 334 335 /* radeon limited to 16k stride */ 336 stride_bytes &= 0x3fff; 337 /* radeon pitch is /64 */ 338 pitch = stride_bytes / 64; 339 stride_pixels = stride_bytes / 4; 340 num_loops = DIV_ROUND_UP(num_pages, 8191); 341 342 /* Ask for enough room for blit + flush + fence */ 343 ndw = 64 + (10 * num_loops); 344 r = radeon_ring_lock(rdev, ndw); 345 if (r) { 346 DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw); 347 return -EINVAL; 348 } 349 while (num_pages > 0) { 350 cur_pages = num_pages; 351 if (cur_pages > 8191) { 352 cur_pages = 8191; 353 } 354 num_pages -= cur_pages; 355 356 /* pages are in Y direction - height 357 page width in X direction - width */ 358 radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8)); 359 radeon_ring_write(rdev, 360 RADEON_GMC_SRC_PITCH_OFFSET_CNTL | 361 RADEON_GMC_DST_PITCH_OFFSET_CNTL | 362 RADEON_GMC_SRC_CLIPPING | 363 RADEON_GMC_DST_CLIPPING | 364 RADEON_GMC_BRUSH_NONE | 365 (RADEON_COLOR_FORMAT_ARGB8888 << 8) | 366 RADEON_GMC_SRC_DATATYPE_COLOR | 367 RADEON_ROP3_S | 368 RADEON_DP_SRC_SOURCE_MEMORY | 369 RADEON_GMC_CLR_CMP_CNTL_DIS | 370 RADEON_GMC_WR_MSK_DIS); 371 radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10)); 372 radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10)); 373 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16)); 374 radeon_ring_write(rdev, 0); 375 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16)); 376 radeon_ring_write(rdev, num_pages); 377 radeon_ring_write(rdev, num_pages); 378 radeon_ring_write(rdev, cur_pages | (stride_pixels << 16)); 379 } 380 radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0)); 381 radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL); 382 radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); 383 radeon_ring_write(rdev, 384 RADEON_WAIT_2D_IDLECLEAN | 385 RADEON_WAIT_HOST_IDLECLEAN | 386 RADEON_WAIT_DMA_GUI_IDLE); 387 if (fence) { 388 r = radeon_fence_emit(rdev, fence); 389 } 390 radeon_ring_unlock_commit(rdev); 391 return r; 392 } 393 394 395 /* 396 * CP 397 */ 398 void r100_ring_start(struct radeon_device *rdev) 399 { 400 int r; 401 402 r = radeon_ring_lock(rdev, 2); 403 if (r) { 404 return; 405 } 406 radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0)); 407 radeon_ring_write(rdev, 408 RADEON_ISYNC_ANY2D_IDLE3D | 409 RADEON_ISYNC_ANY3D_IDLE2D | 410 RADEON_ISYNC_WAIT_IDLEGUI | 411 RADEON_ISYNC_CPSCRATCH_IDLEGUI); 412 radeon_ring_unlock_commit(rdev); 413 } 414 415 static void r100_cp_load_microcode(struct radeon_device *rdev) 416 { 417 int i; 418 419 if (r100_gui_wait_for_idle(rdev)) { 420 printk(KERN_WARNING "Failed to wait GUI idle while " 421 "programming pipes. Bad things might happen.\n"); 422 } 423 424 WREG32(RADEON_CP_ME_RAM_ADDR, 0); 425 if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) || 426 (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) || 427 (rdev->family == CHIP_RS200)) { 428 DRM_INFO("Loading R100 Microcode\n"); 429 for (i = 0; i < 256; i++) { 430 WREG32(RADEON_CP_ME_RAM_DATAH, R100_cp_microcode[i][1]); 431 WREG32(RADEON_CP_ME_RAM_DATAL, R100_cp_microcode[i][0]); 432 } 433 } else if ((rdev->family == CHIP_R200) || 434 (rdev->family == CHIP_RV250) || 435 (rdev->family == CHIP_RV280) || 436 (rdev->family == CHIP_RS300)) { 437 DRM_INFO("Loading R200 Microcode\n"); 438 for (i = 0; i < 256; i++) { 439 WREG32(RADEON_CP_ME_RAM_DATAH, R200_cp_microcode[i][1]); 440 WREG32(RADEON_CP_ME_RAM_DATAL, R200_cp_microcode[i][0]); 441 } 442 } else if ((rdev->family == CHIP_R300) || 443 (rdev->family == CHIP_R350) || 444 (rdev->family == CHIP_RV350) || 445 (rdev->family == CHIP_RV380) || 446 (rdev->family == CHIP_RS400) || 447 (rdev->family == CHIP_RS480)) { 448 DRM_INFO("Loading R300 Microcode\n"); 449 for (i = 0; i < 256; i++) { 450 WREG32(RADEON_CP_ME_RAM_DATAH, R300_cp_microcode[i][1]); 451 WREG32(RADEON_CP_ME_RAM_DATAL, R300_cp_microcode[i][0]); 452 } 453 } else if ((rdev->family == CHIP_R420) || 454 (rdev->family == CHIP_R423) || 455 (rdev->family == CHIP_RV410)) { 456 DRM_INFO("Loading R400 Microcode\n"); 457 for (i = 0; i < 256; i++) { 458 WREG32(RADEON_CP_ME_RAM_DATAH, R420_cp_microcode[i][1]); 459 WREG32(RADEON_CP_ME_RAM_DATAL, R420_cp_microcode[i][0]); 460 } 461 } else if ((rdev->family == CHIP_RS690) || 462 (rdev->family == CHIP_RS740)) { 463 DRM_INFO("Loading RS690/RS740 Microcode\n"); 464 for (i = 0; i < 256; i++) { 465 WREG32(RADEON_CP_ME_RAM_DATAH, RS690_cp_microcode[i][1]); 466 WREG32(RADEON_CP_ME_RAM_DATAL, RS690_cp_microcode[i][0]); 467 } 468 } else if (rdev->family == CHIP_RS600) { 469 DRM_INFO("Loading RS600 Microcode\n"); 470 for (i = 0; i < 256; i++) { 471 WREG32(RADEON_CP_ME_RAM_DATAH, RS600_cp_microcode[i][1]); 472 WREG32(RADEON_CP_ME_RAM_DATAL, RS600_cp_microcode[i][0]); 473 } 474 } else if ((rdev->family == CHIP_RV515) || 475 (rdev->family == CHIP_R520) || 476 (rdev->family == CHIP_RV530) || 477 (rdev->family == CHIP_R580) || 478 (rdev->family == CHIP_RV560) || 479 (rdev->family == CHIP_RV570)) { 480 DRM_INFO("Loading R500 Microcode\n"); 481 for (i = 0; i < 256; i++) { 482 WREG32(RADEON_CP_ME_RAM_DATAH, R520_cp_microcode[i][1]); 483 WREG32(RADEON_CP_ME_RAM_DATAL, R520_cp_microcode[i][0]); 484 } 485 } 486 } 487 488 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) 489 { 490 unsigned rb_bufsz; 491 unsigned rb_blksz; 492 unsigned max_fetch; 493 unsigned pre_write_timer; 494 unsigned pre_write_limit; 495 unsigned indirect2_start; 496 unsigned indirect1_start; 497 uint32_t tmp; 498 int r; 499 500 if (r100_debugfs_cp_init(rdev)) { 501 DRM_ERROR("Failed to register debugfs file for CP !\n"); 502 } 503 /* Reset CP */ 504 tmp = RREG32(RADEON_CP_CSQ_STAT); 505 if ((tmp & (1 << 31))) { 506 DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp); 507 WREG32(RADEON_CP_CSQ_MODE, 0); 508 WREG32(RADEON_CP_CSQ_CNTL, 0); 509 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); 510 tmp = RREG32(RADEON_RBBM_SOFT_RESET); 511 mdelay(2); 512 WREG32(RADEON_RBBM_SOFT_RESET, 0); 513 tmp = RREG32(RADEON_RBBM_SOFT_RESET); 514 mdelay(2); 515 tmp = RREG32(RADEON_CP_CSQ_STAT); 516 if ((tmp & (1 << 31))) { 517 DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp); 518 } 519 } else { 520 DRM_INFO("radeon: cp idle (0x%08X)\n", tmp); 521 } 522 /* Align ring size */ 523 rb_bufsz = drm_order(ring_size / 8); 524 ring_size = (1 << (rb_bufsz + 1)) * 4; 525 r100_cp_load_microcode(rdev); 526 r = radeon_ring_init(rdev, ring_size); 527 if (r) { 528 return r; 529 } 530 /* Each time the cp read 1024 bytes (16 dword/quadword) update 531 * the rptr copy in system ram */ 532 rb_blksz = 9; 533 /* cp will read 128bytes at a time (4 dwords) */ 534 max_fetch = 1; 535 rdev->cp.align_mask = 16 - 1; 536 /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */ 537 pre_write_timer = 64; 538 /* Force CP_RB_WPTR write if written more than one time before the 539 * delay expire 540 */ 541 pre_write_limit = 0; 542 /* Setup the cp cache like this (cache size is 96 dwords) : 543 * RING 0 to 15 544 * INDIRECT1 16 to 79 545 * INDIRECT2 80 to 95 546 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords)) 547 * indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords)) 548 * indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords)) 549 * Idea being that most of the gpu cmd will be through indirect1 buffer 550 * so it gets the bigger cache. 551 */ 552 indirect2_start = 80; 553 indirect1_start = 16; 554 /* cp setup */ 555 WREG32(0x718, pre_write_timer | (pre_write_limit << 28)); 556 WREG32(RADEON_CP_RB_CNTL, 557 #ifdef __BIG_ENDIAN 558 RADEON_BUF_SWAP_32BIT | 559 #endif 560 REG_SET(RADEON_RB_BUFSZ, rb_bufsz) | 561 REG_SET(RADEON_RB_BLKSZ, rb_blksz) | 562 REG_SET(RADEON_MAX_FETCH, max_fetch) | 563 RADEON_RB_NO_UPDATE); 564 /* Set ring address */ 565 DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr); 566 WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr); 567 /* Force read & write ptr to 0 */ 568 tmp = RREG32(RADEON_CP_RB_CNTL); 569 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); 570 WREG32(RADEON_CP_RB_RPTR_WR, 0); 571 WREG32(RADEON_CP_RB_WPTR, 0); 572 WREG32(RADEON_CP_RB_CNTL, tmp); 573 udelay(10); 574 rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); 575 rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR); 576 /* Set cp mode to bus mastering & enable cp*/ 577 WREG32(RADEON_CP_CSQ_MODE, 578 REG_SET(RADEON_INDIRECT2_START, indirect2_start) | 579 REG_SET(RADEON_INDIRECT1_START, indirect1_start)); 580 WREG32(0x718, 0); 581 WREG32(0x744, 0x00004D4D); 582 WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM); 583 radeon_ring_start(rdev); 584 r = radeon_ring_test(rdev); 585 if (r) { 586 DRM_ERROR("radeon: cp isn't working (%d).\n", r); 587 return r; 588 } 589 rdev->cp.ready = true; 590 return 0; 591 } 592 593 void r100_cp_fini(struct radeon_device *rdev) 594 { 595 /* Disable ring */ 596 rdev->cp.ready = false; 597 WREG32(RADEON_CP_CSQ_CNTL, 0); 598 radeon_ring_fini(rdev); 599 DRM_INFO("radeon: cp finalized\n"); 600 } 601 602 void r100_cp_disable(struct radeon_device *rdev) 603 { 604 /* Disable ring */ 605 rdev->cp.ready = false; 606 WREG32(RADEON_CP_CSQ_MODE, 0); 607 WREG32(RADEON_CP_CSQ_CNTL, 0); 608 if (r100_gui_wait_for_idle(rdev)) { 609 printk(KERN_WARNING "Failed to wait GUI idle while " 610 "programming pipes. Bad things might happen.\n"); 611 } 612 } 613 614 int r100_cp_reset(struct radeon_device *rdev) 615 { 616 uint32_t tmp; 617 bool reinit_cp; 618 int i; 619 620 reinit_cp = rdev->cp.ready; 621 rdev->cp.ready = false; 622 WREG32(RADEON_CP_CSQ_MODE, 0); 623 WREG32(RADEON_CP_CSQ_CNTL, 0); 624 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); 625 (void)RREG32(RADEON_RBBM_SOFT_RESET); 626 udelay(200); 627 WREG32(RADEON_RBBM_SOFT_RESET, 0); 628 /* Wait to prevent race in RBBM_STATUS */ 629 mdelay(1); 630 for (i = 0; i < rdev->usec_timeout; i++) { 631 tmp = RREG32(RADEON_RBBM_STATUS); 632 if (!(tmp & (1 << 16))) { 633 DRM_INFO("CP reset succeed (RBBM_STATUS=0x%08X)\n", 634 tmp); 635 if (reinit_cp) { 636 return r100_cp_init(rdev, rdev->cp.ring_size); 637 } 638 return 0; 639 } 640 DRM_UDELAY(1); 641 } 642 tmp = RREG32(RADEON_RBBM_STATUS); 643 DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp); 644 return -1; 645 } 646 647 648 /* 649 * CS functions 650 */ 651 int r100_cs_parse_packet0(struct radeon_cs_parser *p, 652 struct radeon_cs_packet *pkt, 653 const unsigned *auth, unsigned n, 654 radeon_packet0_check_t check) 655 { 656 unsigned reg; 657 unsigned i, j, m; 658 unsigned idx; 659 int r; 660 661 idx = pkt->idx + 1; 662 reg = pkt->reg; 663 /* Check that register fall into register range 664 * determined by the number of entry (n) in the 665 * safe register bitmap. 666 */ 667 if (pkt->one_reg_wr) { 668 if ((reg >> 7) > n) { 669 return -EINVAL; 670 } 671 } else { 672 if (((reg + (pkt->count << 2)) >> 7) > n) { 673 return -EINVAL; 674 } 675 } 676 for (i = 0; i <= pkt->count; i++, idx++) { 677 j = (reg >> 7); 678 m = 1 << ((reg >> 2) & 31); 679 if (auth[j] & m) { 680 r = check(p, pkt, idx, reg); 681 if (r) { 682 return r; 683 } 684 } 685 if (pkt->one_reg_wr) { 686 if (!(auth[j] & m)) { 687 break; 688 } 689 } else { 690 reg += 4; 691 } 692 } 693 return 0; 694 } 695 696 void r100_cs_dump_packet(struct radeon_cs_parser *p, 697 struct radeon_cs_packet *pkt) 698 { 699 struct radeon_cs_chunk *ib_chunk; 700 volatile uint32_t *ib; 701 unsigned i; 702 unsigned idx; 703 704 ib = p->ib->ptr; 705 ib_chunk = &p->chunks[p->chunk_ib_idx]; 706 idx = pkt->idx; 707 for (i = 0; i <= (pkt->count + 1); i++, idx++) { 708 DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]); 709 } 710 } 711 712 /** 713 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet 714 * @parser: parser structure holding parsing context. 715 * @pkt: where to store packet informations 716 * 717 * Assume that chunk_ib_index is properly set. Will return -EINVAL 718 * if packet is bigger than remaining ib size. or if packets is unknown. 719 **/ 720 int r100_cs_packet_parse(struct radeon_cs_parser *p, 721 struct radeon_cs_packet *pkt, 722 unsigned idx) 723 { 724 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; 725 uint32_t header = ib_chunk->kdata[idx]; 726 727 if (idx >= ib_chunk->length_dw) { 728 DRM_ERROR("Can not parse packet at %d after CS end %d !\n", 729 idx, ib_chunk->length_dw); 730 return -EINVAL; 731 } 732 pkt->idx = idx; 733 pkt->type = CP_PACKET_GET_TYPE(header); 734 pkt->count = CP_PACKET_GET_COUNT(header); 735 switch (pkt->type) { 736 case PACKET_TYPE0: 737 pkt->reg = CP_PACKET0_GET_REG(header); 738 pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header); 739 break; 740 case PACKET_TYPE3: 741 pkt->opcode = CP_PACKET3_GET_OPCODE(header); 742 break; 743 case PACKET_TYPE2: 744 pkt->count = -1; 745 break; 746 default: 747 DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx); 748 return -EINVAL; 749 } 750 if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) { 751 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n", 752 pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw); 753 return -EINVAL; 754 } 755 return 0; 756 } 757 758 /** 759 * r100_cs_packet_next_vline() - parse userspace VLINE packet 760 * @parser: parser structure holding parsing context. 761 * 762 * Userspace sends a special sequence for VLINE waits. 763 * PACKET0 - VLINE_START_END + value 764 * PACKET0 - WAIT_UNTIL +_value 765 * RELOC (P3) - crtc_id in reloc. 766 * 767 * This function parses this and relocates the VLINE START END 768 * and WAIT UNTIL packets to the correct crtc. 769 * It also detects a switched off crtc and nulls out the 770 * wait in that case. 771 */ 772 int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) 773 { 774 struct radeon_cs_chunk *ib_chunk; 775 struct drm_mode_object *obj; 776 struct drm_crtc *crtc; 777 struct radeon_crtc *radeon_crtc; 778 struct radeon_cs_packet p3reloc, waitreloc; 779 int crtc_id; 780 int r; 781 uint32_t header, h_idx, reg; 782 783 ib_chunk = &p->chunks[p->chunk_ib_idx]; 784 785 /* parse the wait until */ 786 r = r100_cs_packet_parse(p, &waitreloc, p->idx); 787 if (r) 788 return r; 789 790 /* check its a wait until and only 1 count */ 791 if (waitreloc.reg != RADEON_WAIT_UNTIL || 792 waitreloc.count != 0) { 793 DRM_ERROR("vline wait had illegal wait until segment\n"); 794 r = -EINVAL; 795 return r; 796 } 797 798 if (ib_chunk->kdata[waitreloc.idx + 1] != RADEON_WAIT_CRTC_VLINE) { 799 DRM_ERROR("vline wait had illegal wait until\n"); 800 r = -EINVAL; 801 return r; 802 } 803 804 /* jump over the NOP */ 805 r = r100_cs_packet_parse(p, &p3reloc, p->idx); 806 if (r) 807 return r; 808 809 h_idx = p->idx - 2; 810 p->idx += waitreloc.count; 811 p->idx += p3reloc.count; 812 813 header = ib_chunk->kdata[h_idx]; 814 crtc_id = ib_chunk->kdata[h_idx + 5]; 815 reg = ib_chunk->kdata[h_idx] >> 2; 816 mutex_lock(&p->rdev->ddev->mode_config.mutex); 817 obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC); 818 if (!obj) { 819 DRM_ERROR("cannot find crtc %d\n", crtc_id); 820 r = -EINVAL; 821 goto out; 822 } 823 crtc = obj_to_crtc(obj); 824 radeon_crtc = to_radeon_crtc(crtc); 825 crtc_id = radeon_crtc->crtc_id; 826 827 if (!crtc->enabled) { 828 /* if the CRTC isn't enabled - we need to nop out the wait until */ 829 ib_chunk->kdata[h_idx + 2] = PACKET2(0); 830 ib_chunk->kdata[h_idx + 3] = PACKET2(0); 831 } else if (crtc_id == 1) { 832 switch (reg) { 833 case AVIVO_D1MODE_VLINE_START_END: 834 header &= R300_CP_PACKET0_REG_MASK; 835 header |= AVIVO_D2MODE_VLINE_START_END >> 2; 836 break; 837 case RADEON_CRTC_GUI_TRIG_VLINE: 838 header &= R300_CP_PACKET0_REG_MASK; 839 header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2; 840 break; 841 default: 842 DRM_ERROR("unknown crtc reloc\n"); 843 r = -EINVAL; 844 goto out; 845 } 846 ib_chunk->kdata[h_idx] = header; 847 ib_chunk->kdata[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1; 848 } 849 out: 850 mutex_unlock(&p->rdev->ddev->mode_config.mutex); 851 return r; 852 } 853 854 /** 855 * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3 856 * @parser: parser structure holding parsing context. 857 * @data: pointer to relocation data 858 * @offset_start: starting offset 859 * @offset_mask: offset mask (to align start offset on) 860 * @reloc: reloc informations 861 * 862 * Check next packet is relocation packet3, do bo validation and compute 863 * GPU offset using the provided start. 864 **/ 865 int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, 866 struct radeon_cs_reloc **cs_reloc) 867 { 868 struct radeon_cs_chunk *ib_chunk; 869 struct radeon_cs_chunk *relocs_chunk; 870 struct radeon_cs_packet p3reloc; 871 unsigned idx; 872 int r; 873 874 if (p->chunk_relocs_idx == -1) { 875 DRM_ERROR("No relocation chunk !\n"); 876 return -EINVAL; 877 } 878 *cs_reloc = NULL; 879 ib_chunk = &p->chunks[p->chunk_ib_idx]; 880 relocs_chunk = &p->chunks[p->chunk_relocs_idx]; 881 r = r100_cs_packet_parse(p, &p3reloc, p->idx); 882 if (r) { 883 return r; 884 } 885 p->idx += p3reloc.count + 2; 886 if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { 887 DRM_ERROR("No packet3 for relocation for packet at %d.\n", 888 p3reloc.idx); 889 r100_cs_dump_packet(p, &p3reloc); 890 return -EINVAL; 891 } 892 idx = ib_chunk->kdata[p3reloc.idx + 1]; 893 if (idx >= relocs_chunk->length_dw) { 894 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 895 idx, relocs_chunk->length_dw); 896 r100_cs_dump_packet(p, &p3reloc); 897 return -EINVAL; 898 } 899 /* FIXME: we assume reloc size is 4 dwords */ 900 *cs_reloc = p->relocs_ptr[(idx / 4)]; 901 return 0; 902 } 903 904 static int r100_packet0_check(struct radeon_cs_parser *p, 905 struct radeon_cs_packet *pkt) 906 { 907 struct radeon_cs_chunk *ib_chunk; 908 struct radeon_cs_reloc *reloc; 909 volatile uint32_t *ib; 910 uint32_t tmp; 911 unsigned reg; 912 unsigned i; 913 unsigned idx; 914 bool onereg; 915 int r; 916 u32 tile_flags = 0; 917 918 ib = p->ib->ptr; 919 ib_chunk = &p->chunks[p->chunk_ib_idx]; 920 idx = pkt->idx + 1; 921 reg = pkt->reg; 922 onereg = false; 923 if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) { 924 onereg = true; 925 } 926 for (i = 0; i <= pkt->count; i++, idx++, reg += 4) { 927 switch (reg) { 928 case RADEON_CRTC_GUI_TRIG_VLINE: 929 r = r100_cs_packet_parse_vline(p); 930 if (r) { 931 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 932 idx, reg); 933 r100_cs_dump_packet(p, pkt); 934 return r; 935 } 936 break; 937 /* FIXME: only allow PACKET3 blit? easier to check for out of 938 * range access */ 939 case RADEON_DST_PITCH_OFFSET: 940 case RADEON_SRC_PITCH_OFFSET: 941 r = r100_cs_packet_next_reloc(p, &reloc); 942 if (r) { 943 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 944 idx, reg); 945 r100_cs_dump_packet(p, pkt); 946 return r; 947 } 948 tmp = ib_chunk->kdata[idx] & 0x003fffff; 949 tmp += (((u32)reloc->lobj.gpu_offset) >> 10); 950 951 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 952 tile_flags |= RADEON_DST_TILE_MACRO; 953 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { 954 if (reg == RADEON_SRC_PITCH_OFFSET) { 955 DRM_ERROR("Cannot src blit from microtiled surface\n"); 956 r100_cs_dump_packet(p, pkt); 957 return -EINVAL; 958 } 959 tile_flags |= RADEON_DST_TILE_MICRO; 960 } 961 962 tmp |= tile_flags; 963 ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; 964 break; 965 case RADEON_RB3D_DEPTHOFFSET: 966 case RADEON_RB3D_COLOROFFSET: 967 case R300_RB3D_COLOROFFSET0: 968 case R300_ZB_DEPTHOFFSET: 969 case R200_PP_TXOFFSET_0: 970 case R200_PP_TXOFFSET_1: 971 case R200_PP_TXOFFSET_2: 972 case R200_PP_TXOFFSET_3: 973 case R200_PP_TXOFFSET_4: 974 case R200_PP_TXOFFSET_5: 975 case RADEON_PP_TXOFFSET_0: 976 case RADEON_PP_TXOFFSET_1: 977 case RADEON_PP_TXOFFSET_2: 978 case R300_TX_OFFSET_0: 979 case R300_TX_OFFSET_0+4: 980 case R300_TX_OFFSET_0+8: 981 case R300_TX_OFFSET_0+12: 982 case R300_TX_OFFSET_0+16: 983 case R300_TX_OFFSET_0+20: 984 case R300_TX_OFFSET_0+24: 985 case R300_TX_OFFSET_0+28: 986 case R300_TX_OFFSET_0+32: 987 case R300_TX_OFFSET_0+36: 988 case R300_TX_OFFSET_0+40: 989 case R300_TX_OFFSET_0+44: 990 case R300_TX_OFFSET_0+48: 991 case R300_TX_OFFSET_0+52: 992 case R300_TX_OFFSET_0+56: 993 case R300_TX_OFFSET_0+60: 994 /* rn50 has no 3D engine so fail on any 3d setup */ 995 if (ASIC_IS_RN50(p->rdev)) { 996 DRM_ERROR("attempt to use RN50 3D engine failed\n"); 997 return -EINVAL; 998 } 999 r = r100_cs_packet_next_reloc(p, &reloc); 1000 if (r) { 1001 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1002 idx, reg); 1003 r100_cs_dump_packet(p, pkt); 1004 return r; 1005 } 1006 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1007 break; 1008 case R300_RB3D_COLORPITCH0: 1009 case RADEON_RB3D_COLORPITCH: 1010 r = r100_cs_packet_next_reloc(p, &reloc); 1011 if (r) { 1012 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1013 idx, reg); 1014 r100_cs_dump_packet(p, pkt); 1015 return r; 1016 } 1017 1018 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1019 tile_flags |= RADEON_COLOR_TILE_ENABLE; 1020 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1021 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; 1022 1023 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); 1024 tmp |= tile_flags; 1025 ib[idx] = tmp; 1026 break; 1027 default: 1028 /* FIXME: we don't want to allow anyothers packet */ 1029 break; 1030 } 1031 if (onereg) { 1032 /* FIXME: forbid onereg write to register on relocate */ 1033 break; 1034 } 1035 } 1036 return 0; 1037 } 1038 1039 int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p, 1040 struct radeon_cs_packet *pkt, 1041 struct radeon_object *robj) 1042 { 1043 struct radeon_cs_chunk *ib_chunk; 1044 unsigned idx; 1045 1046 ib_chunk = &p->chunks[p->chunk_ib_idx]; 1047 idx = pkt->idx + 1; 1048 if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) { 1049 DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER " 1050 "(need %u have %lu) !\n", 1051 ib_chunk->kdata[idx+2] + 1, 1052 radeon_object_size(robj)); 1053 return -EINVAL; 1054 } 1055 return 0; 1056 } 1057 1058 static int r100_packet3_check(struct radeon_cs_parser *p, 1059 struct radeon_cs_packet *pkt) 1060 { 1061 struct radeon_cs_chunk *ib_chunk; 1062 struct radeon_cs_reloc *reloc; 1063 unsigned idx; 1064 unsigned i, c; 1065 volatile uint32_t *ib; 1066 int r; 1067 1068 ib = p->ib->ptr; 1069 ib_chunk = &p->chunks[p->chunk_ib_idx]; 1070 idx = pkt->idx + 1; 1071 switch (pkt->opcode) { 1072 case PACKET3_3D_LOAD_VBPNTR: 1073 c = ib_chunk->kdata[idx++]; 1074 for (i = 0; i < (c - 1); i += 2, idx += 3) { 1075 r = r100_cs_packet_next_reloc(p, &reloc); 1076 if (r) { 1077 DRM_ERROR("No reloc for packet3 %d\n", 1078 pkt->opcode); 1079 r100_cs_dump_packet(p, pkt); 1080 return r; 1081 } 1082 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); 1083 r = r100_cs_packet_next_reloc(p, &reloc); 1084 if (r) { 1085 DRM_ERROR("No reloc for packet3 %d\n", 1086 pkt->opcode); 1087 r100_cs_dump_packet(p, pkt); 1088 return r; 1089 } 1090 ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset); 1091 } 1092 if (c & 1) { 1093 r = r100_cs_packet_next_reloc(p, &reloc); 1094 if (r) { 1095 DRM_ERROR("No reloc for packet3 %d\n", 1096 pkt->opcode); 1097 r100_cs_dump_packet(p, pkt); 1098 return r; 1099 } 1100 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); 1101 } 1102 break; 1103 case PACKET3_INDX_BUFFER: 1104 r = r100_cs_packet_next_reloc(p, &reloc); 1105 if (r) { 1106 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); 1107 r100_cs_dump_packet(p, pkt); 1108 return r; 1109 } 1110 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); 1111 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); 1112 if (r) { 1113 return r; 1114 } 1115 break; 1116 case 0x23: 1117 /* FIXME: cleanup */ 1118 /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */ 1119 r = r100_cs_packet_next_reloc(p, &reloc); 1120 if (r) { 1121 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); 1122 r100_cs_dump_packet(p, pkt); 1123 return r; 1124 } 1125 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1126 break; 1127 case PACKET3_3D_DRAW_IMMD: 1128 /* triggers drawing using in-packet vertex data */ 1129 case PACKET3_3D_DRAW_IMMD_2: 1130 /* triggers drawing using in-packet vertex data */ 1131 case PACKET3_3D_DRAW_VBUF_2: 1132 /* triggers drawing of vertex buffers setup elsewhere */ 1133 case PACKET3_3D_DRAW_INDX_2: 1134 /* triggers drawing using indices to vertex buffer */ 1135 case PACKET3_3D_DRAW_VBUF: 1136 /* triggers drawing of vertex buffers setup elsewhere */ 1137 case PACKET3_3D_DRAW_INDX: 1138 /* triggers drawing using indices to vertex buffer */ 1139 case PACKET3_NOP: 1140 break; 1141 default: 1142 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); 1143 return -EINVAL; 1144 } 1145 return 0; 1146 } 1147 1148 int r100_cs_parse(struct radeon_cs_parser *p) 1149 { 1150 struct radeon_cs_packet pkt; 1151 int r; 1152 1153 do { 1154 r = r100_cs_packet_parse(p, &pkt, p->idx); 1155 if (r) { 1156 return r; 1157 } 1158 p->idx += pkt.count + 2; 1159 switch (pkt.type) { 1160 case PACKET_TYPE0: 1161 r = r100_packet0_check(p, &pkt); 1162 break; 1163 case PACKET_TYPE2: 1164 break; 1165 case PACKET_TYPE3: 1166 r = r100_packet3_check(p, &pkt); 1167 break; 1168 default: 1169 DRM_ERROR("Unknown packet type %d !\n", 1170 pkt.type); 1171 return -EINVAL; 1172 } 1173 if (r) { 1174 return r; 1175 } 1176 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); 1177 return 0; 1178 } 1179 1180 1181 /* 1182 * Global GPU functions 1183 */ 1184 void r100_errata(struct radeon_device *rdev) 1185 { 1186 rdev->pll_errata = 0; 1187 1188 if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { 1189 rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; 1190 } 1191 1192 if (rdev->family == CHIP_RV100 || 1193 rdev->family == CHIP_RS100 || 1194 rdev->family == CHIP_RS200) { 1195 rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; 1196 } 1197 } 1198 1199 /* Wait for vertical sync on primary CRTC */ 1200 void r100_gpu_wait_for_vsync(struct radeon_device *rdev) 1201 { 1202 uint32_t crtc_gen_cntl, tmp; 1203 int i; 1204 1205 crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); 1206 if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) || 1207 !(crtc_gen_cntl & RADEON_CRTC_EN)) { 1208 return; 1209 } 1210 /* Clear the CRTC_VBLANK_SAVE bit */ 1211 WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR); 1212 for (i = 0; i < rdev->usec_timeout; i++) { 1213 tmp = RREG32(RADEON_CRTC_STATUS); 1214 if (tmp & RADEON_CRTC_VBLANK_SAVE) { 1215 return; 1216 } 1217 DRM_UDELAY(1); 1218 } 1219 } 1220 1221 /* Wait for vertical sync on secondary CRTC */ 1222 void r100_gpu_wait_for_vsync2(struct radeon_device *rdev) 1223 { 1224 uint32_t crtc2_gen_cntl, tmp; 1225 int i; 1226 1227 crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); 1228 if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) || 1229 !(crtc2_gen_cntl & RADEON_CRTC2_EN)) 1230 return; 1231 1232 /* Clear the CRTC_VBLANK_SAVE bit */ 1233 WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR); 1234 for (i = 0; i < rdev->usec_timeout; i++) { 1235 tmp = RREG32(RADEON_CRTC2_STATUS); 1236 if (tmp & RADEON_CRTC2_VBLANK_SAVE) { 1237 return; 1238 } 1239 DRM_UDELAY(1); 1240 } 1241 } 1242 1243 int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n) 1244 { 1245 unsigned i; 1246 uint32_t tmp; 1247 1248 for (i = 0; i < rdev->usec_timeout; i++) { 1249 tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; 1250 if (tmp >= n) { 1251 return 0; 1252 } 1253 DRM_UDELAY(1); 1254 } 1255 return -1; 1256 } 1257 1258 int r100_gui_wait_for_idle(struct radeon_device *rdev) 1259 { 1260 unsigned i; 1261 uint32_t tmp; 1262 1263 if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { 1264 printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" 1265 " Bad things might happen.\n"); 1266 } 1267 for (i = 0; i < rdev->usec_timeout; i++) { 1268 tmp = RREG32(RADEON_RBBM_STATUS); 1269 if (!(tmp & (1 << 31))) { 1270 return 0; 1271 } 1272 DRM_UDELAY(1); 1273 } 1274 return -1; 1275 } 1276 1277 int r100_mc_wait_for_idle(struct radeon_device *rdev) 1278 { 1279 unsigned i; 1280 uint32_t tmp; 1281 1282 for (i = 0; i < rdev->usec_timeout; i++) { 1283 /* read MC_STATUS */ 1284 tmp = RREG32(0x0150); 1285 if (tmp & (1 << 2)) { 1286 return 0; 1287 } 1288 DRM_UDELAY(1); 1289 } 1290 return -1; 1291 } 1292 1293 void r100_gpu_init(struct radeon_device *rdev) 1294 { 1295 /* TODO: anythings to do here ? pipes ? */ 1296 r100_hdp_reset(rdev); 1297 } 1298 1299 void r100_hdp_reset(struct radeon_device *rdev) 1300 { 1301 uint32_t tmp; 1302 1303 tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL; 1304 tmp |= (7 << 28); 1305 WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE); 1306 (void)RREG32(RADEON_HOST_PATH_CNTL); 1307 udelay(200); 1308 WREG32(RADEON_RBBM_SOFT_RESET, 0); 1309 WREG32(RADEON_HOST_PATH_CNTL, tmp); 1310 (void)RREG32(RADEON_HOST_PATH_CNTL); 1311 } 1312 1313 int r100_rb2d_reset(struct radeon_device *rdev) 1314 { 1315 uint32_t tmp; 1316 int i; 1317 1318 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2); 1319 (void)RREG32(RADEON_RBBM_SOFT_RESET); 1320 udelay(200); 1321 WREG32(RADEON_RBBM_SOFT_RESET, 0); 1322 /* Wait to prevent race in RBBM_STATUS */ 1323 mdelay(1); 1324 for (i = 0; i < rdev->usec_timeout; i++) { 1325 tmp = RREG32(RADEON_RBBM_STATUS); 1326 if (!(tmp & (1 << 26))) { 1327 DRM_INFO("RB2D reset succeed (RBBM_STATUS=0x%08X)\n", 1328 tmp); 1329 return 0; 1330 } 1331 DRM_UDELAY(1); 1332 } 1333 tmp = RREG32(RADEON_RBBM_STATUS); 1334 DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp); 1335 return -1; 1336 } 1337 1338 int r100_gpu_reset(struct radeon_device *rdev) 1339 { 1340 uint32_t status; 1341 1342 /* reset order likely matter */ 1343 status = RREG32(RADEON_RBBM_STATUS); 1344 /* reset HDP */ 1345 r100_hdp_reset(rdev); 1346 /* reset rb2d */ 1347 if (status & ((1 << 17) | (1 << 18) | (1 << 27))) { 1348 r100_rb2d_reset(rdev); 1349 } 1350 /* TODO: reset 3D engine */ 1351 /* reset CP */ 1352 status = RREG32(RADEON_RBBM_STATUS); 1353 if (status & (1 << 16)) { 1354 r100_cp_reset(rdev); 1355 } 1356 /* Check if GPU is idle */ 1357 status = RREG32(RADEON_RBBM_STATUS); 1358 if (status & (1 << 31)) { 1359 DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status); 1360 return -1; 1361 } 1362 DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status); 1363 return 0; 1364 } 1365 1366 1367 /* 1368 * VRAM info 1369 */ 1370 static void r100_vram_get_type(struct radeon_device *rdev) 1371 { 1372 uint32_t tmp; 1373 1374 rdev->mc.vram_is_ddr = false; 1375 if (rdev->flags & RADEON_IS_IGP) 1376 rdev->mc.vram_is_ddr = true; 1377 else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR) 1378 rdev->mc.vram_is_ddr = true; 1379 if ((rdev->family == CHIP_RV100) || 1380 (rdev->family == CHIP_RS100) || 1381 (rdev->family == CHIP_RS200)) { 1382 tmp = RREG32(RADEON_MEM_CNTL); 1383 if (tmp & RV100_HALF_MODE) { 1384 rdev->mc.vram_width = 32; 1385 } else { 1386 rdev->mc.vram_width = 64; 1387 } 1388 if (rdev->flags & RADEON_SINGLE_CRTC) { 1389 rdev->mc.vram_width /= 4; 1390 rdev->mc.vram_is_ddr = true; 1391 } 1392 } else if (rdev->family <= CHIP_RV280) { 1393 tmp = RREG32(RADEON_MEM_CNTL); 1394 if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) { 1395 rdev->mc.vram_width = 128; 1396 } else { 1397 rdev->mc.vram_width = 64; 1398 } 1399 } else { 1400 /* newer IGPs */ 1401 rdev->mc.vram_width = 128; 1402 } 1403 } 1404 1405 static u32 r100_get_accessible_vram(struct radeon_device *rdev) 1406 { 1407 u32 aper_size; 1408 u8 byte; 1409 1410 aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 1411 1412 /* Set HDP_APER_CNTL only on cards that are known not to be broken, 1413 * that is has the 2nd generation multifunction PCI interface 1414 */ 1415 if (rdev->family == CHIP_RV280 || 1416 rdev->family >= CHIP_RV350) { 1417 WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, 1418 ~RADEON_HDP_APER_CNTL); 1419 DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); 1420 return aper_size * 2; 1421 } 1422 1423 /* Older cards have all sorts of funny issues to deal with. First 1424 * check if it's a multifunction card by reading the PCI config 1425 * header type... Limit those to one aperture size 1426 */ 1427 pci_read_config_byte(rdev->pdev, 0xe, &byte); 1428 if (byte & 0x80) { 1429 DRM_INFO("Generation 1 PCI interface in multifunction mode\n"); 1430 DRM_INFO("Limiting VRAM to one aperture\n"); 1431 return aper_size; 1432 } 1433 1434 /* Single function older card. We read HDP_APER_CNTL to see how the BIOS 1435 * have set it up. We don't write this as it's broken on some ASICs but 1436 * we expect the BIOS to have done the right thing (might be too optimistic...) 1437 */ 1438 if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) 1439 return aper_size * 2; 1440 return aper_size; 1441 } 1442 1443 void r100_vram_init_sizes(struct radeon_device *rdev) 1444 { 1445 u64 config_aper_size; 1446 u32 accessible; 1447 1448 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 1449 1450 if (rdev->flags & RADEON_IS_IGP) { 1451 uint32_t tom; 1452 /* read NB_TOM to get the amount of ram stolen for the GPU */ 1453 tom = RREG32(RADEON_NB_TOM); 1454 rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16); 1455 /* for IGPs we need to keep VRAM where it was put by the BIOS */ 1456 rdev->mc.vram_location = (tom & 0xffff) << 16; 1457 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 1458 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 1459 } else { 1460 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); 1461 /* Some production boards of m6 will report 0 1462 * if it's 8 MB 1463 */ 1464 if (rdev->mc.real_vram_size == 0) { 1465 rdev->mc.real_vram_size = 8192 * 1024; 1466 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 1467 } 1468 /* let driver place VRAM */ 1469 rdev->mc.vram_location = 0xFFFFFFFFUL; 1470 /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 1471 * Novell bug 204882 + along with lots of ubuntu ones */ 1472 if (config_aper_size > rdev->mc.real_vram_size) 1473 rdev->mc.mc_vram_size = config_aper_size; 1474 else 1475 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 1476 } 1477 1478 /* work out accessible VRAM */ 1479 accessible = r100_get_accessible_vram(rdev); 1480 1481 rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); 1482 rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); 1483 1484 if (accessible > rdev->mc.aper_size) 1485 accessible = rdev->mc.aper_size; 1486 1487 if (rdev->mc.mc_vram_size > rdev->mc.aper_size) 1488 rdev->mc.mc_vram_size = rdev->mc.aper_size; 1489 1490 if (rdev->mc.real_vram_size > rdev->mc.aper_size) 1491 rdev->mc.real_vram_size = rdev->mc.aper_size; 1492 } 1493 1494 void r100_vram_info(struct radeon_device *rdev) 1495 { 1496 r100_vram_get_type(rdev); 1497 1498 r100_vram_init_sizes(rdev); 1499 } 1500 1501 1502 /* 1503 * Indirect registers accessor 1504 */ 1505 void r100_pll_errata_after_index(struct radeon_device *rdev) 1506 { 1507 if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) { 1508 return; 1509 } 1510 (void)RREG32(RADEON_CLOCK_CNTL_DATA); 1511 (void)RREG32(RADEON_CRTC_GEN_CNTL); 1512 } 1513 1514 static void r100_pll_errata_after_data(struct radeon_device *rdev) 1515 { 1516 /* This workarounds is necessary on RV100, RS100 and RS200 chips 1517 * or the chip could hang on a subsequent access 1518 */ 1519 if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) { 1520 udelay(5000); 1521 } 1522 1523 /* This function is required to workaround a hardware bug in some (all?) 1524 * revisions of the R300. This workaround should be called after every 1525 * CLOCK_CNTL_INDEX register access. If not, register reads afterward 1526 * may not be correct. 1527 */ 1528 if (rdev->pll_errata & CHIP_ERRATA_R300_CG) { 1529 uint32_t save, tmp; 1530 1531 save = RREG32(RADEON_CLOCK_CNTL_INDEX); 1532 tmp = save & ~(0x3f | RADEON_PLL_WR_EN); 1533 WREG32(RADEON_CLOCK_CNTL_INDEX, tmp); 1534 tmp = RREG32(RADEON_CLOCK_CNTL_DATA); 1535 WREG32(RADEON_CLOCK_CNTL_INDEX, save); 1536 } 1537 } 1538 1539 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) 1540 { 1541 uint32_t data; 1542 1543 WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); 1544 r100_pll_errata_after_index(rdev); 1545 data = RREG32(RADEON_CLOCK_CNTL_DATA); 1546 r100_pll_errata_after_data(rdev); 1547 return data; 1548 } 1549 1550 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) 1551 { 1552 WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); 1553 r100_pll_errata_after_index(rdev); 1554 WREG32(RADEON_CLOCK_CNTL_DATA, v); 1555 r100_pll_errata_after_data(rdev); 1556 } 1557 1558 uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) 1559 { 1560 if (reg < 0x10000) 1561 return readl(((void __iomem *)rdev->rmmio) + reg); 1562 else { 1563 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); 1564 return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); 1565 } 1566 } 1567 1568 void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) 1569 { 1570 if (reg < 0x10000) 1571 writel(v, ((void __iomem *)rdev->rmmio) + reg); 1572 else { 1573 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); 1574 writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); 1575 } 1576 } 1577 1578 int r100_init(struct radeon_device *rdev) 1579 { 1580 return 0; 1581 } 1582 1583 /* 1584 * Debugfs info 1585 */ 1586 #if defined(CONFIG_DEBUG_FS) 1587 static int r100_debugfs_rbbm_info(struct seq_file *m, void *data) 1588 { 1589 struct drm_info_node *node = (struct drm_info_node *) m->private; 1590 struct drm_device *dev = node->minor->dev; 1591 struct radeon_device *rdev = dev->dev_private; 1592 uint32_t reg, value; 1593 unsigned i; 1594 1595 seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS)); 1596 seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C)); 1597 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 1598 for (i = 0; i < 64; i++) { 1599 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100); 1600 reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2; 1601 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i); 1602 value = RREG32(RADEON_RBBM_CMDFIFO_DATA); 1603 seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value); 1604 } 1605 return 0; 1606 } 1607 1608 static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) 1609 { 1610 struct drm_info_node *node = (struct drm_info_node *) m->private; 1611 struct drm_device *dev = node->minor->dev; 1612 struct radeon_device *rdev = dev->dev_private; 1613 uint32_t rdp, wdp; 1614 unsigned count, i, j; 1615 1616 radeon_ring_free_size(rdev); 1617 rdp = RREG32(RADEON_CP_RB_RPTR); 1618 wdp = RREG32(RADEON_CP_RB_WPTR); 1619 count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask; 1620 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 1621 seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); 1622 seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); 1623 seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw); 1624 seq_printf(m, "%u dwords in ring\n", count); 1625 for (j = 0; j <= count; j++) { 1626 i = (rdp + j) & rdev->cp.ptr_mask; 1627 seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]); 1628 } 1629 return 0; 1630 } 1631 1632 1633 static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data) 1634 { 1635 struct drm_info_node *node = (struct drm_info_node *) m->private; 1636 struct drm_device *dev = node->minor->dev; 1637 struct radeon_device *rdev = dev->dev_private; 1638 uint32_t csq_stat, csq2_stat, tmp; 1639 unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr; 1640 unsigned i; 1641 1642 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 1643 seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); 1644 csq_stat = RREG32(RADEON_CP_CSQ_STAT); 1645 csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); 1646 r_rptr = (csq_stat >> 0) & 0x3ff; 1647 r_wptr = (csq_stat >> 10) & 0x3ff; 1648 ib1_rptr = (csq_stat >> 20) & 0x3ff; 1649 ib1_wptr = (csq2_stat >> 0) & 0x3ff; 1650 ib2_rptr = (csq2_stat >> 10) & 0x3ff; 1651 ib2_wptr = (csq2_stat >> 20) & 0x3ff; 1652 seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); 1653 seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); 1654 seq_printf(m, "Ring rptr %u\n", r_rptr); 1655 seq_printf(m, "Ring wptr %u\n", r_wptr); 1656 seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); 1657 seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); 1658 seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); 1659 seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); 1660 /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms 1661 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ 1662 seq_printf(m, "Ring fifo:\n"); 1663 for (i = 0; i < 256; i++) { 1664 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 1665 tmp = RREG32(RADEON_CP_CSQ_DATA); 1666 seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); 1667 } 1668 seq_printf(m, "Indirect1 fifo:\n"); 1669 for (i = 256; i <= 512; i++) { 1670 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 1671 tmp = RREG32(RADEON_CP_CSQ_DATA); 1672 seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); 1673 } 1674 seq_printf(m, "Indirect2 fifo:\n"); 1675 for (i = 640; i < ib1_wptr; i++) { 1676 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 1677 tmp = RREG32(RADEON_CP_CSQ_DATA); 1678 seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); 1679 } 1680 return 0; 1681 } 1682 1683 static int r100_debugfs_mc_info(struct seq_file *m, void *data) 1684 { 1685 struct drm_info_node *node = (struct drm_info_node *) m->private; 1686 struct drm_device *dev = node->minor->dev; 1687 struct radeon_device *rdev = dev->dev_private; 1688 uint32_t tmp; 1689 1690 tmp = RREG32(RADEON_CONFIG_MEMSIZE); 1691 seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); 1692 tmp = RREG32(RADEON_MC_FB_LOCATION); 1693 seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); 1694 tmp = RREG32(RADEON_BUS_CNTL); 1695 seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); 1696 tmp = RREG32(RADEON_MC_AGP_LOCATION); 1697 seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); 1698 tmp = RREG32(RADEON_AGP_BASE); 1699 seq_printf(m, "AGP_BASE 0x%08x\n", tmp); 1700 tmp = RREG32(RADEON_HOST_PATH_CNTL); 1701 seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); 1702 tmp = RREG32(0x01D0); 1703 seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); 1704 tmp = RREG32(RADEON_AIC_LO_ADDR); 1705 seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); 1706 tmp = RREG32(RADEON_AIC_HI_ADDR); 1707 seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); 1708 tmp = RREG32(0x01E4); 1709 seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); 1710 return 0; 1711 } 1712 1713 static struct drm_info_list r100_debugfs_rbbm_list[] = { 1714 {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, 1715 }; 1716 1717 static struct drm_info_list r100_debugfs_cp_list[] = { 1718 {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, 1719 {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, 1720 }; 1721 1722 static struct drm_info_list r100_debugfs_mc_info_list[] = { 1723 {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, 1724 }; 1725 #endif 1726 1727 int r100_debugfs_rbbm_init(struct radeon_device *rdev) 1728 { 1729 #if defined(CONFIG_DEBUG_FS) 1730 return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1); 1731 #else 1732 return 0; 1733 #endif 1734 } 1735 1736 int r100_debugfs_cp_init(struct radeon_device *rdev) 1737 { 1738 #if defined(CONFIG_DEBUG_FS) 1739 return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2); 1740 #else 1741 return 0; 1742 #endif 1743 } 1744 1745 int r100_debugfs_mc_info_init(struct radeon_device *rdev) 1746 { 1747 #if defined(CONFIG_DEBUG_FS) 1748 return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1); 1749 #else 1750 return 0; 1751 #endif 1752 } 1753 1754 int r100_set_surface_reg(struct radeon_device *rdev, int reg, 1755 uint32_t tiling_flags, uint32_t pitch, 1756 uint32_t offset, uint32_t obj_size) 1757 { 1758 int surf_index = reg * 16; 1759 int flags = 0; 1760 1761 /* r100/r200 divide by 16 */ 1762 if (rdev->family < CHIP_R300) 1763 flags = pitch / 16; 1764 else 1765 flags = pitch / 8; 1766 1767 if (rdev->family <= CHIP_RS200) { 1768 if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) 1769 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) 1770 flags |= RADEON_SURF_TILE_COLOR_BOTH; 1771 if (tiling_flags & RADEON_TILING_MACRO) 1772 flags |= RADEON_SURF_TILE_COLOR_MACRO; 1773 } else if (rdev->family <= CHIP_RV280) { 1774 if (tiling_flags & (RADEON_TILING_MACRO)) 1775 flags |= R200_SURF_TILE_COLOR_MACRO; 1776 if (tiling_flags & RADEON_TILING_MICRO) 1777 flags |= R200_SURF_TILE_COLOR_MICRO; 1778 } else { 1779 if (tiling_flags & RADEON_TILING_MACRO) 1780 flags |= R300_SURF_TILE_MACRO; 1781 if (tiling_flags & RADEON_TILING_MICRO) 1782 flags |= R300_SURF_TILE_MICRO; 1783 } 1784 1785 DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); 1786 WREG32(RADEON_SURFACE0_INFO + surf_index, flags); 1787 WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); 1788 WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); 1789 return 0; 1790 } 1791 1792 void r100_clear_surface_reg(struct radeon_device *rdev, int reg) 1793 { 1794 int surf_index = reg * 16; 1795 WREG32(RADEON_SURFACE0_INFO + surf_index, 0); 1796 } 1797 1798 void r100_bandwidth_update(struct radeon_device *rdev) 1799 { 1800 fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff; 1801 fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff; 1802 fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff; 1803 uint32_t temp, data, mem_trcd, mem_trp, mem_tras; 1804 fixed20_12 memtcas_ff[8] = { 1805 fixed_init(1), 1806 fixed_init(2), 1807 fixed_init(3), 1808 fixed_init(0), 1809 fixed_init_half(1), 1810 fixed_init_half(2), 1811 fixed_init(0), 1812 }; 1813 fixed20_12 memtcas_rs480_ff[8] = { 1814 fixed_init(0), 1815 fixed_init(1), 1816 fixed_init(2), 1817 fixed_init(3), 1818 fixed_init(0), 1819 fixed_init_half(1), 1820 fixed_init_half(2), 1821 fixed_init_half(3), 1822 }; 1823 fixed20_12 memtcas2_ff[8] = { 1824 fixed_init(0), 1825 fixed_init(1), 1826 fixed_init(2), 1827 fixed_init(3), 1828 fixed_init(4), 1829 fixed_init(5), 1830 fixed_init(6), 1831 fixed_init(7), 1832 }; 1833 fixed20_12 memtrbs[8] = { 1834 fixed_init(1), 1835 fixed_init_half(1), 1836 fixed_init(2), 1837 fixed_init_half(2), 1838 fixed_init(3), 1839 fixed_init_half(3), 1840 fixed_init(4), 1841 fixed_init_half(4) 1842 }; 1843 fixed20_12 memtrbs_r4xx[8] = { 1844 fixed_init(4), 1845 fixed_init(5), 1846 fixed_init(6), 1847 fixed_init(7), 1848 fixed_init(8), 1849 fixed_init(9), 1850 fixed_init(10), 1851 fixed_init(11) 1852 }; 1853 fixed20_12 min_mem_eff; 1854 fixed20_12 mc_latency_sclk, mc_latency_mclk, k1; 1855 fixed20_12 cur_latency_mclk, cur_latency_sclk; 1856 fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate, 1857 disp_drain_rate2, read_return_rate; 1858 fixed20_12 time_disp1_drop_priority; 1859 int c; 1860 int cur_size = 16; /* in octawords */ 1861 int critical_point = 0, critical_point2; 1862 /* uint32_t read_return_rate, time_disp1_drop_priority; */ 1863 int stop_req, max_stop_req; 1864 struct drm_display_mode *mode1 = NULL; 1865 struct drm_display_mode *mode2 = NULL; 1866 uint32_t pixel_bytes1 = 0; 1867 uint32_t pixel_bytes2 = 0; 1868 1869 if (rdev->mode_info.crtcs[0]->base.enabled) { 1870 mode1 = &rdev->mode_info.crtcs[0]->base.mode; 1871 pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; 1872 } 1873 if (rdev->mode_info.crtcs[1]->base.enabled) { 1874 mode2 = &rdev->mode_info.crtcs[1]->base.mode; 1875 pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; 1876 } 1877 1878 min_mem_eff.full = rfixed_const_8(0); 1879 /* get modes */ 1880 if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) { 1881 uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER); 1882 mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); 1883 mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT); 1884 /* check crtc enables */ 1885 if (mode2) 1886 mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT); 1887 if (mode1) 1888 mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT); 1889 WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); 1890 } 1891 1892 /* 1893 * determine is there is enough bw for current mode 1894 */ 1895 mclk_ff.full = rfixed_const(rdev->clock.default_mclk); 1896 temp_ff.full = rfixed_const(100); 1897 mclk_ff.full = rfixed_div(mclk_ff, temp_ff); 1898 sclk_ff.full = rfixed_const(rdev->clock.default_sclk); 1899 sclk_ff.full = rfixed_div(sclk_ff, temp_ff); 1900 1901 temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1); 1902 temp_ff.full = rfixed_const(temp); 1903 mem_bw.full = rfixed_mul(mclk_ff, temp_ff); 1904 1905 pix_clk.full = 0; 1906 pix_clk2.full = 0; 1907 peak_disp_bw.full = 0; 1908 if (mode1) { 1909 temp_ff.full = rfixed_const(1000); 1910 pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */ 1911 pix_clk.full = rfixed_div(pix_clk, temp_ff); 1912 temp_ff.full = rfixed_const(pixel_bytes1); 1913 peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff); 1914 } 1915 if (mode2) { 1916 temp_ff.full = rfixed_const(1000); 1917 pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */ 1918 pix_clk2.full = rfixed_div(pix_clk2, temp_ff); 1919 temp_ff.full = rfixed_const(pixel_bytes2); 1920 peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff); 1921 } 1922 1923 mem_bw.full = rfixed_mul(mem_bw, min_mem_eff); 1924 if (peak_disp_bw.full >= mem_bw.full) { 1925 DRM_ERROR("You may not have enough display bandwidth for current mode\n" 1926 "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n"); 1927 } 1928 1929 /* Get values from the EXT_MEM_CNTL register...converting its contents. */ 1930 temp = RREG32(RADEON_MEM_TIMING_CNTL); 1931 if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */ 1932 mem_trcd = ((temp >> 2) & 0x3) + 1; 1933 mem_trp = ((temp & 0x3)) + 1; 1934 mem_tras = ((temp & 0x70) >> 4) + 1; 1935 } else if (rdev->family == CHIP_R300 || 1936 rdev->family == CHIP_R350) { /* r300, r350 */ 1937 mem_trcd = (temp & 0x7) + 1; 1938 mem_trp = ((temp >> 8) & 0x7) + 1; 1939 mem_tras = ((temp >> 11) & 0xf) + 4; 1940 } else if (rdev->family == CHIP_RV350 || 1941 rdev->family <= CHIP_RV380) { 1942 /* rv3x0 */ 1943 mem_trcd = (temp & 0x7) + 3; 1944 mem_trp = ((temp >> 8) & 0x7) + 3; 1945 mem_tras = ((temp >> 11) & 0xf) + 6; 1946 } else if (rdev->family == CHIP_R420 || 1947 rdev->family == CHIP_R423 || 1948 rdev->family == CHIP_RV410) { 1949 /* r4xx */ 1950 mem_trcd = (temp & 0xf) + 3; 1951 if (mem_trcd > 15) 1952 mem_trcd = 15; 1953 mem_trp = ((temp >> 8) & 0xf) + 3; 1954 if (mem_trp > 15) 1955 mem_trp = 15; 1956 mem_tras = ((temp >> 12) & 0x1f) + 6; 1957 if (mem_tras > 31) 1958 mem_tras = 31; 1959 } else { /* RV200, R200 */ 1960 mem_trcd = (temp & 0x7) + 1; 1961 mem_trp = ((temp >> 8) & 0x7) + 1; 1962 mem_tras = ((temp >> 12) & 0xf) + 4; 1963 } 1964 /* convert to FF */ 1965 trcd_ff.full = rfixed_const(mem_trcd); 1966 trp_ff.full = rfixed_const(mem_trp); 1967 tras_ff.full = rfixed_const(mem_tras); 1968 1969 /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ 1970 temp = RREG32(RADEON_MEM_SDRAM_MODE_REG); 1971 data = (temp & (7 << 20)) >> 20; 1972 if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) { 1973 if (rdev->family == CHIP_RS480) /* don't think rs400 */ 1974 tcas_ff = memtcas_rs480_ff[data]; 1975 else 1976 tcas_ff = memtcas_ff[data]; 1977 } else 1978 tcas_ff = memtcas2_ff[data]; 1979 1980 if (rdev->family == CHIP_RS400 || 1981 rdev->family == CHIP_RS480) { 1982 /* extra cas latency stored in bits 23-25 0-4 clocks */ 1983 data = (temp >> 23) & 0x7; 1984 if (data < 5) 1985 tcas_ff.full += rfixed_const(data); 1986 } 1987 1988 if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) { 1989 /* on the R300, Tcas is included in Trbs. 1990 */ 1991 temp = RREG32(RADEON_MEM_CNTL); 1992 data = (R300_MEM_NUM_CHANNELS_MASK & temp); 1993 if (data == 1) { 1994 if (R300_MEM_USE_CD_CH_ONLY & temp) { 1995 temp = RREG32(R300_MC_IND_INDEX); 1996 temp &= ~R300_MC_IND_ADDR_MASK; 1997 temp |= R300_MC_READ_CNTL_CD_mcind; 1998 WREG32(R300_MC_IND_INDEX, temp); 1999 temp = RREG32(R300_MC_IND_DATA); 2000 data = (R300_MEM_RBS_POSITION_C_MASK & temp); 2001 } else { 2002 temp = RREG32(R300_MC_READ_CNTL_AB); 2003 data = (R300_MEM_RBS_POSITION_A_MASK & temp); 2004 } 2005 } else { 2006 temp = RREG32(R300_MC_READ_CNTL_AB); 2007 data = (R300_MEM_RBS_POSITION_A_MASK & temp); 2008 } 2009 if (rdev->family == CHIP_RV410 || 2010 rdev->family == CHIP_R420 || 2011 rdev->family == CHIP_R423) 2012 trbs_ff = memtrbs_r4xx[data]; 2013 else 2014 trbs_ff = memtrbs[data]; 2015 tcas_ff.full += trbs_ff.full; 2016 } 2017 2018 sclk_eff_ff.full = sclk_ff.full; 2019 2020 if (rdev->flags & RADEON_IS_AGP) { 2021 fixed20_12 agpmode_ff; 2022 agpmode_ff.full = rfixed_const(radeon_agpmode); 2023 temp_ff.full = rfixed_const_666(16); 2024 sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff); 2025 } 2026 /* TODO PCIE lanes may affect this - agpmode == 16?? */ 2027 2028 if (ASIC_IS_R300(rdev)) { 2029 sclk_delay_ff.full = rfixed_const(250); 2030 } else { 2031 if ((rdev->family == CHIP_RV100) || 2032 rdev->flags & RADEON_IS_IGP) { 2033 if (rdev->mc.vram_is_ddr) 2034 sclk_delay_ff.full = rfixed_const(41); 2035 else 2036 sclk_delay_ff.full = rfixed_const(33); 2037 } else { 2038 if (rdev->mc.vram_width == 128) 2039 sclk_delay_ff.full = rfixed_const(57); 2040 else 2041 sclk_delay_ff.full = rfixed_const(41); 2042 } 2043 } 2044 2045 mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff); 2046 2047 if (rdev->mc.vram_is_ddr) { 2048 if (rdev->mc.vram_width == 32) { 2049 k1.full = rfixed_const(40); 2050 c = 3; 2051 } else { 2052 k1.full = rfixed_const(20); 2053 c = 1; 2054 } 2055 } else { 2056 k1.full = rfixed_const(40); 2057 c = 3; 2058 } 2059 2060 temp_ff.full = rfixed_const(2); 2061 mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff); 2062 temp_ff.full = rfixed_const(c); 2063 mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff); 2064 temp_ff.full = rfixed_const(4); 2065 mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff); 2066 mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff); 2067 mc_latency_mclk.full += k1.full; 2068 2069 mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff); 2070 mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff); 2071 2072 /* 2073 HW cursor time assuming worst case of full size colour cursor. 2074 */ 2075 temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1)))); 2076 temp_ff.full += trcd_ff.full; 2077 if (temp_ff.full < tras_ff.full) 2078 temp_ff.full = tras_ff.full; 2079 cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff); 2080 2081 temp_ff.full = rfixed_const(cur_size); 2082 cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff); 2083 /* 2084 Find the total latency for the display data. 2085 */ 2086 disp_latency_overhead.full = rfixed_const(80); 2087 disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff); 2088 mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full; 2089 mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full; 2090 2091 if (mc_latency_mclk.full > mc_latency_sclk.full) 2092 disp_latency.full = mc_latency_mclk.full; 2093 else 2094 disp_latency.full = mc_latency_sclk.full; 2095 2096 /* setup Max GRPH_STOP_REQ default value */ 2097 if (ASIC_IS_RV100(rdev)) 2098 max_stop_req = 0x5c; 2099 else 2100 max_stop_req = 0x7c; 2101 2102 if (mode1) { 2103 /* CRTC1 2104 Set GRPH_BUFFER_CNTL register using h/w defined optimal values. 2105 GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ] 2106 */ 2107 stop_req = mode1->hdisplay * pixel_bytes1 / 16; 2108 2109 if (stop_req > max_stop_req) 2110 stop_req = max_stop_req; 2111 2112 /* 2113 Find the drain rate of the display buffer. 2114 */ 2115 temp_ff.full = rfixed_const((16/pixel_bytes1)); 2116 disp_drain_rate.full = rfixed_div(pix_clk, temp_ff); 2117 2118 /* 2119 Find the critical point of the display buffer. 2120 */ 2121 crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency); 2122 crit_point_ff.full += rfixed_const_half(0); 2123 2124 critical_point = rfixed_trunc(crit_point_ff); 2125 2126 if (rdev->disp_priority == 2) { 2127 critical_point = 0; 2128 } 2129 2130 /* 2131 The critical point should never be above max_stop_req-4. Setting 2132 GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time. 2133 */ 2134 if (max_stop_req - critical_point < 4) 2135 critical_point = 0; 2136 2137 if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { 2138 /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/ 2139 critical_point = 0x10; 2140 } 2141 2142 temp = RREG32(RADEON_GRPH_BUFFER_CNTL); 2143 temp &= ~(RADEON_GRPH_STOP_REQ_MASK); 2144 temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); 2145 temp &= ~(RADEON_GRPH_START_REQ_MASK); 2146 if ((rdev->family == CHIP_R350) && 2147 (stop_req > 0x15)) { 2148 stop_req -= 0x10; 2149 } 2150 temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); 2151 temp |= RADEON_GRPH_BUFFER_SIZE; 2152 temp &= ~(RADEON_GRPH_CRITICAL_CNTL | 2153 RADEON_GRPH_CRITICAL_AT_SOF | 2154 RADEON_GRPH_STOP_CNTL); 2155 /* 2156 Write the result into the register. 2157 */ 2158 WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) | 2159 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT))); 2160 2161 #if 0 2162 if ((rdev->family == CHIP_RS400) || 2163 (rdev->family == CHIP_RS480)) { 2164 /* attempt to program RS400 disp regs correctly ??? */ 2165 temp = RREG32(RS400_DISP1_REG_CNTL); 2166 temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK | 2167 RS400_DISP1_STOP_REQ_LEVEL_MASK); 2168 WREG32(RS400_DISP1_REQ_CNTL1, (temp | 2169 (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) | 2170 (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); 2171 temp = RREG32(RS400_DMIF_MEM_CNTL1); 2172 temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK | 2173 RS400_DISP1_CRITICAL_POINT_STOP_MASK); 2174 WREG32(RS400_DMIF_MEM_CNTL1, (temp | 2175 (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) | 2176 (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT))); 2177 } 2178 #endif 2179 2180 DRM_DEBUG("GRPH_BUFFER_CNTL from to %x\n", 2181 /* (unsigned int)info->SavedReg->grph_buffer_cntl, */ 2182 (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL)); 2183 } 2184 2185 if (mode2) { 2186 u32 grph2_cntl; 2187 stop_req = mode2->hdisplay * pixel_bytes2 / 16; 2188 2189 if (stop_req > max_stop_req) 2190 stop_req = max_stop_req; 2191 2192 /* 2193 Find the drain rate of the display buffer. 2194 */ 2195 temp_ff.full = rfixed_const((16/pixel_bytes2)); 2196 disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff); 2197 2198 grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); 2199 grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); 2200 grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); 2201 grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); 2202 if ((rdev->family == CHIP_R350) && 2203 (stop_req > 0x15)) { 2204 stop_req -= 0x10; 2205 } 2206 grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); 2207 grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; 2208 grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | 2209 RADEON_GRPH_CRITICAL_AT_SOF | 2210 RADEON_GRPH_STOP_CNTL); 2211 2212 if ((rdev->family == CHIP_RS100) || 2213 (rdev->family == CHIP_RS200)) 2214 critical_point2 = 0; 2215 else { 2216 temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; 2217 temp_ff.full = rfixed_const(temp); 2218 temp_ff.full = rfixed_mul(mclk_ff, temp_ff); 2219 if (sclk_ff.full < temp_ff.full) 2220 temp_ff.full = sclk_ff.full; 2221 2222 read_return_rate.full = temp_ff.full; 2223 2224 if (mode1) { 2225 temp_ff.full = read_return_rate.full - disp_drain_rate.full; 2226 time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff); 2227 } else { 2228 time_disp1_drop_priority.full = 0; 2229 } 2230 crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; 2231 crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2); 2232 crit_point_ff.full += rfixed_const_half(0); 2233 2234 critical_point2 = rfixed_trunc(crit_point_ff); 2235 2236 if (rdev->disp_priority == 2) { 2237 critical_point2 = 0; 2238 } 2239 2240 if (max_stop_req - critical_point2 < 4) 2241 critical_point2 = 0; 2242 2243 } 2244 2245 if (critical_point2 == 0 && rdev->family == CHIP_R300) { 2246 /* some R300 cards have problem with this set to 0 */ 2247 critical_point2 = 0x10; 2248 } 2249 2250 WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | 2251 (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); 2252 2253 if ((rdev->family == CHIP_RS400) || 2254 (rdev->family == CHIP_RS480)) { 2255 #if 0 2256 /* attempt to program RS400 disp2 regs correctly ??? */ 2257 temp = RREG32(RS400_DISP2_REQ_CNTL1); 2258 temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | 2259 RS400_DISP2_STOP_REQ_LEVEL_MASK); 2260 WREG32(RS400_DISP2_REQ_CNTL1, (temp | 2261 (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | 2262 (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); 2263 temp = RREG32(RS400_DISP2_REQ_CNTL2); 2264 temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | 2265 RS400_DISP2_CRITICAL_POINT_STOP_MASK); 2266 WREG32(RS400_DISP2_REQ_CNTL2, (temp | 2267 (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | 2268 (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); 2269 #endif 2270 WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); 2271 WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); 2272 WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); 2273 WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); 2274 } 2275 2276 DRM_DEBUG("GRPH2_BUFFER_CNTL from to %x\n", 2277 (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); 2278 } 2279 } 2280