1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include <linux/seq_file.h> 29 #include "drmP.h" 30 #include "drm.h" 31 #include "radeon_drm.h" 32 #include "radeon_reg.h" 33 #include "radeon.h" 34 #include "r100d.h" 35 36 #include <linux/firmware.h> 37 #include <linux/platform_device.h> 38 39 #include "r100_reg_safe.h" 40 #include "rn50_reg_safe.h" 41 42 /* Firmware Names */ 43 #define FIRMWARE_R100 "radeon/R100_cp.bin" 44 #define FIRMWARE_R200 "radeon/R200_cp.bin" 45 #define FIRMWARE_R300 "radeon/R300_cp.bin" 46 #define FIRMWARE_R420 "radeon/R420_cp.bin" 47 #define FIRMWARE_RS690 "radeon/RS690_cp.bin" 48 #define FIRMWARE_RS600 "radeon/RS600_cp.bin" 49 #define FIRMWARE_R520 "radeon/R520_cp.bin" 50 51 MODULE_FIRMWARE(FIRMWARE_R100); 52 MODULE_FIRMWARE(FIRMWARE_R200); 53 MODULE_FIRMWARE(FIRMWARE_R300); 54 MODULE_FIRMWARE(FIRMWARE_R420); 55 MODULE_FIRMWARE(FIRMWARE_RS690); 56 MODULE_FIRMWARE(FIRMWARE_RS600); 57 MODULE_FIRMWARE(FIRMWARE_R520); 58 59 #include "r100_track.h" 60 61 /* This files gather functions specifics to: 62 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 63 * 64 * Some of these functions might be used by newer ASICs. 65 */ 66 int r200_init(struct radeon_device *rdev); 67 void r100_hdp_reset(struct radeon_device *rdev); 68 void r100_gpu_init(struct radeon_device *rdev); 69 int r100_gui_wait_for_idle(struct radeon_device *rdev); 70 int r100_mc_wait_for_idle(struct radeon_device *rdev); 71 void r100_gpu_wait_for_vsync(struct radeon_device *rdev); 72 void r100_gpu_wait_for_vsync2(struct radeon_device *rdev); 73 int r100_debugfs_mc_info_init(struct radeon_device *rdev); 74 75 76 /* 77 * PCI GART 78 */ 79 void r100_pci_gart_tlb_flush(struct radeon_device *rdev) 80 { 81 /* TODO: can we do somethings here ? */ 82 /* It seems hw only cache one entry so we should discard this 83 * entry otherwise if first GPU GART read hit this entry it 84 * could end up in wrong address. */ 85 } 86 87 int r100_pci_gart_init(struct radeon_device *rdev) 88 { 89 int r; 90 91 if (rdev->gart.table.ram.ptr) { 92 WARN(1, "R100 PCI GART already initialized.\n"); 93 return 0; 94 } 95 /* Initialize common gart structure */ 96 r = radeon_gart_init(rdev); 97 if (r) 98 return r; 99 rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; 100 rdev->asic->gart_tlb_flush = &r100_pci_gart_tlb_flush; 101 rdev->asic->gart_set_page = &r100_pci_gart_set_page; 102 return radeon_gart_table_ram_alloc(rdev); 103 } 104 105 int r100_pci_gart_enable(struct radeon_device *rdev) 106 { 107 uint32_t tmp; 108 109 /* discard memory request outside of configured range */ 110 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS; 111 WREG32(RADEON_AIC_CNTL, tmp); 112 /* set address range for PCI address translate */ 113 WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location); 114 tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; 115 WREG32(RADEON_AIC_HI_ADDR, tmp); 116 /* Enable bus mastering */ 117 tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; 118 WREG32(RADEON_BUS_CNTL, tmp); 119 /* set PCI GART page-table base address */ 120 WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr); 121 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN; 122 WREG32(RADEON_AIC_CNTL, tmp); 123 r100_pci_gart_tlb_flush(rdev); 124 rdev->gart.ready = true; 125 return 0; 126 } 127 128 void r100_pci_gart_disable(struct radeon_device *rdev) 129 { 130 uint32_t tmp; 131 132 /* discard memory request outside of configured range */ 133 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS; 134 WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN); 135 WREG32(RADEON_AIC_LO_ADDR, 0); 136 WREG32(RADEON_AIC_HI_ADDR, 0); 137 } 138 139 int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) 140 { 141 if (i < 0 || i > rdev->gart.num_gpu_pages) { 142 return -EINVAL; 143 } 144 rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr)); 145 return 0; 146 } 147 148 void r100_pci_gart_fini(struct radeon_device *rdev) 149 { 150 r100_pci_gart_disable(rdev); 151 radeon_gart_table_ram_free(rdev); 152 radeon_gart_fini(rdev); 153 } 154 155 156 /* 157 * MC 158 */ 159 void r100_mc_disable_clients(struct radeon_device *rdev) 160 { 161 uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl; 162 163 /* FIXME: is this function correct for rs100,rs200,rs300 ? */ 164 if (r100_gui_wait_for_idle(rdev)) { 165 printk(KERN_WARNING "Failed to wait GUI idle while " 166 "programming pipes. Bad things might happen.\n"); 167 } 168 169 /* stop display and memory access */ 170 ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL); 171 WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE); 172 crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL); 173 WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS); 174 crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); 175 176 r100_gpu_wait_for_vsync(rdev); 177 178 WREG32(RADEON_CRTC_GEN_CNTL, 179 (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) | 180 RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN); 181 182 if (!(rdev->flags & RADEON_SINGLE_CRTC)) { 183 crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); 184 185 r100_gpu_wait_for_vsync2(rdev); 186 WREG32(RADEON_CRTC2_GEN_CNTL, 187 (crtc2_gen_cntl & 188 ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) | 189 RADEON_CRTC2_DISP_REQ_EN_B); 190 } 191 192 udelay(500); 193 } 194 195 void r100_mc_setup(struct radeon_device *rdev) 196 { 197 uint32_t tmp; 198 int r; 199 200 r = r100_debugfs_mc_info_init(rdev); 201 if (r) { 202 DRM_ERROR("Failed to register debugfs file for R100 MC !\n"); 203 } 204 /* Write VRAM size in case we are limiting it */ 205 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 206 /* Novell bug 204882 for RN50/M6/M7 with 8/16/32MB VRAM, 207 * if the aperture is 64MB but we have 32MB VRAM 208 * we report only 32MB VRAM but we have to set MC_FB_LOCATION 209 * to 64MB, otherwise the gpu accidentially dies */ 210 tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; 211 tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16); 212 tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16); 213 WREG32(RADEON_MC_FB_LOCATION, tmp); 214 215 /* Enable bus mastering */ 216 tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; 217 WREG32(RADEON_BUS_CNTL, tmp); 218 219 if (rdev->flags & RADEON_IS_AGP) { 220 tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; 221 tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16); 222 tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16); 223 WREG32(RADEON_MC_AGP_LOCATION, tmp); 224 WREG32(RADEON_AGP_BASE, rdev->mc.agp_base); 225 } else { 226 WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF); 227 WREG32(RADEON_AGP_BASE, 0); 228 } 229 230 tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL; 231 tmp |= (7 << 28); 232 WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE); 233 (void)RREG32(RADEON_HOST_PATH_CNTL); 234 WREG32(RADEON_HOST_PATH_CNTL, tmp); 235 (void)RREG32(RADEON_HOST_PATH_CNTL); 236 } 237 238 int r100_mc_init(struct radeon_device *rdev) 239 { 240 int r; 241 242 if (r100_debugfs_rbbm_init(rdev)) { 243 DRM_ERROR("Failed to register debugfs file for RBBM !\n"); 244 } 245 246 r100_gpu_init(rdev); 247 /* Disable gart which also disable out of gart access */ 248 r100_pci_gart_disable(rdev); 249 250 /* Setup GPU memory space */ 251 rdev->mc.gtt_location = 0xFFFFFFFFUL; 252 if (rdev->flags & RADEON_IS_AGP) { 253 r = radeon_agp_init(rdev); 254 if (r) { 255 printk(KERN_WARNING "[drm] Disabling AGP\n"); 256 rdev->flags &= ~RADEON_IS_AGP; 257 rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; 258 } else { 259 rdev->mc.gtt_location = rdev->mc.agp_base; 260 } 261 } 262 r = radeon_mc_setup(rdev); 263 if (r) { 264 return r; 265 } 266 267 r100_mc_disable_clients(rdev); 268 if (r100_mc_wait_for_idle(rdev)) { 269 printk(KERN_WARNING "Failed to wait MC idle while " 270 "programming pipes. Bad things might happen.\n"); 271 } 272 273 r100_mc_setup(rdev); 274 return 0; 275 } 276 277 void r100_mc_fini(struct radeon_device *rdev) 278 { 279 } 280 281 282 /* 283 * Interrupts 284 */ 285 int r100_irq_set(struct radeon_device *rdev) 286 { 287 uint32_t tmp = 0; 288 289 if (rdev->irq.sw_int) { 290 tmp |= RADEON_SW_INT_ENABLE; 291 } 292 if (rdev->irq.crtc_vblank_int[0]) { 293 tmp |= RADEON_CRTC_VBLANK_MASK; 294 } 295 if (rdev->irq.crtc_vblank_int[1]) { 296 tmp |= RADEON_CRTC2_VBLANK_MASK; 297 } 298 WREG32(RADEON_GEN_INT_CNTL, tmp); 299 return 0; 300 } 301 302 void r100_irq_disable(struct radeon_device *rdev) 303 { 304 u32 tmp; 305 306 WREG32(R_000040_GEN_INT_CNTL, 0); 307 /* Wait and acknowledge irq */ 308 mdelay(1); 309 tmp = RREG32(R_000044_GEN_INT_STATUS); 310 WREG32(R_000044_GEN_INT_STATUS, tmp); 311 } 312 313 static inline uint32_t r100_irq_ack(struct radeon_device *rdev) 314 { 315 uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS); 316 uint32_t irq_mask = RADEON_SW_INT_TEST | RADEON_CRTC_VBLANK_STAT | 317 RADEON_CRTC2_VBLANK_STAT; 318 319 if (irqs) { 320 WREG32(RADEON_GEN_INT_STATUS, irqs); 321 } 322 return irqs & irq_mask; 323 } 324 325 int r100_irq_process(struct radeon_device *rdev) 326 { 327 uint32_t status; 328 329 status = r100_irq_ack(rdev); 330 if (!status) { 331 return IRQ_NONE; 332 } 333 if (rdev->shutdown) { 334 return IRQ_NONE; 335 } 336 while (status) { 337 /* SW interrupt */ 338 if (status & RADEON_SW_INT_TEST) { 339 radeon_fence_process(rdev); 340 } 341 /* Vertical blank interrupts */ 342 if (status & RADEON_CRTC_VBLANK_STAT) { 343 drm_handle_vblank(rdev->ddev, 0); 344 } 345 if (status & RADEON_CRTC2_VBLANK_STAT) { 346 drm_handle_vblank(rdev->ddev, 1); 347 } 348 status = r100_irq_ack(rdev); 349 } 350 return IRQ_HANDLED; 351 } 352 353 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc) 354 { 355 if (crtc == 0) 356 return RREG32(RADEON_CRTC_CRNT_FRAME); 357 else 358 return RREG32(RADEON_CRTC2_CRNT_FRAME); 359 } 360 361 362 /* 363 * Fence emission 364 */ 365 void r100_fence_ring_emit(struct radeon_device *rdev, 366 struct radeon_fence *fence) 367 { 368 /* Who ever call radeon_fence_emit should call ring_lock and ask 369 * for enough space (today caller are ib schedule and buffer move) */ 370 /* Wait until IDLE & CLEAN */ 371 radeon_ring_write(rdev, PACKET0(0x1720, 0)); 372 radeon_ring_write(rdev, (1 << 16) | (1 << 17)); 373 /* Emit fence sequence & fire IRQ */ 374 radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0)); 375 radeon_ring_write(rdev, fence->seq); 376 radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0)); 377 radeon_ring_write(rdev, RADEON_SW_INT_FIRE); 378 } 379 380 381 /* 382 * Writeback 383 */ 384 int r100_wb_init(struct radeon_device *rdev) 385 { 386 int r; 387 388 if (rdev->wb.wb_obj == NULL) { 389 r = radeon_object_create(rdev, NULL, 4096, 390 true, 391 RADEON_GEM_DOMAIN_GTT, 392 false, &rdev->wb.wb_obj); 393 if (r) { 394 DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r); 395 return r; 396 } 397 r = radeon_object_pin(rdev->wb.wb_obj, 398 RADEON_GEM_DOMAIN_GTT, 399 &rdev->wb.gpu_addr); 400 if (r) { 401 DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r); 402 return r; 403 } 404 r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb); 405 if (r) { 406 DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r); 407 return r; 408 } 409 } 410 WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr); 411 WREG32(R_00070C_CP_RB_RPTR_ADDR, 412 S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + 1024) >> 2)); 413 WREG32(R_000770_SCRATCH_UMSK, 0xff); 414 return 0; 415 } 416 417 void r100_wb_disable(struct radeon_device *rdev) 418 { 419 WREG32(R_000770_SCRATCH_UMSK, 0); 420 } 421 422 void r100_wb_fini(struct radeon_device *rdev) 423 { 424 r100_wb_disable(rdev); 425 if (rdev->wb.wb_obj) { 426 radeon_object_kunmap(rdev->wb.wb_obj); 427 radeon_object_unpin(rdev->wb.wb_obj); 428 radeon_object_unref(&rdev->wb.wb_obj); 429 rdev->wb.wb = NULL; 430 rdev->wb.wb_obj = NULL; 431 } 432 } 433 434 int r100_copy_blit(struct radeon_device *rdev, 435 uint64_t src_offset, 436 uint64_t dst_offset, 437 unsigned num_pages, 438 struct radeon_fence *fence) 439 { 440 uint32_t cur_pages; 441 uint32_t stride_bytes = PAGE_SIZE; 442 uint32_t pitch; 443 uint32_t stride_pixels; 444 unsigned ndw; 445 int num_loops; 446 int r = 0; 447 448 /* radeon limited to 16k stride */ 449 stride_bytes &= 0x3fff; 450 /* radeon pitch is /64 */ 451 pitch = stride_bytes / 64; 452 stride_pixels = stride_bytes / 4; 453 num_loops = DIV_ROUND_UP(num_pages, 8191); 454 455 /* Ask for enough room for blit + flush + fence */ 456 ndw = 64 + (10 * num_loops); 457 r = radeon_ring_lock(rdev, ndw); 458 if (r) { 459 DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw); 460 return -EINVAL; 461 } 462 while (num_pages > 0) { 463 cur_pages = num_pages; 464 if (cur_pages > 8191) { 465 cur_pages = 8191; 466 } 467 num_pages -= cur_pages; 468 469 /* pages are in Y direction - height 470 page width in X direction - width */ 471 radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8)); 472 radeon_ring_write(rdev, 473 RADEON_GMC_SRC_PITCH_OFFSET_CNTL | 474 RADEON_GMC_DST_PITCH_OFFSET_CNTL | 475 RADEON_GMC_SRC_CLIPPING | 476 RADEON_GMC_DST_CLIPPING | 477 RADEON_GMC_BRUSH_NONE | 478 (RADEON_COLOR_FORMAT_ARGB8888 << 8) | 479 RADEON_GMC_SRC_DATATYPE_COLOR | 480 RADEON_ROP3_S | 481 RADEON_DP_SRC_SOURCE_MEMORY | 482 RADEON_GMC_CLR_CMP_CNTL_DIS | 483 RADEON_GMC_WR_MSK_DIS); 484 radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10)); 485 radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10)); 486 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16)); 487 radeon_ring_write(rdev, 0); 488 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16)); 489 radeon_ring_write(rdev, num_pages); 490 radeon_ring_write(rdev, num_pages); 491 radeon_ring_write(rdev, cur_pages | (stride_pixels << 16)); 492 } 493 radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0)); 494 radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL); 495 radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); 496 radeon_ring_write(rdev, 497 RADEON_WAIT_2D_IDLECLEAN | 498 RADEON_WAIT_HOST_IDLECLEAN | 499 RADEON_WAIT_DMA_GUI_IDLE); 500 if (fence) { 501 r = radeon_fence_emit(rdev, fence); 502 } 503 radeon_ring_unlock_commit(rdev); 504 return r; 505 } 506 507 508 /* 509 * CP 510 */ 511 static int r100_cp_wait_for_idle(struct radeon_device *rdev) 512 { 513 unsigned i; 514 u32 tmp; 515 516 for (i = 0; i < rdev->usec_timeout; i++) { 517 tmp = RREG32(R_000E40_RBBM_STATUS); 518 if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) { 519 return 0; 520 } 521 udelay(1); 522 } 523 return -1; 524 } 525 526 void r100_ring_start(struct radeon_device *rdev) 527 { 528 int r; 529 530 r = radeon_ring_lock(rdev, 2); 531 if (r) { 532 return; 533 } 534 radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0)); 535 radeon_ring_write(rdev, 536 RADEON_ISYNC_ANY2D_IDLE3D | 537 RADEON_ISYNC_ANY3D_IDLE2D | 538 RADEON_ISYNC_WAIT_IDLEGUI | 539 RADEON_ISYNC_CPSCRATCH_IDLEGUI); 540 radeon_ring_unlock_commit(rdev); 541 } 542 543 544 /* Load the microcode for the CP */ 545 static int r100_cp_init_microcode(struct radeon_device *rdev) 546 { 547 struct platform_device *pdev; 548 const char *fw_name = NULL; 549 int err; 550 551 DRM_DEBUG("\n"); 552 553 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0); 554 err = IS_ERR(pdev); 555 if (err) { 556 printk(KERN_ERR "radeon_cp: Failed to register firmware\n"); 557 return -EINVAL; 558 } 559 if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) || 560 (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) || 561 (rdev->family == CHIP_RS200)) { 562 DRM_INFO("Loading R100 Microcode\n"); 563 fw_name = FIRMWARE_R100; 564 } else if ((rdev->family == CHIP_R200) || 565 (rdev->family == CHIP_RV250) || 566 (rdev->family == CHIP_RV280) || 567 (rdev->family == CHIP_RS300)) { 568 DRM_INFO("Loading R200 Microcode\n"); 569 fw_name = FIRMWARE_R200; 570 } else if ((rdev->family == CHIP_R300) || 571 (rdev->family == CHIP_R350) || 572 (rdev->family == CHIP_RV350) || 573 (rdev->family == CHIP_RV380) || 574 (rdev->family == CHIP_RS400) || 575 (rdev->family == CHIP_RS480)) { 576 DRM_INFO("Loading R300 Microcode\n"); 577 fw_name = FIRMWARE_R300; 578 } else if ((rdev->family == CHIP_R420) || 579 (rdev->family == CHIP_R423) || 580 (rdev->family == CHIP_RV410)) { 581 DRM_INFO("Loading R400 Microcode\n"); 582 fw_name = FIRMWARE_R420; 583 } else if ((rdev->family == CHIP_RS690) || 584 (rdev->family == CHIP_RS740)) { 585 DRM_INFO("Loading RS690/RS740 Microcode\n"); 586 fw_name = FIRMWARE_RS690; 587 } else if (rdev->family == CHIP_RS600) { 588 DRM_INFO("Loading RS600 Microcode\n"); 589 fw_name = FIRMWARE_RS600; 590 } else if ((rdev->family == CHIP_RV515) || 591 (rdev->family == CHIP_R520) || 592 (rdev->family == CHIP_RV530) || 593 (rdev->family == CHIP_R580) || 594 (rdev->family == CHIP_RV560) || 595 (rdev->family == CHIP_RV570)) { 596 DRM_INFO("Loading R500 Microcode\n"); 597 fw_name = FIRMWARE_R520; 598 } 599 600 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev); 601 platform_device_unregister(pdev); 602 if (err) { 603 printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n", 604 fw_name); 605 } else if (rdev->me_fw->size % 8) { 606 printk(KERN_ERR 607 "radeon_cp: Bogus length %zu in firmware \"%s\"\n", 608 rdev->me_fw->size, fw_name); 609 err = -EINVAL; 610 release_firmware(rdev->me_fw); 611 rdev->me_fw = NULL; 612 } 613 return err; 614 } 615 static void r100_cp_load_microcode(struct radeon_device *rdev) 616 { 617 const __be32 *fw_data; 618 int i, size; 619 620 if (r100_gui_wait_for_idle(rdev)) { 621 printk(KERN_WARNING "Failed to wait GUI idle while " 622 "programming pipes. Bad things might happen.\n"); 623 } 624 625 if (rdev->me_fw) { 626 size = rdev->me_fw->size / 4; 627 fw_data = (const __be32 *)&rdev->me_fw->data[0]; 628 WREG32(RADEON_CP_ME_RAM_ADDR, 0); 629 for (i = 0; i < size; i += 2) { 630 WREG32(RADEON_CP_ME_RAM_DATAH, 631 be32_to_cpup(&fw_data[i])); 632 WREG32(RADEON_CP_ME_RAM_DATAL, 633 be32_to_cpup(&fw_data[i + 1])); 634 } 635 } 636 } 637 638 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) 639 { 640 unsigned rb_bufsz; 641 unsigned rb_blksz; 642 unsigned max_fetch; 643 unsigned pre_write_timer; 644 unsigned pre_write_limit; 645 unsigned indirect2_start; 646 unsigned indirect1_start; 647 uint32_t tmp; 648 int r; 649 650 if (r100_debugfs_cp_init(rdev)) { 651 DRM_ERROR("Failed to register debugfs file for CP !\n"); 652 } 653 /* Reset CP */ 654 tmp = RREG32(RADEON_CP_CSQ_STAT); 655 if ((tmp & (1 << 31))) { 656 DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp); 657 WREG32(RADEON_CP_CSQ_MODE, 0); 658 WREG32(RADEON_CP_CSQ_CNTL, 0); 659 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); 660 tmp = RREG32(RADEON_RBBM_SOFT_RESET); 661 mdelay(2); 662 WREG32(RADEON_RBBM_SOFT_RESET, 0); 663 tmp = RREG32(RADEON_RBBM_SOFT_RESET); 664 mdelay(2); 665 tmp = RREG32(RADEON_CP_CSQ_STAT); 666 if ((tmp & (1 << 31))) { 667 DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp); 668 } 669 } else { 670 DRM_INFO("radeon: cp idle (0x%08X)\n", tmp); 671 } 672 673 if (!rdev->me_fw) { 674 r = r100_cp_init_microcode(rdev); 675 if (r) { 676 DRM_ERROR("Failed to load firmware!\n"); 677 return r; 678 } 679 } 680 681 /* Align ring size */ 682 rb_bufsz = drm_order(ring_size / 8); 683 ring_size = (1 << (rb_bufsz + 1)) * 4; 684 r100_cp_load_microcode(rdev); 685 r = radeon_ring_init(rdev, ring_size); 686 if (r) { 687 return r; 688 } 689 /* Each time the cp read 1024 bytes (16 dword/quadword) update 690 * the rptr copy in system ram */ 691 rb_blksz = 9; 692 /* cp will read 128bytes at a time (4 dwords) */ 693 max_fetch = 1; 694 rdev->cp.align_mask = 16 - 1; 695 /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */ 696 pre_write_timer = 64; 697 /* Force CP_RB_WPTR write if written more than one time before the 698 * delay expire 699 */ 700 pre_write_limit = 0; 701 /* Setup the cp cache like this (cache size is 96 dwords) : 702 * RING 0 to 15 703 * INDIRECT1 16 to 79 704 * INDIRECT2 80 to 95 705 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords)) 706 * indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords)) 707 * indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords)) 708 * Idea being that most of the gpu cmd will be through indirect1 buffer 709 * so it gets the bigger cache. 710 */ 711 indirect2_start = 80; 712 indirect1_start = 16; 713 /* cp setup */ 714 WREG32(0x718, pre_write_timer | (pre_write_limit << 28)); 715 WREG32(RADEON_CP_RB_CNTL, 716 #ifdef __BIG_ENDIAN 717 RADEON_BUF_SWAP_32BIT | 718 #endif 719 REG_SET(RADEON_RB_BUFSZ, rb_bufsz) | 720 REG_SET(RADEON_RB_BLKSZ, rb_blksz) | 721 REG_SET(RADEON_MAX_FETCH, max_fetch) | 722 RADEON_RB_NO_UPDATE); 723 /* Set ring address */ 724 DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr); 725 WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr); 726 /* Force read & write ptr to 0 */ 727 tmp = RREG32(RADEON_CP_RB_CNTL); 728 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); 729 WREG32(RADEON_CP_RB_RPTR_WR, 0); 730 WREG32(RADEON_CP_RB_WPTR, 0); 731 WREG32(RADEON_CP_RB_CNTL, tmp); 732 udelay(10); 733 rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); 734 rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR); 735 /* Set cp mode to bus mastering & enable cp*/ 736 WREG32(RADEON_CP_CSQ_MODE, 737 REG_SET(RADEON_INDIRECT2_START, indirect2_start) | 738 REG_SET(RADEON_INDIRECT1_START, indirect1_start)); 739 WREG32(0x718, 0); 740 WREG32(0x744, 0x00004D4D); 741 WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM); 742 radeon_ring_start(rdev); 743 r = radeon_ring_test(rdev); 744 if (r) { 745 DRM_ERROR("radeon: cp isn't working (%d).\n", r); 746 return r; 747 } 748 rdev->cp.ready = true; 749 return 0; 750 } 751 752 void r100_cp_fini(struct radeon_device *rdev) 753 { 754 if (r100_cp_wait_for_idle(rdev)) { 755 DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n"); 756 } 757 /* Disable ring */ 758 r100_cp_disable(rdev); 759 radeon_ring_fini(rdev); 760 DRM_INFO("radeon: cp finalized\n"); 761 } 762 763 void r100_cp_disable(struct radeon_device *rdev) 764 { 765 /* Disable ring */ 766 rdev->cp.ready = false; 767 WREG32(RADEON_CP_CSQ_MODE, 0); 768 WREG32(RADEON_CP_CSQ_CNTL, 0); 769 if (r100_gui_wait_for_idle(rdev)) { 770 printk(KERN_WARNING "Failed to wait GUI idle while " 771 "programming pipes. Bad things might happen.\n"); 772 } 773 } 774 775 int r100_cp_reset(struct radeon_device *rdev) 776 { 777 uint32_t tmp; 778 bool reinit_cp; 779 int i; 780 781 reinit_cp = rdev->cp.ready; 782 rdev->cp.ready = false; 783 WREG32(RADEON_CP_CSQ_MODE, 0); 784 WREG32(RADEON_CP_CSQ_CNTL, 0); 785 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); 786 (void)RREG32(RADEON_RBBM_SOFT_RESET); 787 udelay(200); 788 WREG32(RADEON_RBBM_SOFT_RESET, 0); 789 /* Wait to prevent race in RBBM_STATUS */ 790 mdelay(1); 791 for (i = 0; i < rdev->usec_timeout; i++) { 792 tmp = RREG32(RADEON_RBBM_STATUS); 793 if (!(tmp & (1 << 16))) { 794 DRM_INFO("CP reset succeed (RBBM_STATUS=0x%08X)\n", 795 tmp); 796 if (reinit_cp) { 797 return r100_cp_init(rdev, rdev->cp.ring_size); 798 } 799 return 0; 800 } 801 DRM_UDELAY(1); 802 } 803 tmp = RREG32(RADEON_RBBM_STATUS); 804 DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp); 805 return -1; 806 } 807 808 void r100_cp_commit(struct radeon_device *rdev) 809 { 810 WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); 811 (void)RREG32(RADEON_CP_RB_WPTR); 812 } 813 814 815 /* 816 * CS functions 817 */ 818 int r100_cs_parse_packet0(struct radeon_cs_parser *p, 819 struct radeon_cs_packet *pkt, 820 const unsigned *auth, unsigned n, 821 radeon_packet0_check_t check) 822 { 823 unsigned reg; 824 unsigned i, j, m; 825 unsigned idx; 826 int r; 827 828 idx = pkt->idx + 1; 829 reg = pkt->reg; 830 /* Check that register fall into register range 831 * determined by the number of entry (n) in the 832 * safe register bitmap. 833 */ 834 if (pkt->one_reg_wr) { 835 if ((reg >> 7) > n) { 836 return -EINVAL; 837 } 838 } else { 839 if (((reg + (pkt->count << 2)) >> 7) > n) { 840 return -EINVAL; 841 } 842 } 843 for (i = 0; i <= pkt->count; i++, idx++) { 844 j = (reg >> 7); 845 m = 1 << ((reg >> 2) & 31); 846 if (auth[j] & m) { 847 r = check(p, pkt, idx, reg); 848 if (r) { 849 return r; 850 } 851 } 852 if (pkt->one_reg_wr) { 853 if (!(auth[j] & m)) { 854 break; 855 } 856 } else { 857 reg += 4; 858 } 859 } 860 return 0; 861 } 862 863 void r100_cs_dump_packet(struct radeon_cs_parser *p, 864 struct radeon_cs_packet *pkt) 865 { 866 struct radeon_cs_chunk *ib_chunk; 867 volatile uint32_t *ib; 868 unsigned i; 869 unsigned idx; 870 871 ib = p->ib->ptr; 872 ib_chunk = &p->chunks[p->chunk_ib_idx]; 873 idx = pkt->idx; 874 for (i = 0; i <= (pkt->count + 1); i++, idx++) { 875 DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]); 876 } 877 } 878 879 /** 880 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet 881 * @parser: parser structure holding parsing context. 882 * @pkt: where to store packet informations 883 * 884 * Assume that chunk_ib_index is properly set. Will return -EINVAL 885 * if packet is bigger than remaining ib size. or if packets is unknown. 886 **/ 887 int r100_cs_packet_parse(struct radeon_cs_parser *p, 888 struct radeon_cs_packet *pkt, 889 unsigned idx) 890 { 891 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; 892 uint32_t header; 893 894 if (idx >= ib_chunk->length_dw) { 895 DRM_ERROR("Can not parse packet at %d after CS end %d !\n", 896 idx, ib_chunk->length_dw); 897 return -EINVAL; 898 } 899 header = ib_chunk->kdata[idx]; 900 pkt->idx = idx; 901 pkt->type = CP_PACKET_GET_TYPE(header); 902 pkt->count = CP_PACKET_GET_COUNT(header); 903 switch (pkt->type) { 904 case PACKET_TYPE0: 905 pkt->reg = CP_PACKET0_GET_REG(header); 906 pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header); 907 break; 908 case PACKET_TYPE3: 909 pkt->opcode = CP_PACKET3_GET_OPCODE(header); 910 break; 911 case PACKET_TYPE2: 912 pkt->count = -1; 913 break; 914 default: 915 DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx); 916 return -EINVAL; 917 } 918 if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) { 919 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n", 920 pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw); 921 return -EINVAL; 922 } 923 return 0; 924 } 925 926 /** 927 * r100_cs_packet_next_vline() - parse userspace VLINE packet 928 * @parser: parser structure holding parsing context. 929 * 930 * Userspace sends a special sequence for VLINE waits. 931 * PACKET0 - VLINE_START_END + value 932 * PACKET0 - WAIT_UNTIL +_value 933 * RELOC (P3) - crtc_id in reloc. 934 * 935 * This function parses this and relocates the VLINE START END 936 * and WAIT UNTIL packets to the correct crtc. 937 * It also detects a switched off crtc and nulls out the 938 * wait in that case. 939 */ 940 int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) 941 { 942 struct radeon_cs_chunk *ib_chunk; 943 struct drm_mode_object *obj; 944 struct drm_crtc *crtc; 945 struct radeon_crtc *radeon_crtc; 946 struct radeon_cs_packet p3reloc, waitreloc; 947 int crtc_id; 948 int r; 949 uint32_t header, h_idx, reg; 950 951 ib_chunk = &p->chunks[p->chunk_ib_idx]; 952 953 /* parse the wait until */ 954 r = r100_cs_packet_parse(p, &waitreloc, p->idx); 955 if (r) 956 return r; 957 958 /* check its a wait until and only 1 count */ 959 if (waitreloc.reg != RADEON_WAIT_UNTIL || 960 waitreloc.count != 0) { 961 DRM_ERROR("vline wait had illegal wait until segment\n"); 962 r = -EINVAL; 963 return r; 964 } 965 966 if (ib_chunk->kdata[waitreloc.idx + 1] != RADEON_WAIT_CRTC_VLINE) { 967 DRM_ERROR("vline wait had illegal wait until\n"); 968 r = -EINVAL; 969 return r; 970 } 971 972 /* jump over the NOP */ 973 r = r100_cs_packet_parse(p, &p3reloc, p->idx); 974 if (r) 975 return r; 976 977 h_idx = p->idx - 2; 978 p->idx += waitreloc.count; 979 p->idx += p3reloc.count; 980 981 header = ib_chunk->kdata[h_idx]; 982 crtc_id = ib_chunk->kdata[h_idx + 5]; 983 reg = ib_chunk->kdata[h_idx] >> 2; 984 mutex_lock(&p->rdev->ddev->mode_config.mutex); 985 obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC); 986 if (!obj) { 987 DRM_ERROR("cannot find crtc %d\n", crtc_id); 988 r = -EINVAL; 989 goto out; 990 } 991 crtc = obj_to_crtc(obj); 992 radeon_crtc = to_radeon_crtc(crtc); 993 crtc_id = radeon_crtc->crtc_id; 994 995 if (!crtc->enabled) { 996 /* if the CRTC isn't enabled - we need to nop out the wait until */ 997 ib_chunk->kdata[h_idx + 2] = PACKET2(0); 998 ib_chunk->kdata[h_idx + 3] = PACKET2(0); 999 } else if (crtc_id == 1) { 1000 switch (reg) { 1001 case AVIVO_D1MODE_VLINE_START_END: 1002 header &= R300_CP_PACKET0_REG_MASK; 1003 header |= AVIVO_D2MODE_VLINE_START_END >> 2; 1004 break; 1005 case RADEON_CRTC_GUI_TRIG_VLINE: 1006 header &= R300_CP_PACKET0_REG_MASK; 1007 header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2; 1008 break; 1009 default: 1010 DRM_ERROR("unknown crtc reloc\n"); 1011 r = -EINVAL; 1012 goto out; 1013 } 1014 ib_chunk->kdata[h_idx] = header; 1015 ib_chunk->kdata[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1; 1016 } 1017 out: 1018 mutex_unlock(&p->rdev->ddev->mode_config.mutex); 1019 return r; 1020 } 1021 1022 /** 1023 * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3 1024 * @parser: parser structure holding parsing context. 1025 * @data: pointer to relocation data 1026 * @offset_start: starting offset 1027 * @offset_mask: offset mask (to align start offset on) 1028 * @reloc: reloc informations 1029 * 1030 * Check next packet is relocation packet3, do bo validation and compute 1031 * GPU offset using the provided start. 1032 **/ 1033 int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, 1034 struct radeon_cs_reloc **cs_reloc) 1035 { 1036 struct radeon_cs_chunk *ib_chunk; 1037 struct radeon_cs_chunk *relocs_chunk; 1038 struct radeon_cs_packet p3reloc; 1039 unsigned idx; 1040 int r; 1041 1042 if (p->chunk_relocs_idx == -1) { 1043 DRM_ERROR("No relocation chunk !\n"); 1044 return -EINVAL; 1045 } 1046 *cs_reloc = NULL; 1047 ib_chunk = &p->chunks[p->chunk_ib_idx]; 1048 relocs_chunk = &p->chunks[p->chunk_relocs_idx]; 1049 r = r100_cs_packet_parse(p, &p3reloc, p->idx); 1050 if (r) { 1051 return r; 1052 } 1053 p->idx += p3reloc.count + 2; 1054 if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { 1055 DRM_ERROR("No packet3 for relocation for packet at %d.\n", 1056 p3reloc.idx); 1057 r100_cs_dump_packet(p, &p3reloc); 1058 return -EINVAL; 1059 } 1060 idx = ib_chunk->kdata[p3reloc.idx + 1]; 1061 if (idx >= relocs_chunk->length_dw) { 1062 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 1063 idx, relocs_chunk->length_dw); 1064 r100_cs_dump_packet(p, &p3reloc); 1065 return -EINVAL; 1066 } 1067 /* FIXME: we assume reloc size is 4 dwords */ 1068 *cs_reloc = p->relocs_ptr[(idx / 4)]; 1069 return 0; 1070 } 1071 1072 static int r100_get_vtx_size(uint32_t vtx_fmt) 1073 { 1074 int vtx_size; 1075 vtx_size = 2; 1076 /* ordered according to bits in spec */ 1077 if (vtx_fmt & RADEON_SE_VTX_FMT_W0) 1078 vtx_size++; 1079 if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR) 1080 vtx_size += 3; 1081 if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA) 1082 vtx_size++; 1083 if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR) 1084 vtx_size++; 1085 if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC) 1086 vtx_size += 3; 1087 if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG) 1088 vtx_size++; 1089 if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC) 1090 vtx_size++; 1091 if (vtx_fmt & RADEON_SE_VTX_FMT_ST0) 1092 vtx_size += 2; 1093 if (vtx_fmt & RADEON_SE_VTX_FMT_ST1) 1094 vtx_size += 2; 1095 if (vtx_fmt & RADEON_SE_VTX_FMT_Q1) 1096 vtx_size++; 1097 if (vtx_fmt & RADEON_SE_VTX_FMT_ST2) 1098 vtx_size += 2; 1099 if (vtx_fmt & RADEON_SE_VTX_FMT_Q2) 1100 vtx_size++; 1101 if (vtx_fmt & RADEON_SE_VTX_FMT_ST3) 1102 vtx_size += 2; 1103 if (vtx_fmt & RADEON_SE_VTX_FMT_Q3) 1104 vtx_size++; 1105 if (vtx_fmt & RADEON_SE_VTX_FMT_Q0) 1106 vtx_size++; 1107 /* blend weight */ 1108 if (vtx_fmt & (0x7 << 15)) 1109 vtx_size += (vtx_fmt >> 15) & 0x7; 1110 if (vtx_fmt & RADEON_SE_VTX_FMT_N0) 1111 vtx_size += 3; 1112 if (vtx_fmt & RADEON_SE_VTX_FMT_XY1) 1113 vtx_size += 2; 1114 if (vtx_fmt & RADEON_SE_VTX_FMT_Z1) 1115 vtx_size++; 1116 if (vtx_fmt & RADEON_SE_VTX_FMT_W1) 1117 vtx_size++; 1118 if (vtx_fmt & RADEON_SE_VTX_FMT_N1) 1119 vtx_size++; 1120 if (vtx_fmt & RADEON_SE_VTX_FMT_Z) 1121 vtx_size++; 1122 return vtx_size; 1123 } 1124 1125 static int r100_packet0_check(struct radeon_cs_parser *p, 1126 struct radeon_cs_packet *pkt, 1127 unsigned idx, unsigned reg) 1128 { 1129 struct radeon_cs_chunk *ib_chunk; 1130 struct radeon_cs_reloc *reloc; 1131 struct r100_cs_track *track; 1132 volatile uint32_t *ib; 1133 uint32_t tmp; 1134 int r; 1135 int i, face; 1136 u32 tile_flags = 0; 1137 1138 ib = p->ib->ptr; 1139 ib_chunk = &p->chunks[p->chunk_ib_idx]; 1140 track = (struct r100_cs_track *)p->track; 1141 1142 switch (reg) { 1143 case RADEON_CRTC_GUI_TRIG_VLINE: 1144 r = r100_cs_packet_parse_vline(p); 1145 if (r) { 1146 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1147 idx, reg); 1148 r100_cs_dump_packet(p, pkt); 1149 return r; 1150 } 1151 break; 1152 /* FIXME: only allow PACKET3 blit? easier to check for out of 1153 * range access */ 1154 case RADEON_DST_PITCH_OFFSET: 1155 case RADEON_SRC_PITCH_OFFSET: 1156 r = r100_reloc_pitch_offset(p, pkt, idx, reg); 1157 if (r) 1158 return r; 1159 break; 1160 case RADEON_RB3D_DEPTHOFFSET: 1161 r = r100_cs_packet_next_reloc(p, &reloc); 1162 if (r) { 1163 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1164 idx, reg); 1165 r100_cs_dump_packet(p, pkt); 1166 return r; 1167 } 1168 track->zb.robj = reloc->robj; 1169 track->zb.offset = ib_chunk->kdata[idx]; 1170 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1171 break; 1172 case RADEON_RB3D_COLOROFFSET: 1173 r = r100_cs_packet_next_reloc(p, &reloc); 1174 if (r) { 1175 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1176 idx, reg); 1177 r100_cs_dump_packet(p, pkt); 1178 return r; 1179 } 1180 track->cb[0].robj = reloc->robj; 1181 track->cb[0].offset = ib_chunk->kdata[idx]; 1182 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1183 break; 1184 case RADEON_PP_TXOFFSET_0: 1185 case RADEON_PP_TXOFFSET_1: 1186 case RADEON_PP_TXOFFSET_2: 1187 i = (reg - RADEON_PP_TXOFFSET_0) / 24; 1188 r = r100_cs_packet_next_reloc(p, &reloc); 1189 if (r) { 1190 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1191 idx, reg); 1192 r100_cs_dump_packet(p, pkt); 1193 return r; 1194 } 1195 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1196 track->textures[i].robj = reloc->robj; 1197 break; 1198 case RADEON_PP_CUBIC_OFFSET_T0_0: 1199 case RADEON_PP_CUBIC_OFFSET_T0_1: 1200 case RADEON_PP_CUBIC_OFFSET_T0_2: 1201 case RADEON_PP_CUBIC_OFFSET_T0_3: 1202 case RADEON_PP_CUBIC_OFFSET_T0_4: 1203 i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4; 1204 r = r100_cs_packet_next_reloc(p, &reloc); 1205 if (r) { 1206 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1207 idx, reg); 1208 r100_cs_dump_packet(p, pkt); 1209 return r; 1210 } 1211 track->textures[0].cube_info[i].offset = ib_chunk->kdata[idx]; 1212 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1213 track->textures[0].cube_info[i].robj = reloc->robj; 1214 break; 1215 case RADEON_PP_CUBIC_OFFSET_T1_0: 1216 case RADEON_PP_CUBIC_OFFSET_T1_1: 1217 case RADEON_PP_CUBIC_OFFSET_T1_2: 1218 case RADEON_PP_CUBIC_OFFSET_T1_3: 1219 case RADEON_PP_CUBIC_OFFSET_T1_4: 1220 i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4; 1221 r = r100_cs_packet_next_reloc(p, &reloc); 1222 if (r) { 1223 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1224 idx, reg); 1225 r100_cs_dump_packet(p, pkt); 1226 return r; 1227 } 1228 track->textures[1].cube_info[i].offset = ib_chunk->kdata[idx]; 1229 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1230 track->textures[1].cube_info[i].robj = reloc->robj; 1231 break; 1232 case RADEON_PP_CUBIC_OFFSET_T2_0: 1233 case RADEON_PP_CUBIC_OFFSET_T2_1: 1234 case RADEON_PP_CUBIC_OFFSET_T2_2: 1235 case RADEON_PP_CUBIC_OFFSET_T2_3: 1236 case RADEON_PP_CUBIC_OFFSET_T2_4: 1237 i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4; 1238 r = r100_cs_packet_next_reloc(p, &reloc); 1239 if (r) { 1240 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1241 idx, reg); 1242 r100_cs_dump_packet(p, pkt); 1243 return r; 1244 } 1245 track->textures[2].cube_info[i].offset = ib_chunk->kdata[idx]; 1246 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1247 track->textures[2].cube_info[i].robj = reloc->robj; 1248 break; 1249 case RADEON_RE_WIDTH_HEIGHT: 1250 track->maxy = ((ib_chunk->kdata[idx] >> 16) & 0x7FF); 1251 break; 1252 case RADEON_RB3D_COLORPITCH: 1253 r = r100_cs_packet_next_reloc(p, &reloc); 1254 if (r) { 1255 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1256 idx, reg); 1257 r100_cs_dump_packet(p, pkt); 1258 return r; 1259 } 1260 1261 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1262 tile_flags |= RADEON_COLOR_TILE_ENABLE; 1263 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1264 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; 1265 1266 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); 1267 tmp |= tile_flags; 1268 ib[idx] = tmp; 1269 1270 track->cb[0].pitch = ib_chunk->kdata[idx] & RADEON_COLORPITCH_MASK; 1271 break; 1272 case RADEON_RB3D_DEPTHPITCH: 1273 track->zb.pitch = ib_chunk->kdata[idx] & RADEON_DEPTHPITCH_MASK; 1274 break; 1275 case RADEON_RB3D_CNTL: 1276 switch ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 1277 case 7: 1278 case 8: 1279 case 9: 1280 case 11: 1281 case 12: 1282 track->cb[0].cpp = 1; 1283 break; 1284 case 3: 1285 case 4: 1286 case 15: 1287 track->cb[0].cpp = 2; 1288 break; 1289 case 6: 1290 track->cb[0].cpp = 4; 1291 break; 1292 default: 1293 DRM_ERROR("Invalid color buffer format (%d) !\n", 1294 ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); 1295 return -EINVAL; 1296 } 1297 track->z_enabled = !!(ib_chunk->kdata[idx] & RADEON_Z_ENABLE); 1298 break; 1299 case RADEON_RB3D_ZSTENCILCNTL: 1300 switch (ib_chunk->kdata[idx] & 0xf) { 1301 case 0: 1302 track->zb.cpp = 2; 1303 break; 1304 case 2: 1305 case 3: 1306 case 4: 1307 case 5: 1308 case 9: 1309 case 11: 1310 track->zb.cpp = 4; 1311 break; 1312 default: 1313 break; 1314 } 1315 break; 1316 case RADEON_RB3D_ZPASS_ADDR: 1317 r = r100_cs_packet_next_reloc(p, &reloc); 1318 if (r) { 1319 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1320 idx, reg); 1321 r100_cs_dump_packet(p, pkt); 1322 return r; 1323 } 1324 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1325 break; 1326 case RADEON_PP_CNTL: 1327 { 1328 uint32_t temp = ib_chunk->kdata[idx] >> 4; 1329 for (i = 0; i < track->num_texture; i++) 1330 track->textures[i].enabled = !!(temp & (1 << i)); 1331 } 1332 break; 1333 case RADEON_SE_VF_CNTL: 1334 track->vap_vf_cntl = ib_chunk->kdata[idx]; 1335 break; 1336 case RADEON_SE_VTX_FMT: 1337 track->vtx_size = r100_get_vtx_size(ib_chunk->kdata[idx]); 1338 break; 1339 case RADEON_PP_TEX_SIZE_0: 1340 case RADEON_PP_TEX_SIZE_1: 1341 case RADEON_PP_TEX_SIZE_2: 1342 i = (reg - RADEON_PP_TEX_SIZE_0) / 8; 1343 track->textures[i].width = (ib_chunk->kdata[idx] & RADEON_TEX_USIZE_MASK) + 1; 1344 track->textures[i].height = ((ib_chunk->kdata[idx] & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 1345 break; 1346 case RADEON_PP_TEX_PITCH_0: 1347 case RADEON_PP_TEX_PITCH_1: 1348 case RADEON_PP_TEX_PITCH_2: 1349 i = (reg - RADEON_PP_TEX_PITCH_0) / 8; 1350 track->textures[i].pitch = ib_chunk->kdata[idx] + 32; 1351 break; 1352 case RADEON_PP_TXFILTER_0: 1353 case RADEON_PP_TXFILTER_1: 1354 case RADEON_PP_TXFILTER_2: 1355 i = (reg - RADEON_PP_TXFILTER_0) / 24; 1356 track->textures[i].num_levels = ((ib_chunk->kdata[idx] & RADEON_MAX_MIP_LEVEL_MASK) 1357 >> RADEON_MAX_MIP_LEVEL_SHIFT); 1358 tmp = (ib_chunk->kdata[idx] >> 23) & 0x7; 1359 if (tmp == 2 || tmp == 6) 1360 track->textures[i].roundup_w = false; 1361 tmp = (ib_chunk->kdata[idx] >> 27) & 0x7; 1362 if (tmp == 2 || tmp == 6) 1363 track->textures[i].roundup_h = false; 1364 break; 1365 case RADEON_PP_TXFORMAT_0: 1366 case RADEON_PP_TXFORMAT_1: 1367 case RADEON_PP_TXFORMAT_2: 1368 i = (reg - RADEON_PP_TXFORMAT_0) / 24; 1369 if (ib_chunk->kdata[idx] & RADEON_TXFORMAT_NON_POWER2) { 1370 track->textures[i].use_pitch = 1; 1371 } else { 1372 track->textures[i].use_pitch = 0; 1373 track->textures[i].width = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); 1374 track->textures[i].height = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); 1375 } 1376 if (ib_chunk->kdata[idx] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) 1377 track->textures[i].tex_coord_type = 2; 1378 switch ((ib_chunk->kdata[idx] & RADEON_TXFORMAT_FORMAT_MASK)) { 1379 case RADEON_TXFORMAT_I8: 1380 case RADEON_TXFORMAT_RGB332: 1381 case RADEON_TXFORMAT_Y8: 1382 track->textures[i].cpp = 1; 1383 break; 1384 case RADEON_TXFORMAT_AI88: 1385 case RADEON_TXFORMAT_ARGB1555: 1386 case RADEON_TXFORMAT_RGB565: 1387 case RADEON_TXFORMAT_ARGB4444: 1388 case RADEON_TXFORMAT_VYUY422: 1389 case RADEON_TXFORMAT_YVYU422: 1390 case RADEON_TXFORMAT_DXT1: 1391 case RADEON_TXFORMAT_SHADOW16: 1392 case RADEON_TXFORMAT_LDUDV655: 1393 case RADEON_TXFORMAT_DUDV88: 1394 track->textures[i].cpp = 2; 1395 break; 1396 case RADEON_TXFORMAT_ARGB8888: 1397 case RADEON_TXFORMAT_RGBA8888: 1398 case RADEON_TXFORMAT_DXT23: 1399 case RADEON_TXFORMAT_DXT45: 1400 case RADEON_TXFORMAT_SHADOW32: 1401 case RADEON_TXFORMAT_LDUDUV8888: 1402 track->textures[i].cpp = 4; 1403 break; 1404 } 1405 track->textures[i].cube_info[4].width = 1 << ((ib_chunk->kdata[idx] >> 16) & 0xf); 1406 track->textures[i].cube_info[4].height = 1 << ((ib_chunk->kdata[idx] >> 20) & 0xf); 1407 break; 1408 case RADEON_PP_CUBIC_FACES_0: 1409 case RADEON_PP_CUBIC_FACES_1: 1410 case RADEON_PP_CUBIC_FACES_2: 1411 tmp = ib_chunk->kdata[idx]; 1412 i = (reg - RADEON_PP_CUBIC_FACES_0) / 4; 1413 for (face = 0; face < 4; face++) { 1414 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 1415 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 1416 } 1417 break; 1418 default: 1419 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", 1420 reg, idx); 1421 return -EINVAL; 1422 } 1423 return 0; 1424 } 1425 1426 int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p, 1427 struct radeon_cs_packet *pkt, 1428 struct radeon_object *robj) 1429 { 1430 struct radeon_cs_chunk *ib_chunk; 1431 unsigned idx; 1432 1433 ib_chunk = &p->chunks[p->chunk_ib_idx]; 1434 idx = pkt->idx + 1; 1435 if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) { 1436 DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER " 1437 "(need %u have %lu) !\n", 1438 ib_chunk->kdata[idx+2] + 1, 1439 radeon_object_size(robj)); 1440 return -EINVAL; 1441 } 1442 return 0; 1443 } 1444 1445 static int r100_packet3_check(struct radeon_cs_parser *p, 1446 struct radeon_cs_packet *pkt) 1447 { 1448 struct radeon_cs_chunk *ib_chunk; 1449 struct radeon_cs_reloc *reloc; 1450 struct r100_cs_track *track; 1451 unsigned idx; 1452 unsigned i, c; 1453 volatile uint32_t *ib; 1454 int r; 1455 1456 ib = p->ib->ptr; 1457 ib_chunk = &p->chunks[p->chunk_ib_idx]; 1458 idx = pkt->idx + 1; 1459 track = (struct r100_cs_track *)p->track; 1460 switch (pkt->opcode) { 1461 case PACKET3_3D_LOAD_VBPNTR: 1462 c = ib_chunk->kdata[idx++]; 1463 track->num_arrays = c; 1464 for (i = 0; i < (c - 1); i += 2, idx += 3) { 1465 r = r100_cs_packet_next_reloc(p, &reloc); 1466 if (r) { 1467 DRM_ERROR("No reloc for packet3 %d\n", 1468 pkt->opcode); 1469 r100_cs_dump_packet(p, pkt); 1470 return r; 1471 } 1472 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); 1473 track->arrays[i + 0].robj = reloc->robj; 1474 track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8; 1475 track->arrays[i + 0].esize &= 0x7F; 1476 r = r100_cs_packet_next_reloc(p, &reloc); 1477 if (r) { 1478 DRM_ERROR("No reloc for packet3 %d\n", 1479 pkt->opcode); 1480 r100_cs_dump_packet(p, pkt); 1481 return r; 1482 } 1483 ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset); 1484 track->arrays[i + 1].robj = reloc->robj; 1485 track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24; 1486 track->arrays[i + 1].esize &= 0x7F; 1487 } 1488 if (c & 1) { 1489 r = r100_cs_packet_next_reloc(p, &reloc); 1490 if (r) { 1491 DRM_ERROR("No reloc for packet3 %d\n", 1492 pkt->opcode); 1493 r100_cs_dump_packet(p, pkt); 1494 return r; 1495 } 1496 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); 1497 track->arrays[i + 0].robj = reloc->robj; 1498 track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8; 1499 track->arrays[i + 0].esize &= 0x7F; 1500 } 1501 break; 1502 case PACKET3_INDX_BUFFER: 1503 r = r100_cs_packet_next_reloc(p, &reloc); 1504 if (r) { 1505 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); 1506 r100_cs_dump_packet(p, pkt); 1507 return r; 1508 } 1509 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset); 1510 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); 1511 if (r) { 1512 return r; 1513 } 1514 break; 1515 case 0x23: 1516 /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */ 1517 r = r100_cs_packet_next_reloc(p, &reloc); 1518 if (r) { 1519 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); 1520 r100_cs_dump_packet(p, pkt); 1521 return r; 1522 } 1523 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); 1524 track->num_arrays = 1; 1525 track->vtx_size = r100_get_vtx_size(ib_chunk->kdata[idx+2]); 1526 1527 track->arrays[0].robj = reloc->robj; 1528 track->arrays[0].esize = track->vtx_size; 1529 1530 track->max_indx = ib_chunk->kdata[idx+1]; 1531 1532 track->vap_vf_cntl = ib_chunk->kdata[idx+3]; 1533 track->immd_dwords = pkt->count - 1; 1534 r = r100_cs_track_check(p->rdev, track); 1535 if (r) 1536 return r; 1537 break; 1538 case PACKET3_3D_DRAW_IMMD: 1539 if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) { 1540 DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); 1541 return -EINVAL; 1542 } 1543 track->vap_vf_cntl = ib_chunk->kdata[idx+1]; 1544 track->immd_dwords = pkt->count - 1; 1545 r = r100_cs_track_check(p->rdev, track); 1546 if (r) 1547 return r; 1548 break; 1549 /* triggers drawing using in-packet vertex data */ 1550 case PACKET3_3D_DRAW_IMMD_2: 1551 if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) { 1552 DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); 1553 return -EINVAL; 1554 } 1555 track->vap_vf_cntl = ib_chunk->kdata[idx]; 1556 track->immd_dwords = pkt->count; 1557 r = r100_cs_track_check(p->rdev, track); 1558 if (r) 1559 return r; 1560 break; 1561 /* triggers drawing using in-packet vertex data */ 1562 case PACKET3_3D_DRAW_VBUF_2: 1563 track->vap_vf_cntl = ib_chunk->kdata[idx]; 1564 r = r100_cs_track_check(p->rdev, track); 1565 if (r) 1566 return r; 1567 break; 1568 /* triggers drawing of vertex buffers setup elsewhere */ 1569 case PACKET3_3D_DRAW_INDX_2: 1570 track->vap_vf_cntl = ib_chunk->kdata[idx]; 1571 r = r100_cs_track_check(p->rdev, track); 1572 if (r) 1573 return r; 1574 break; 1575 /* triggers drawing using indices to vertex buffer */ 1576 case PACKET3_3D_DRAW_VBUF: 1577 track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; 1578 r = r100_cs_track_check(p->rdev, track); 1579 if (r) 1580 return r; 1581 break; 1582 /* triggers drawing of vertex buffers setup elsewhere */ 1583 case PACKET3_3D_DRAW_INDX: 1584 track->vap_vf_cntl = ib_chunk->kdata[idx + 1]; 1585 r = r100_cs_track_check(p->rdev, track); 1586 if (r) 1587 return r; 1588 break; 1589 /* triggers drawing using indices to vertex buffer */ 1590 case PACKET3_NOP: 1591 break; 1592 default: 1593 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); 1594 return -EINVAL; 1595 } 1596 return 0; 1597 } 1598 1599 int r100_cs_parse(struct radeon_cs_parser *p) 1600 { 1601 struct radeon_cs_packet pkt; 1602 struct r100_cs_track *track; 1603 int r; 1604 1605 track = kzalloc(sizeof(*track), GFP_KERNEL); 1606 r100_cs_track_clear(p->rdev, track); 1607 p->track = track; 1608 do { 1609 r = r100_cs_packet_parse(p, &pkt, p->idx); 1610 if (r) { 1611 return r; 1612 } 1613 p->idx += pkt.count + 2; 1614 switch (pkt.type) { 1615 case PACKET_TYPE0: 1616 if (p->rdev->family >= CHIP_R200) 1617 r = r100_cs_parse_packet0(p, &pkt, 1618 p->rdev->config.r100.reg_safe_bm, 1619 p->rdev->config.r100.reg_safe_bm_size, 1620 &r200_packet0_check); 1621 else 1622 r = r100_cs_parse_packet0(p, &pkt, 1623 p->rdev->config.r100.reg_safe_bm, 1624 p->rdev->config.r100.reg_safe_bm_size, 1625 &r100_packet0_check); 1626 break; 1627 case PACKET_TYPE2: 1628 break; 1629 case PACKET_TYPE3: 1630 r = r100_packet3_check(p, &pkt); 1631 break; 1632 default: 1633 DRM_ERROR("Unknown packet type %d !\n", 1634 pkt.type); 1635 return -EINVAL; 1636 } 1637 if (r) { 1638 return r; 1639 } 1640 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); 1641 return 0; 1642 } 1643 1644 1645 /* 1646 * Global GPU functions 1647 */ 1648 void r100_errata(struct radeon_device *rdev) 1649 { 1650 rdev->pll_errata = 0; 1651 1652 if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { 1653 rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; 1654 } 1655 1656 if (rdev->family == CHIP_RV100 || 1657 rdev->family == CHIP_RS100 || 1658 rdev->family == CHIP_RS200) { 1659 rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; 1660 } 1661 } 1662 1663 /* Wait for vertical sync on primary CRTC */ 1664 void r100_gpu_wait_for_vsync(struct radeon_device *rdev) 1665 { 1666 uint32_t crtc_gen_cntl, tmp; 1667 int i; 1668 1669 crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); 1670 if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) || 1671 !(crtc_gen_cntl & RADEON_CRTC_EN)) { 1672 return; 1673 } 1674 /* Clear the CRTC_VBLANK_SAVE bit */ 1675 WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR); 1676 for (i = 0; i < rdev->usec_timeout; i++) { 1677 tmp = RREG32(RADEON_CRTC_STATUS); 1678 if (tmp & RADEON_CRTC_VBLANK_SAVE) { 1679 return; 1680 } 1681 DRM_UDELAY(1); 1682 } 1683 } 1684 1685 /* Wait for vertical sync on secondary CRTC */ 1686 void r100_gpu_wait_for_vsync2(struct radeon_device *rdev) 1687 { 1688 uint32_t crtc2_gen_cntl, tmp; 1689 int i; 1690 1691 crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); 1692 if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) || 1693 !(crtc2_gen_cntl & RADEON_CRTC2_EN)) 1694 return; 1695 1696 /* Clear the CRTC_VBLANK_SAVE bit */ 1697 WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR); 1698 for (i = 0; i < rdev->usec_timeout; i++) { 1699 tmp = RREG32(RADEON_CRTC2_STATUS); 1700 if (tmp & RADEON_CRTC2_VBLANK_SAVE) { 1701 return; 1702 } 1703 DRM_UDELAY(1); 1704 } 1705 } 1706 1707 int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n) 1708 { 1709 unsigned i; 1710 uint32_t tmp; 1711 1712 for (i = 0; i < rdev->usec_timeout; i++) { 1713 tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; 1714 if (tmp >= n) { 1715 return 0; 1716 } 1717 DRM_UDELAY(1); 1718 } 1719 return -1; 1720 } 1721 1722 int r100_gui_wait_for_idle(struct radeon_device *rdev) 1723 { 1724 unsigned i; 1725 uint32_t tmp; 1726 1727 if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { 1728 printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" 1729 " Bad things might happen.\n"); 1730 } 1731 for (i = 0; i < rdev->usec_timeout; i++) { 1732 tmp = RREG32(RADEON_RBBM_STATUS); 1733 if (!(tmp & (1 << 31))) { 1734 return 0; 1735 } 1736 DRM_UDELAY(1); 1737 } 1738 return -1; 1739 } 1740 1741 int r100_mc_wait_for_idle(struct radeon_device *rdev) 1742 { 1743 unsigned i; 1744 uint32_t tmp; 1745 1746 for (i = 0; i < rdev->usec_timeout; i++) { 1747 /* read MC_STATUS */ 1748 tmp = RREG32(0x0150); 1749 if (tmp & (1 << 2)) { 1750 return 0; 1751 } 1752 DRM_UDELAY(1); 1753 } 1754 return -1; 1755 } 1756 1757 void r100_gpu_init(struct radeon_device *rdev) 1758 { 1759 /* TODO: anythings to do here ? pipes ? */ 1760 r100_hdp_reset(rdev); 1761 } 1762 1763 void r100_hdp_reset(struct radeon_device *rdev) 1764 { 1765 uint32_t tmp; 1766 1767 tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL; 1768 tmp |= (7 << 28); 1769 WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE); 1770 (void)RREG32(RADEON_HOST_PATH_CNTL); 1771 udelay(200); 1772 WREG32(RADEON_RBBM_SOFT_RESET, 0); 1773 WREG32(RADEON_HOST_PATH_CNTL, tmp); 1774 (void)RREG32(RADEON_HOST_PATH_CNTL); 1775 } 1776 1777 int r100_rb2d_reset(struct radeon_device *rdev) 1778 { 1779 uint32_t tmp; 1780 int i; 1781 1782 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2); 1783 (void)RREG32(RADEON_RBBM_SOFT_RESET); 1784 udelay(200); 1785 WREG32(RADEON_RBBM_SOFT_RESET, 0); 1786 /* Wait to prevent race in RBBM_STATUS */ 1787 mdelay(1); 1788 for (i = 0; i < rdev->usec_timeout; i++) { 1789 tmp = RREG32(RADEON_RBBM_STATUS); 1790 if (!(tmp & (1 << 26))) { 1791 DRM_INFO("RB2D reset succeed (RBBM_STATUS=0x%08X)\n", 1792 tmp); 1793 return 0; 1794 } 1795 DRM_UDELAY(1); 1796 } 1797 tmp = RREG32(RADEON_RBBM_STATUS); 1798 DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp); 1799 return -1; 1800 } 1801 1802 int r100_gpu_reset(struct radeon_device *rdev) 1803 { 1804 uint32_t status; 1805 1806 /* reset order likely matter */ 1807 status = RREG32(RADEON_RBBM_STATUS); 1808 /* reset HDP */ 1809 r100_hdp_reset(rdev); 1810 /* reset rb2d */ 1811 if (status & ((1 << 17) | (1 << 18) | (1 << 27))) { 1812 r100_rb2d_reset(rdev); 1813 } 1814 /* TODO: reset 3D engine */ 1815 /* reset CP */ 1816 status = RREG32(RADEON_RBBM_STATUS); 1817 if (status & (1 << 16)) { 1818 r100_cp_reset(rdev); 1819 } 1820 /* Check if GPU is idle */ 1821 status = RREG32(RADEON_RBBM_STATUS); 1822 if (status & (1 << 31)) { 1823 DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status); 1824 return -1; 1825 } 1826 DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status); 1827 return 0; 1828 } 1829 1830 1831 /* 1832 * VRAM info 1833 */ 1834 static void r100_vram_get_type(struct radeon_device *rdev) 1835 { 1836 uint32_t tmp; 1837 1838 rdev->mc.vram_is_ddr = false; 1839 if (rdev->flags & RADEON_IS_IGP) 1840 rdev->mc.vram_is_ddr = true; 1841 else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR) 1842 rdev->mc.vram_is_ddr = true; 1843 if ((rdev->family == CHIP_RV100) || 1844 (rdev->family == CHIP_RS100) || 1845 (rdev->family == CHIP_RS200)) { 1846 tmp = RREG32(RADEON_MEM_CNTL); 1847 if (tmp & RV100_HALF_MODE) { 1848 rdev->mc.vram_width = 32; 1849 } else { 1850 rdev->mc.vram_width = 64; 1851 } 1852 if (rdev->flags & RADEON_SINGLE_CRTC) { 1853 rdev->mc.vram_width /= 4; 1854 rdev->mc.vram_is_ddr = true; 1855 } 1856 } else if (rdev->family <= CHIP_RV280) { 1857 tmp = RREG32(RADEON_MEM_CNTL); 1858 if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) { 1859 rdev->mc.vram_width = 128; 1860 } else { 1861 rdev->mc.vram_width = 64; 1862 } 1863 } else { 1864 /* newer IGPs */ 1865 rdev->mc.vram_width = 128; 1866 } 1867 } 1868 1869 static u32 r100_get_accessible_vram(struct radeon_device *rdev) 1870 { 1871 u32 aper_size; 1872 u8 byte; 1873 1874 aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 1875 1876 /* Set HDP_APER_CNTL only on cards that are known not to be broken, 1877 * that is has the 2nd generation multifunction PCI interface 1878 */ 1879 if (rdev->family == CHIP_RV280 || 1880 rdev->family >= CHIP_RV350) { 1881 WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, 1882 ~RADEON_HDP_APER_CNTL); 1883 DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); 1884 return aper_size * 2; 1885 } 1886 1887 /* Older cards have all sorts of funny issues to deal with. First 1888 * check if it's a multifunction card by reading the PCI config 1889 * header type... Limit those to one aperture size 1890 */ 1891 pci_read_config_byte(rdev->pdev, 0xe, &byte); 1892 if (byte & 0x80) { 1893 DRM_INFO("Generation 1 PCI interface in multifunction mode\n"); 1894 DRM_INFO("Limiting VRAM to one aperture\n"); 1895 return aper_size; 1896 } 1897 1898 /* Single function older card. We read HDP_APER_CNTL to see how the BIOS 1899 * have set it up. We don't write this as it's broken on some ASICs but 1900 * we expect the BIOS to have done the right thing (might be too optimistic...) 1901 */ 1902 if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) 1903 return aper_size * 2; 1904 return aper_size; 1905 } 1906 1907 void r100_vram_init_sizes(struct radeon_device *rdev) 1908 { 1909 u64 config_aper_size; 1910 u32 accessible; 1911 1912 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 1913 1914 if (rdev->flags & RADEON_IS_IGP) { 1915 uint32_t tom; 1916 /* read NB_TOM to get the amount of ram stolen for the GPU */ 1917 tom = RREG32(RADEON_NB_TOM); 1918 rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16); 1919 /* for IGPs we need to keep VRAM where it was put by the BIOS */ 1920 rdev->mc.vram_location = (tom & 0xffff) << 16; 1921 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 1922 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 1923 } else { 1924 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); 1925 /* Some production boards of m6 will report 0 1926 * if it's 8 MB 1927 */ 1928 if (rdev->mc.real_vram_size == 0) { 1929 rdev->mc.real_vram_size = 8192 * 1024; 1930 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 1931 } 1932 /* let driver place VRAM */ 1933 rdev->mc.vram_location = 0xFFFFFFFFUL; 1934 /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 1935 * Novell bug 204882 + along with lots of ubuntu ones */ 1936 if (config_aper_size > rdev->mc.real_vram_size) 1937 rdev->mc.mc_vram_size = config_aper_size; 1938 else 1939 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 1940 } 1941 1942 /* work out accessible VRAM */ 1943 accessible = r100_get_accessible_vram(rdev); 1944 1945 rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); 1946 rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); 1947 1948 if (accessible > rdev->mc.aper_size) 1949 accessible = rdev->mc.aper_size; 1950 1951 if (rdev->mc.mc_vram_size > rdev->mc.aper_size) 1952 rdev->mc.mc_vram_size = rdev->mc.aper_size; 1953 1954 if (rdev->mc.real_vram_size > rdev->mc.aper_size) 1955 rdev->mc.real_vram_size = rdev->mc.aper_size; 1956 } 1957 1958 void r100_vga_set_state(struct radeon_device *rdev, bool state) 1959 { 1960 uint32_t temp; 1961 1962 temp = RREG32(RADEON_CONFIG_CNTL); 1963 if (state == false) { 1964 temp &= ~(1<<8); 1965 temp |= (1<<9); 1966 } else { 1967 temp &= ~(1<<9); 1968 } 1969 WREG32(RADEON_CONFIG_CNTL, temp); 1970 } 1971 1972 void r100_vram_info(struct radeon_device *rdev) 1973 { 1974 r100_vram_get_type(rdev); 1975 1976 r100_vram_init_sizes(rdev); 1977 } 1978 1979 1980 /* 1981 * Indirect registers accessor 1982 */ 1983 void r100_pll_errata_after_index(struct radeon_device *rdev) 1984 { 1985 if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) { 1986 return; 1987 } 1988 (void)RREG32(RADEON_CLOCK_CNTL_DATA); 1989 (void)RREG32(RADEON_CRTC_GEN_CNTL); 1990 } 1991 1992 static void r100_pll_errata_after_data(struct radeon_device *rdev) 1993 { 1994 /* This workarounds is necessary on RV100, RS100 and RS200 chips 1995 * or the chip could hang on a subsequent access 1996 */ 1997 if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) { 1998 udelay(5000); 1999 } 2000 2001 /* This function is required to workaround a hardware bug in some (all?) 2002 * revisions of the R300. This workaround should be called after every 2003 * CLOCK_CNTL_INDEX register access. If not, register reads afterward 2004 * may not be correct. 2005 */ 2006 if (rdev->pll_errata & CHIP_ERRATA_R300_CG) { 2007 uint32_t save, tmp; 2008 2009 save = RREG32(RADEON_CLOCK_CNTL_INDEX); 2010 tmp = save & ~(0x3f | RADEON_PLL_WR_EN); 2011 WREG32(RADEON_CLOCK_CNTL_INDEX, tmp); 2012 tmp = RREG32(RADEON_CLOCK_CNTL_DATA); 2013 WREG32(RADEON_CLOCK_CNTL_INDEX, save); 2014 } 2015 } 2016 2017 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) 2018 { 2019 uint32_t data; 2020 2021 WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); 2022 r100_pll_errata_after_index(rdev); 2023 data = RREG32(RADEON_CLOCK_CNTL_DATA); 2024 r100_pll_errata_after_data(rdev); 2025 return data; 2026 } 2027 2028 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) 2029 { 2030 WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); 2031 r100_pll_errata_after_index(rdev); 2032 WREG32(RADEON_CLOCK_CNTL_DATA, v); 2033 r100_pll_errata_after_data(rdev); 2034 } 2035 2036 int r100_init(struct radeon_device *rdev) 2037 { 2038 if (ASIC_IS_RN50(rdev)) { 2039 rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm; 2040 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm); 2041 } else if (rdev->family < CHIP_R200) { 2042 rdev->config.r100.reg_safe_bm = r100_reg_safe_bm; 2043 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm); 2044 } else { 2045 return r200_init(rdev); 2046 } 2047 return 0; 2048 } 2049 2050 /* 2051 * Debugfs info 2052 */ 2053 #if defined(CONFIG_DEBUG_FS) 2054 static int r100_debugfs_rbbm_info(struct seq_file *m, void *data) 2055 { 2056 struct drm_info_node *node = (struct drm_info_node *) m->private; 2057 struct drm_device *dev = node->minor->dev; 2058 struct radeon_device *rdev = dev->dev_private; 2059 uint32_t reg, value; 2060 unsigned i; 2061 2062 seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS)); 2063 seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C)); 2064 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 2065 for (i = 0; i < 64; i++) { 2066 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100); 2067 reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2; 2068 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i); 2069 value = RREG32(RADEON_RBBM_CMDFIFO_DATA); 2070 seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value); 2071 } 2072 return 0; 2073 } 2074 2075 static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) 2076 { 2077 struct drm_info_node *node = (struct drm_info_node *) m->private; 2078 struct drm_device *dev = node->minor->dev; 2079 struct radeon_device *rdev = dev->dev_private; 2080 uint32_t rdp, wdp; 2081 unsigned count, i, j; 2082 2083 radeon_ring_free_size(rdev); 2084 rdp = RREG32(RADEON_CP_RB_RPTR); 2085 wdp = RREG32(RADEON_CP_RB_WPTR); 2086 count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask; 2087 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 2088 seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); 2089 seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); 2090 seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw); 2091 seq_printf(m, "%u dwords in ring\n", count); 2092 for (j = 0; j <= count; j++) { 2093 i = (rdp + j) & rdev->cp.ptr_mask; 2094 seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]); 2095 } 2096 return 0; 2097 } 2098 2099 2100 static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data) 2101 { 2102 struct drm_info_node *node = (struct drm_info_node *) m->private; 2103 struct drm_device *dev = node->minor->dev; 2104 struct radeon_device *rdev = dev->dev_private; 2105 uint32_t csq_stat, csq2_stat, tmp; 2106 unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr; 2107 unsigned i; 2108 2109 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 2110 seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); 2111 csq_stat = RREG32(RADEON_CP_CSQ_STAT); 2112 csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); 2113 r_rptr = (csq_stat >> 0) & 0x3ff; 2114 r_wptr = (csq_stat >> 10) & 0x3ff; 2115 ib1_rptr = (csq_stat >> 20) & 0x3ff; 2116 ib1_wptr = (csq2_stat >> 0) & 0x3ff; 2117 ib2_rptr = (csq2_stat >> 10) & 0x3ff; 2118 ib2_wptr = (csq2_stat >> 20) & 0x3ff; 2119 seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); 2120 seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); 2121 seq_printf(m, "Ring rptr %u\n", r_rptr); 2122 seq_printf(m, "Ring wptr %u\n", r_wptr); 2123 seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); 2124 seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); 2125 seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); 2126 seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); 2127 /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms 2128 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ 2129 seq_printf(m, "Ring fifo:\n"); 2130 for (i = 0; i < 256; i++) { 2131 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 2132 tmp = RREG32(RADEON_CP_CSQ_DATA); 2133 seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); 2134 } 2135 seq_printf(m, "Indirect1 fifo:\n"); 2136 for (i = 256; i <= 512; i++) { 2137 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 2138 tmp = RREG32(RADEON_CP_CSQ_DATA); 2139 seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); 2140 } 2141 seq_printf(m, "Indirect2 fifo:\n"); 2142 for (i = 640; i < ib1_wptr; i++) { 2143 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 2144 tmp = RREG32(RADEON_CP_CSQ_DATA); 2145 seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); 2146 } 2147 return 0; 2148 } 2149 2150 static int r100_debugfs_mc_info(struct seq_file *m, void *data) 2151 { 2152 struct drm_info_node *node = (struct drm_info_node *) m->private; 2153 struct drm_device *dev = node->minor->dev; 2154 struct radeon_device *rdev = dev->dev_private; 2155 uint32_t tmp; 2156 2157 tmp = RREG32(RADEON_CONFIG_MEMSIZE); 2158 seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); 2159 tmp = RREG32(RADEON_MC_FB_LOCATION); 2160 seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); 2161 tmp = RREG32(RADEON_BUS_CNTL); 2162 seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); 2163 tmp = RREG32(RADEON_MC_AGP_LOCATION); 2164 seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); 2165 tmp = RREG32(RADEON_AGP_BASE); 2166 seq_printf(m, "AGP_BASE 0x%08x\n", tmp); 2167 tmp = RREG32(RADEON_HOST_PATH_CNTL); 2168 seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); 2169 tmp = RREG32(0x01D0); 2170 seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); 2171 tmp = RREG32(RADEON_AIC_LO_ADDR); 2172 seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); 2173 tmp = RREG32(RADEON_AIC_HI_ADDR); 2174 seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); 2175 tmp = RREG32(0x01E4); 2176 seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); 2177 return 0; 2178 } 2179 2180 static struct drm_info_list r100_debugfs_rbbm_list[] = { 2181 {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, 2182 }; 2183 2184 static struct drm_info_list r100_debugfs_cp_list[] = { 2185 {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, 2186 {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, 2187 }; 2188 2189 static struct drm_info_list r100_debugfs_mc_info_list[] = { 2190 {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, 2191 }; 2192 #endif 2193 2194 int r100_debugfs_rbbm_init(struct radeon_device *rdev) 2195 { 2196 #if defined(CONFIG_DEBUG_FS) 2197 return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1); 2198 #else 2199 return 0; 2200 #endif 2201 } 2202 2203 int r100_debugfs_cp_init(struct radeon_device *rdev) 2204 { 2205 #if defined(CONFIG_DEBUG_FS) 2206 return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2); 2207 #else 2208 return 0; 2209 #endif 2210 } 2211 2212 int r100_debugfs_mc_info_init(struct radeon_device *rdev) 2213 { 2214 #if defined(CONFIG_DEBUG_FS) 2215 return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1); 2216 #else 2217 return 0; 2218 #endif 2219 } 2220 2221 int r100_set_surface_reg(struct radeon_device *rdev, int reg, 2222 uint32_t tiling_flags, uint32_t pitch, 2223 uint32_t offset, uint32_t obj_size) 2224 { 2225 int surf_index = reg * 16; 2226 int flags = 0; 2227 2228 /* r100/r200 divide by 16 */ 2229 if (rdev->family < CHIP_R300) 2230 flags = pitch / 16; 2231 else 2232 flags = pitch / 8; 2233 2234 if (rdev->family <= CHIP_RS200) { 2235 if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) 2236 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) 2237 flags |= RADEON_SURF_TILE_COLOR_BOTH; 2238 if (tiling_flags & RADEON_TILING_MACRO) 2239 flags |= RADEON_SURF_TILE_COLOR_MACRO; 2240 } else if (rdev->family <= CHIP_RV280) { 2241 if (tiling_flags & (RADEON_TILING_MACRO)) 2242 flags |= R200_SURF_TILE_COLOR_MACRO; 2243 if (tiling_flags & RADEON_TILING_MICRO) 2244 flags |= R200_SURF_TILE_COLOR_MICRO; 2245 } else { 2246 if (tiling_flags & RADEON_TILING_MACRO) 2247 flags |= R300_SURF_TILE_MACRO; 2248 if (tiling_flags & RADEON_TILING_MICRO) 2249 flags |= R300_SURF_TILE_MICRO; 2250 } 2251 2252 if (tiling_flags & RADEON_TILING_SWAP_16BIT) 2253 flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; 2254 if (tiling_flags & RADEON_TILING_SWAP_32BIT) 2255 flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; 2256 2257 DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); 2258 WREG32(RADEON_SURFACE0_INFO + surf_index, flags); 2259 WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); 2260 WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); 2261 return 0; 2262 } 2263 2264 void r100_clear_surface_reg(struct radeon_device *rdev, int reg) 2265 { 2266 int surf_index = reg * 16; 2267 WREG32(RADEON_SURFACE0_INFO + surf_index, 0); 2268 } 2269 2270 void r100_bandwidth_update(struct radeon_device *rdev) 2271 { 2272 fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff; 2273 fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff; 2274 fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff; 2275 uint32_t temp, data, mem_trcd, mem_trp, mem_tras; 2276 fixed20_12 memtcas_ff[8] = { 2277 fixed_init(1), 2278 fixed_init(2), 2279 fixed_init(3), 2280 fixed_init(0), 2281 fixed_init_half(1), 2282 fixed_init_half(2), 2283 fixed_init(0), 2284 }; 2285 fixed20_12 memtcas_rs480_ff[8] = { 2286 fixed_init(0), 2287 fixed_init(1), 2288 fixed_init(2), 2289 fixed_init(3), 2290 fixed_init(0), 2291 fixed_init_half(1), 2292 fixed_init_half(2), 2293 fixed_init_half(3), 2294 }; 2295 fixed20_12 memtcas2_ff[8] = { 2296 fixed_init(0), 2297 fixed_init(1), 2298 fixed_init(2), 2299 fixed_init(3), 2300 fixed_init(4), 2301 fixed_init(5), 2302 fixed_init(6), 2303 fixed_init(7), 2304 }; 2305 fixed20_12 memtrbs[8] = { 2306 fixed_init(1), 2307 fixed_init_half(1), 2308 fixed_init(2), 2309 fixed_init_half(2), 2310 fixed_init(3), 2311 fixed_init_half(3), 2312 fixed_init(4), 2313 fixed_init_half(4) 2314 }; 2315 fixed20_12 memtrbs_r4xx[8] = { 2316 fixed_init(4), 2317 fixed_init(5), 2318 fixed_init(6), 2319 fixed_init(7), 2320 fixed_init(8), 2321 fixed_init(9), 2322 fixed_init(10), 2323 fixed_init(11) 2324 }; 2325 fixed20_12 min_mem_eff; 2326 fixed20_12 mc_latency_sclk, mc_latency_mclk, k1; 2327 fixed20_12 cur_latency_mclk, cur_latency_sclk; 2328 fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate, 2329 disp_drain_rate2, read_return_rate; 2330 fixed20_12 time_disp1_drop_priority; 2331 int c; 2332 int cur_size = 16; /* in octawords */ 2333 int critical_point = 0, critical_point2; 2334 /* uint32_t read_return_rate, time_disp1_drop_priority; */ 2335 int stop_req, max_stop_req; 2336 struct drm_display_mode *mode1 = NULL; 2337 struct drm_display_mode *mode2 = NULL; 2338 uint32_t pixel_bytes1 = 0; 2339 uint32_t pixel_bytes2 = 0; 2340 2341 if (rdev->mode_info.crtcs[0]->base.enabled) { 2342 mode1 = &rdev->mode_info.crtcs[0]->base.mode; 2343 pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; 2344 } 2345 if (rdev->mode_info.crtcs[1]->base.enabled) { 2346 mode2 = &rdev->mode_info.crtcs[1]->base.mode; 2347 pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; 2348 } 2349 2350 min_mem_eff.full = rfixed_const_8(0); 2351 /* get modes */ 2352 if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) { 2353 uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER); 2354 mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); 2355 mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT); 2356 /* check crtc enables */ 2357 if (mode2) 2358 mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT); 2359 if (mode1) 2360 mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT); 2361 WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); 2362 } 2363 2364 /* 2365 * determine is there is enough bw for current mode 2366 */ 2367 mclk_ff.full = rfixed_const(rdev->clock.default_mclk); 2368 temp_ff.full = rfixed_const(100); 2369 mclk_ff.full = rfixed_div(mclk_ff, temp_ff); 2370 sclk_ff.full = rfixed_const(rdev->clock.default_sclk); 2371 sclk_ff.full = rfixed_div(sclk_ff, temp_ff); 2372 2373 temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1); 2374 temp_ff.full = rfixed_const(temp); 2375 mem_bw.full = rfixed_mul(mclk_ff, temp_ff); 2376 2377 pix_clk.full = 0; 2378 pix_clk2.full = 0; 2379 peak_disp_bw.full = 0; 2380 if (mode1) { 2381 temp_ff.full = rfixed_const(1000); 2382 pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */ 2383 pix_clk.full = rfixed_div(pix_clk, temp_ff); 2384 temp_ff.full = rfixed_const(pixel_bytes1); 2385 peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff); 2386 } 2387 if (mode2) { 2388 temp_ff.full = rfixed_const(1000); 2389 pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */ 2390 pix_clk2.full = rfixed_div(pix_clk2, temp_ff); 2391 temp_ff.full = rfixed_const(pixel_bytes2); 2392 peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff); 2393 } 2394 2395 mem_bw.full = rfixed_mul(mem_bw, min_mem_eff); 2396 if (peak_disp_bw.full >= mem_bw.full) { 2397 DRM_ERROR("You may not have enough display bandwidth for current mode\n" 2398 "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n"); 2399 } 2400 2401 /* Get values from the EXT_MEM_CNTL register...converting its contents. */ 2402 temp = RREG32(RADEON_MEM_TIMING_CNTL); 2403 if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */ 2404 mem_trcd = ((temp >> 2) & 0x3) + 1; 2405 mem_trp = ((temp & 0x3)) + 1; 2406 mem_tras = ((temp & 0x70) >> 4) + 1; 2407 } else if (rdev->family == CHIP_R300 || 2408 rdev->family == CHIP_R350) { /* r300, r350 */ 2409 mem_trcd = (temp & 0x7) + 1; 2410 mem_trp = ((temp >> 8) & 0x7) + 1; 2411 mem_tras = ((temp >> 11) & 0xf) + 4; 2412 } else if (rdev->family == CHIP_RV350 || 2413 rdev->family <= CHIP_RV380) { 2414 /* rv3x0 */ 2415 mem_trcd = (temp & 0x7) + 3; 2416 mem_trp = ((temp >> 8) & 0x7) + 3; 2417 mem_tras = ((temp >> 11) & 0xf) + 6; 2418 } else if (rdev->family == CHIP_R420 || 2419 rdev->family == CHIP_R423 || 2420 rdev->family == CHIP_RV410) { 2421 /* r4xx */ 2422 mem_trcd = (temp & 0xf) + 3; 2423 if (mem_trcd > 15) 2424 mem_trcd = 15; 2425 mem_trp = ((temp >> 8) & 0xf) + 3; 2426 if (mem_trp > 15) 2427 mem_trp = 15; 2428 mem_tras = ((temp >> 12) & 0x1f) + 6; 2429 if (mem_tras > 31) 2430 mem_tras = 31; 2431 } else { /* RV200, R200 */ 2432 mem_trcd = (temp & 0x7) + 1; 2433 mem_trp = ((temp >> 8) & 0x7) + 1; 2434 mem_tras = ((temp >> 12) & 0xf) + 4; 2435 } 2436 /* convert to FF */ 2437 trcd_ff.full = rfixed_const(mem_trcd); 2438 trp_ff.full = rfixed_const(mem_trp); 2439 tras_ff.full = rfixed_const(mem_tras); 2440 2441 /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ 2442 temp = RREG32(RADEON_MEM_SDRAM_MODE_REG); 2443 data = (temp & (7 << 20)) >> 20; 2444 if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) { 2445 if (rdev->family == CHIP_RS480) /* don't think rs400 */ 2446 tcas_ff = memtcas_rs480_ff[data]; 2447 else 2448 tcas_ff = memtcas_ff[data]; 2449 } else 2450 tcas_ff = memtcas2_ff[data]; 2451 2452 if (rdev->family == CHIP_RS400 || 2453 rdev->family == CHIP_RS480) { 2454 /* extra cas latency stored in bits 23-25 0-4 clocks */ 2455 data = (temp >> 23) & 0x7; 2456 if (data < 5) 2457 tcas_ff.full += rfixed_const(data); 2458 } 2459 2460 if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) { 2461 /* on the R300, Tcas is included in Trbs. 2462 */ 2463 temp = RREG32(RADEON_MEM_CNTL); 2464 data = (R300_MEM_NUM_CHANNELS_MASK & temp); 2465 if (data == 1) { 2466 if (R300_MEM_USE_CD_CH_ONLY & temp) { 2467 temp = RREG32(R300_MC_IND_INDEX); 2468 temp &= ~R300_MC_IND_ADDR_MASK; 2469 temp |= R300_MC_READ_CNTL_CD_mcind; 2470 WREG32(R300_MC_IND_INDEX, temp); 2471 temp = RREG32(R300_MC_IND_DATA); 2472 data = (R300_MEM_RBS_POSITION_C_MASK & temp); 2473 } else { 2474 temp = RREG32(R300_MC_READ_CNTL_AB); 2475 data = (R300_MEM_RBS_POSITION_A_MASK & temp); 2476 } 2477 } else { 2478 temp = RREG32(R300_MC_READ_CNTL_AB); 2479 data = (R300_MEM_RBS_POSITION_A_MASK & temp); 2480 } 2481 if (rdev->family == CHIP_RV410 || 2482 rdev->family == CHIP_R420 || 2483 rdev->family == CHIP_R423) 2484 trbs_ff = memtrbs_r4xx[data]; 2485 else 2486 trbs_ff = memtrbs[data]; 2487 tcas_ff.full += trbs_ff.full; 2488 } 2489 2490 sclk_eff_ff.full = sclk_ff.full; 2491 2492 if (rdev->flags & RADEON_IS_AGP) { 2493 fixed20_12 agpmode_ff; 2494 agpmode_ff.full = rfixed_const(radeon_agpmode); 2495 temp_ff.full = rfixed_const_666(16); 2496 sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff); 2497 } 2498 /* TODO PCIE lanes may affect this - agpmode == 16?? */ 2499 2500 if (ASIC_IS_R300(rdev)) { 2501 sclk_delay_ff.full = rfixed_const(250); 2502 } else { 2503 if ((rdev->family == CHIP_RV100) || 2504 rdev->flags & RADEON_IS_IGP) { 2505 if (rdev->mc.vram_is_ddr) 2506 sclk_delay_ff.full = rfixed_const(41); 2507 else 2508 sclk_delay_ff.full = rfixed_const(33); 2509 } else { 2510 if (rdev->mc.vram_width == 128) 2511 sclk_delay_ff.full = rfixed_const(57); 2512 else 2513 sclk_delay_ff.full = rfixed_const(41); 2514 } 2515 } 2516 2517 mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff); 2518 2519 if (rdev->mc.vram_is_ddr) { 2520 if (rdev->mc.vram_width == 32) { 2521 k1.full = rfixed_const(40); 2522 c = 3; 2523 } else { 2524 k1.full = rfixed_const(20); 2525 c = 1; 2526 } 2527 } else { 2528 k1.full = rfixed_const(40); 2529 c = 3; 2530 } 2531 2532 temp_ff.full = rfixed_const(2); 2533 mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff); 2534 temp_ff.full = rfixed_const(c); 2535 mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff); 2536 temp_ff.full = rfixed_const(4); 2537 mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff); 2538 mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff); 2539 mc_latency_mclk.full += k1.full; 2540 2541 mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff); 2542 mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff); 2543 2544 /* 2545 HW cursor time assuming worst case of full size colour cursor. 2546 */ 2547 temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1)))); 2548 temp_ff.full += trcd_ff.full; 2549 if (temp_ff.full < tras_ff.full) 2550 temp_ff.full = tras_ff.full; 2551 cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff); 2552 2553 temp_ff.full = rfixed_const(cur_size); 2554 cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff); 2555 /* 2556 Find the total latency for the display data. 2557 */ 2558 disp_latency_overhead.full = rfixed_const(80); 2559 disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff); 2560 mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full; 2561 mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full; 2562 2563 if (mc_latency_mclk.full > mc_latency_sclk.full) 2564 disp_latency.full = mc_latency_mclk.full; 2565 else 2566 disp_latency.full = mc_latency_sclk.full; 2567 2568 /* setup Max GRPH_STOP_REQ default value */ 2569 if (ASIC_IS_RV100(rdev)) 2570 max_stop_req = 0x5c; 2571 else 2572 max_stop_req = 0x7c; 2573 2574 if (mode1) { 2575 /* CRTC1 2576 Set GRPH_BUFFER_CNTL register using h/w defined optimal values. 2577 GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ] 2578 */ 2579 stop_req = mode1->hdisplay * pixel_bytes1 / 16; 2580 2581 if (stop_req > max_stop_req) 2582 stop_req = max_stop_req; 2583 2584 /* 2585 Find the drain rate of the display buffer. 2586 */ 2587 temp_ff.full = rfixed_const((16/pixel_bytes1)); 2588 disp_drain_rate.full = rfixed_div(pix_clk, temp_ff); 2589 2590 /* 2591 Find the critical point of the display buffer. 2592 */ 2593 crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency); 2594 crit_point_ff.full += rfixed_const_half(0); 2595 2596 critical_point = rfixed_trunc(crit_point_ff); 2597 2598 if (rdev->disp_priority == 2) { 2599 critical_point = 0; 2600 } 2601 2602 /* 2603 The critical point should never be above max_stop_req-4. Setting 2604 GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time. 2605 */ 2606 if (max_stop_req - critical_point < 4) 2607 critical_point = 0; 2608 2609 if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { 2610 /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/ 2611 critical_point = 0x10; 2612 } 2613 2614 temp = RREG32(RADEON_GRPH_BUFFER_CNTL); 2615 temp &= ~(RADEON_GRPH_STOP_REQ_MASK); 2616 temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); 2617 temp &= ~(RADEON_GRPH_START_REQ_MASK); 2618 if ((rdev->family == CHIP_R350) && 2619 (stop_req > 0x15)) { 2620 stop_req -= 0x10; 2621 } 2622 temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); 2623 temp |= RADEON_GRPH_BUFFER_SIZE; 2624 temp &= ~(RADEON_GRPH_CRITICAL_CNTL | 2625 RADEON_GRPH_CRITICAL_AT_SOF | 2626 RADEON_GRPH_STOP_CNTL); 2627 /* 2628 Write the result into the register. 2629 */ 2630 WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) | 2631 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT))); 2632 2633 #if 0 2634 if ((rdev->family == CHIP_RS400) || 2635 (rdev->family == CHIP_RS480)) { 2636 /* attempt to program RS400 disp regs correctly ??? */ 2637 temp = RREG32(RS400_DISP1_REG_CNTL); 2638 temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK | 2639 RS400_DISP1_STOP_REQ_LEVEL_MASK); 2640 WREG32(RS400_DISP1_REQ_CNTL1, (temp | 2641 (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) | 2642 (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); 2643 temp = RREG32(RS400_DMIF_MEM_CNTL1); 2644 temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK | 2645 RS400_DISP1_CRITICAL_POINT_STOP_MASK); 2646 WREG32(RS400_DMIF_MEM_CNTL1, (temp | 2647 (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) | 2648 (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT))); 2649 } 2650 #endif 2651 2652 DRM_DEBUG("GRPH_BUFFER_CNTL from to %x\n", 2653 /* (unsigned int)info->SavedReg->grph_buffer_cntl, */ 2654 (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL)); 2655 } 2656 2657 if (mode2) { 2658 u32 grph2_cntl; 2659 stop_req = mode2->hdisplay * pixel_bytes2 / 16; 2660 2661 if (stop_req > max_stop_req) 2662 stop_req = max_stop_req; 2663 2664 /* 2665 Find the drain rate of the display buffer. 2666 */ 2667 temp_ff.full = rfixed_const((16/pixel_bytes2)); 2668 disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff); 2669 2670 grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); 2671 grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); 2672 grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); 2673 grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); 2674 if ((rdev->family == CHIP_R350) && 2675 (stop_req > 0x15)) { 2676 stop_req -= 0x10; 2677 } 2678 grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); 2679 grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; 2680 grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | 2681 RADEON_GRPH_CRITICAL_AT_SOF | 2682 RADEON_GRPH_STOP_CNTL); 2683 2684 if ((rdev->family == CHIP_RS100) || 2685 (rdev->family == CHIP_RS200)) 2686 critical_point2 = 0; 2687 else { 2688 temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; 2689 temp_ff.full = rfixed_const(temp); 2690 temp_ff.full = rfixed_mul(mclk_ff, temp_ff); 2691 if (sclk_ff.full < temp_ff.full) 2692 temp_ff.full = sclk_ff.full; 2693 2694 read_return_rate.full = temp_ff.full; 2695 2696 if (mode1) { 2697 temp_ff.full = read_return_rate.full - disp_drain_rate.full; 2698 time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff); 2699 } else { 2700 time_disp1_drop_priority.full = 0; 2701 } 2702 crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; 2703 crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2); 2704 crit_point_ff.full += rfixed_const_half(0); 2705 2706 critical_point2 = rfixed_trunc(crit_point_ff); 2707 2708 if (rdev->disp_priority == 2) { 2709 critical_point2 = 0; 2710 } 2711 2712 if (max_stop_req - critical_point2 < 4) 2713 critical_point2 = 0; 2714 2715 } 2716 2717 if (critical_point2 == 0 && rdev->family == CHIP_R300) { 2718 /* some R300 cards have problem with this set to 0 */ 2719 critical_point2 = 0x10; 2720 } 2721 2722 WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | 2723 (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); 2724 2725 if ((rdev->family == CHIP_RS400) || 2726 (rdev->family == CHIP_RS480)) { 2727 #if 0 2728 /* attempt to program RS400 disp2 regs correctly ??? */ 2729 temp = RREG32(RS400_DISP2_REQ_CNTL1); 2730 temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | 2731 RS400_DISP2_STOP_REQ_LEVEL_MASK); 2732 WREG32(RS400_DISP2_REQ_CNTL1, (temp | 2733 (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | 2734 (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); 2735 temp = RREG32(RS400_DISP2_REQ_CNTL2); 2736 temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | 2737 RS400_DISP2_CRITICAL_POINT_STOP_MASK); 2738 WREG32(RS400_DISP2_REQ_CNTL2, (temp | 2739 (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | 2740 (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); 2741 #endif 2742 WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); 2743 WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); 2744 WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); 2745 WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); 2746 } 2747 2748 DRM_DEBUG("GRPH2_BUFFER_CNTL from to %x\n", 2749 (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); 2750 } 2751 } 2752 2753 static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t) 2754 { 2755 DRM_ERROR("pitch %d\n", t->pitch); 2756 DRM_ERROR("width %d\n", t->width); 2757 DRM_ERROR("height %d\n", t->height); 2758 DRM_ERROR("num levels %d\n", t->num_levels); 2759 DRM_ERROR("depth %d\n", t->txdepth); 2760 DRM_ERROR("bpp %d\n", t->cpp); 2761 DRM_ERROR("coordinate type %d\n", t->tex_coord_type); 2762 DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); 2763 DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); 2764 } 2765 2766 static int r100_cs_track_cube(struct radeon_device *rdev, 2767 struct r100_cs_track *track, unsigned idx) 2768 { 2769 unsigned face, w, h; 2770 struct radeon_object *cube_robj; 2771 unsigned long size; 2772 2773 for (face = 0; face < 5; face++) { 2774 cube_robj = track->textures[idx].cube_info[face].robj; 2775 w = track->textures[idx].cube_info[face].width; 2776 h = track->textures[idx].cube_info[face].height; 2777 2778 size = w * h; 2779 size *= track->textures[idx].cpp; 2780 2781 size += track->textures[idx].cube_info[face].offset; 2782 2783 if (size > radeon_object_size(cube_robj)) { 2784 DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", 2785 size, radeon_object_size(cube_robj)); 2786 r100_cs_track_texture_print(&track->textures[idx]); 2787 return -1; 2788 } 2789 } 2790 return 0; 2791 } 2792 2793 static int r100_cs_track_texture_check(struct radeon_device *rdev, 2794 struct r100_cs_track *track) 2795 { 2796 struct radeon_object *robj; 2797 unsigned long size; 2798 unsigned u, i, w, h; 2799 int ret; 2800 2801 for (u = 0; u < track->num_texture; u++) { 2802 if (!track->textures[u].enabled) 2803 continue; 2804 robj = track->textures[u].robj; 2805 if (robj == NULL) { 2806 DRM_ERROR("No texture bound to unit %u\n", u); 2807 return -EINVAL; 2808 } 2809 size = 0; 2810 for (i = 0; i <= track->textures[u].num_levels; i++) { 2811 if (track->textures[u].use_pitch) { 2812 if (rdev->family < CHIP_R300) 2813 w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); 2814 else 2815 w = track->textures[u].pitch / (1 << i); 2816 } else { 2817 w = track->textures[u].width / (1 << i); 2818 if (rdev->family >= CHIP_RV515) 2819 w |= track->textures[u].width_11; 2820 if (track->textures[u].roundup_w) 2821 w = roundup_pow_of_two(w); 2822 } 2823 h = track->textures[u].height / (1 << i); 2824 if (rdev->family >= CHIP_RV515) 2825 h |= track->textures[u].height_11; 2826 if (track->textures[u].roundup_h) 2827 h = roundup_pow_of_two(h); 2828 size += w * h; 2829 } 2830 size *= track->textures[u].cpp; 2831 switch (track->textures[u].tex_coord_type) { 2832 case 0: 2833 break; 2834 case 1: 2835 size *= (1 << track->textures[u].txdepth); 2836 break; 2837 case 2: 2838 if (track->separate_cube) { 2839 ret = r100_cs_track_cube(rdev, track, u); 2840 if (ret) 2841 return ret; 2842 } else 2843 size *= 6; 2844 break; 2845 default: 2846 DRM_ERROR("Invalid texture coordinate type %u for unit " 2847 "%u\n", track->textures[u].tex_coord_type, u); 2848 return -EINVAL; 2849 } 2850 if (size > radeon_object_size(robj)) { 2851 DRM_ERROR("Texture of unit %u needs %lu bytes but is " 2852 "%lu\n", u, size, radeon_object_size(robj)); 2853 r100_cs_track_texture_print(&track->textures[u]); 2854 return -EINVAL; 2855 } 2856 } 2857 return 0; 2858 } 2859 2860 int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) 2861 { 2862 unsigned i; 2863 unsigned long size; 2864 unsigned prim_walk; 2865 unsigned nverts; 2866 2867 for (i = 0; i < track->num_cb; i++) { 2868 if (track->cb[i].robj == NULL) { 2869 DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); 2870 return -EINVAL; 2871 } 2872 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; 2873 size += track->cb[i].offset; 2874 if (size > radeon_object_size(track->cb[i].robj)) { 2875 DRM_ERROR("[drm] Buffer too small for color buffer %d " 2876 "(need %lu have %lu) !\n", i, size, 2877 radeon_object_size(track->cb[i].robj)); 2878 DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", 2879 i, track->cb[i].pitch, track->cb[i].cpp, 2880 track->cb[i].offset, track->maxy); 2881 return -EINVAL; 2882 } 2883 } 2884 if (track->z_enabled) { 2885 if (track->zb.robj == NULL) { 2886 DRM_ERROR("[drm] No buffer for z buffer !\n"); 2887 return -EINVAL; 2888 } 2889 size = track->zb.pitch * track->zb.cpp * track->maxy; 2890 size += track->zb.offset; 2891 if (size > radeon_object_size(track->zb.robj)) { 2892 DRM_ERROR("[drm] Buffer too small for z buffer " 2893 "(need %lu have %lu) !\n", size, 2894 radeon_object_size(track->zb.robj)); 2895 DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", 2896 track->zb.pitch, track->zb.cpp, 2897 track->zb.offset, track->maxy); 2898 return -EINVAL; 2899 } 2900 } 2901 prim_walk = (track->vap_vf_cntl >> 4) & 0x3; 2902 nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; 2903 switch (prim_walk) { 2904 case 1: 2905 for (i = 0; i < track->num_arrays; i++) { 2906 size = track->arrays[i].esize * track->max_indx * 4; 2907 if (track->arrays[i].robj == NULL) { 2908 DRM_ERROR("(PW %u) Vertex array %u no buffer " 2909 "bound\n", prim_walk, i); 2910 return -EINVAL; 2911 } 2912 if (size > radeon_object_size(track->arrays[i].robj)) { 2913 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords " 2914 "have %lu dwords\n", prim_walk, i, 2915 size >> 2, 2916 radeon_object_size(track->arrays[i].robj) >> 2); 2917 DRM_ERROR("Max indices %u\n", track->max_indx); 2918 return -EINVAL; 2919 } 2920 } 2921 break; 2922 case 2: 2923 for (i = 0; i < track->num_arrays; i++) { 2924 size = track->arrays[i].esize * (nverts - 1) * 4; 2925 if (track->arrays[i].robj == NULL) { 2926 DRM_ERROR("(PW %u) Vertex array %u no buffer " 2927 "bound\n", prim_walk, i); 2928 return -EINVAL; 2929 } 2930 if (size > radeon_object_size(track->arrays[i].robj)) { 2931 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords " 2932 "have %lu dwords\n", prim_walk, i, size >> 2, 2933 radeon_object_size(track->arrays[i].robj) >> 2); 2934 return -EINVAL; 2935 } 2936 } 2937 break; 2938 case 3: 2939 size = track->vtx_size * nverts; 2940 if (size != track->immd_dwords) { 2941 DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", 2942 track->immd_dwords, size); 2943 DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", 2944 nverts, track->vtx_size); 2945 return -EINVAL; 2946 } 2947 break; 2948 default: 2949 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", 2950 prim_walk); 2951 return -EINVAL; 2952 } 2953 return r100_cs_track_texture_check(rdev, track); 2954 } 2955 2956 void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) 2957 { 2958 unsigned i, face; 2959 2960 if (rdev->family < CHIP_R300) { 2961 track->num_cb = 1; 2962 if (rdev->family <= CHIP_RS200) 2963 track->num_texture = 3; 2964 else 2965 track->num_texture = 6; 2966 track->maxy = 2048; 2967 track->separate_cube = 1; 2968 } else { 2969 track->num_cb = 4; 2970 track->num_texture = 16; 2971 track->maxy = 4096; 2972 track->separate_cube = 0; 2973 } 2974 2975 for (i = 0; i < track->num_cb; i++) { 2976 track->cb[i].robj = NULL; 2977 track->cb[i].pitch = 8192; 2978 track->cb[i].cpp = 16; 2979 track->cb[i].offset = 0; 2980 } 2981 track->z_enabled = true; 2982 track->zb.robj = NULL; 2983 track->zb.pitch = 8192; 2984 track->zb.cpp = 4; 2985 track->zb.offset = 0; 2986 track->vtx_size = 0x7F; 2987 track->immd_dwords = 0xFFFFFFFFUL; 2988 track->num_arrays = 11; 2989 track->max_indx = 0x00FFFFFFUL; 2990 for (i = 0; i < track->num_arrays; i++) { 2991 track->arrays[i].robj = NULL; 2992 track->arrays[i].esize = 0x7F; 2993 } 2994 for (i = 0; i < track->num_texture; i++) { 2995 track->textures[i].pitch = 16536; 2996 track->textures[i].width = 16536; 2997 track->textures[i].height = 16536; 2998 track->textures[i].width_11 = 1 << 11; 2999 track->textures[i].height_11 = 1 << 11; 3000 track->textures[i].num_levels = 12; 3001 if (rdev->family <= CHIP_RS200) { 3002 track->textures[i].tex_coord_type = 0; 3003 track->textures[i].txdepth = 0; 3004 } else { 3005 track->textures[i].txdepth = 16; 3006 track->textures[i].tex_coord_type = 1; 3007 } 3008 track->textures[i].cpp = 64; 3009 track->textures[i].robj = NULL; 3010 /* CS IB emission code makes sure texture unit are disabled */ 3011 track->textures[i].enabled = false; 3012 track->textures[i].roundup_w = true; 3013 track->textures[i].roundup_h = true; 3014 if (track->separate_cube) 3015 for (face = 0; face < 5; face++) { 3016 track->textures[i].cube_info[face].robj = NULL; 3017 track->textures[i].cube_info[face].width = 16536; 3018 track->textures[i].cube_info[face].height = 16536; 3019 track->textures[i].cube_info[face].offset = 0; 3020 } 3021 } 3022 } 3023 3024 int r100_ring_test(struct radeon_device *rdev) 3025 { 3026 uint32_t scratch; 3027 uint32_t tmp = 0; 3028 unsigned i; 3029 int r; 3030 3031 r = radeon_scratch_get(rdev, &scratch); 3032 if (r) { 3033 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); 3034 return r; 3035 } 3036 WREG32(scratch, 0xCAFEDEAD); 3037 r = radeon_ring_lock(rdev, 2); 3038 if (r) { 3039 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); 3040 radeon_scratch_free(rdev, scratch); 3041 return r; 3042 } 3043 radeon_ring_write(rdev, PACKET0(scratch, 0)); 3044 radeon_ring_write(rdev, 0xDEADBEEF); 3045 radeon_ring_unlock_commit(rdev); 3046 for (i = 0; i < rdev->usec_timeout; i++) { 3047 tmp = RREG32(scratch); 3048 if (tmp == 0xDEADBEEF) { 3049 break; 3050 } 3051 DRM_UDELAY(1); 3052 } 3053 if (i < rdev->usec_timeout) { 3054 DRM_INFO("ring test succeeded in %d usecs\n", i); 3055 } else { 3056 DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n", 3057 scratch, tmp); 3058 r = -EINVAL; 3059 } 3060 radeon_scratch_free(rdev, scratch); 3061 return r; 3062 } 3063 3064 void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 3065 { 3066 radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1)); 3067 radeon_ring_write(rdev, ib->gpu_addr); 3068 radeon_ring_write(rdev, ib->length_dw); 3069 } 3070 3071 int r100_ib_test(struct radeon_device *rdev) 3072 { 3073 struct radeon_ib *ib; 3074 uint32_t scratch; 3075 uint32_t tmp = 0; 3076 unsigned i; 3077 int r; 3078 3079 r = radeon_scratch_get(rdev, &scratch); 3080 if (r) { 3081 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); 3082 return r; 3083 } 3084 WREG32(scratch, 0xCAFEDEAD); 3085 r = radeon_ib_get(rdev, &ib); 3086 if (r) { 3087 return r; 3088 } 3089 ib->ptr[0] = PACKET0(scratch, 0); 3090 ib->ptr[1] = 0xDEADBEEF; 3091 ib->ptr[2] = PACKET2(0); 3092 ib->ptr[3] = PACKET2(0); 3093 ib->ptr[4] = PACKET2(0); 3094 ib->ptr[5] = PACKET2(0); 3095 ib->ptr[6] = PACKET2(0); 3096 ib->ptr[7] = PACKET2(0); 3097 ib->length_dw = 8; 3098 r = radeon_ib_schedule(rdev, ib); 3099 if (r) { 3100 radeon_scratch_free(rdev, scratch); 3101 radeon_ib_free(rdev, &ib); 3102 return r; 3103 } 3104 r = radeon_fence_wait(ib->fence, false); 3105 if (r) { 3106 return r; 3107 } 3108 for (i = 0; i < rdev->usec_timeout; i++) { 3109 tmp = RREG32(scratch); 3110 if (tmp == 0xDEADBEEF) { 3111 break; 3112 } 3113 DRM_UDELAY(1); 3114 } 3115 if (i < rdev->usec_timeout) { 3116 DRM_INFO("ib test succeeded in %u usecs\n", i); 3117 } else { 3118 DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", 3119 scratch, tmp); 3120 r = -EINVAL; 3121 } 3122 radeon_scratch_free(rdev, scratch); 3123 radeon_ib_free(rdev, &ib); 3124 return r; 3125 } 3126 3127 void r100_ib_fini(struct radeon_device *rdev) 3128 { 3129 radeon_ib_pool_fini(rdev); 3130 } 3131 3132 int r100_ib_init(struct radeon_device *rdev) 3133 { 3134 int r; 3135 3136 r = radeon_ib_pool_init(rdev); 3137 if (r) { 3138 dev_err(rdev->dev, "failled initializing IB pool (%d).\n", r); 3139 r100_ib_fini(rdev); 3140 return r; 3141 } 3142 r = r100_ib_test(rdev); 3143 if (r) { 3144 dev_err(rdev->dev, "failled testing IB (%d).\n", r); 3145 r100_ib_fini(rdev); 3146 return r; 3147 } 3148 return 0; 3149 } 3150 3151 void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save) 3152 { 3153 /* Shutdown CP we shouldn't need to do that but better be safe than 3154 * sorry 3155 */ 3156 rdev->cp.ready = false; 3157 WREG32(R_000740_CP_CSQ_CNTL, 0); 3158 3159 /* Save few CRTC registers */ 3160 save->GENMO_WT = RREG32(R_0003C0_GENMO_WT); 3161 save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL); 3162 save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL); 3163 save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET); 3164 if (!(rdev->flags & RADEON_SINGLE_CRTC)) { 3165 save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL); 3166 save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET); 3167 } 3168 3169 /* Disable VGA aperture access */ 3170 WREG32(R_0003C0_GENMO_WT, C_0003C0_VGA_RAM_EN & save->GENMO_WT); 3171 /* Disable cursor, overlay, crtc */ 3172 WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1)); 3173 WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL | 3174 S_000054_CRTC_DISPLAY_DIS(1)); 3175 WREG32(R_000050_CRTC_GEN_CNTL, 3176 (C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) | 3177 S_000050_CRTC_DISP_REQ_EN_B(1)); 3178 WREG32(R_000420_OV0_SCALE_CNTL, 3179 C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL)); 3180 WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET); 3181 if (!(rdev->flags & RADEON_SINGLE_CRTC)) { 3182 WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET | 3183 S_000360_CUR2_LOCK(1)); 3184 WREG32(R_0003F8_CRTC2_GEN_CNTL, 3185 (C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) | 3186 S_0003F8_CRTC2_DISPLAY_DIS(1) | 3187 S_0003F8_CRTC2_DISP_REQ_EN_B(1)); 3188 WREG32(R_000360_CUR2_OFFSET, 3189 C_000360_CUR2_LOCK & save->CUR2_OFFSET); 3190 } 3191 } 3192 3193 void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save) 3194 { 3195 /* Update base address for crtc */ 3196 WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_location); 3197 if (!(rdev->flags & RADEON_SINGLE_CRTC)) { 3198 WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, 3199 rdev->mc.vram_location); 3200 } 3201 /* Restore CRTC registers */ 3202 WREG32(R_0003C0_GENMO_WT, save->GENMO_WT); 3203 WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL); 3204 WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL); 3205 if (!(rdev->flags & RADEON_SINGLE_CRTC)) { 3206 WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL); 3207 } 3208 } 3209