/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_reg.h"
#include "radeon.h"
#include "r100d.h"
#include "rs100d.h"
#include "rv200d.h"
#include "rv250d.h"

#include <linux/firmware.h>
#include <linux/platform_device.h>

#include "r100_reg_safe.h"
#include "rn50_reg_safe.h"

/* Firmware Names */
#define FIRMWARE_R100		"radeon/R100_cp.bin"
#define FIRMWARE_R200		"radeon/R200_cp.bin"
#define FIRMWARE_R300		"radeon/R300_cp.bin"
#define FIRMWARE_R420		"radeon/R420_cp.bin"
#define FIRMWARE_RS690		"radeon/RS690_cp.bin"
#define FIRMWARE_RS600		"radeon/RS600_cp.bin"
#define FIRMWARE_R520		"radeon/R520_cp.bin"

MODULE_FIRMWARE(FIRMWARE_R100);
MODULE_FIRMWARE(FIRMWARE_R200);
MODULE_FIRMWARE(FIRMWARE_R300);
MODULE_FIRMWARE(FIRMWARE_R420);
MODULE_FIRMWARE(FIRMWARE_RS690);
MODULE_FIRMWARE(FIRMWARE_RS600);
MODULE_FIRMWARE(FIRMWARE_R520);

#include "r100_track.h"

/* This file gathers functions specific to:
 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280
 */

/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here? */
	/* It seems the hw only caches one entry, so we should discard that
	 * entry; otherwise, if the first GPU GART read hits it, the access
	 * could end up at the wrong address.
	 */
}
int r100_pci_gart_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.table.ram.ptr) {
		WARN(1, "R100 PCI GART already initialized.\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r)
		return r;
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	rdev->asic->gart_tlb_flush = &r100_pci_gart_tlb_flush;
	rdev->asic->gart_set_page = &r100_pci_gart_set_page;
	return radeon_gart_table_ram_alloc(rdev);
}

int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translate */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
	WREG32(RADEON_AIC_HI_ADDR, tmp);
	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	rdev->gart.ready = true;
	return 0;
}

void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}
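/* A note on the table written by r100_pci_gart_set_page() below: each GART
 * entry is a single little-endian 32-bit dword holding the bus address of
 * one GPU page (hence table_size = num_gpu_pages * 4 above). As a rough
 * sketch, and assuming 4 KiB GPU pages, entry i translates GPU address
 * gtt_location + i * 4096 to the system page at 'addr'; only the low 32
 * bits of the bus address are kept, which is all a PCI GART can use.
 */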
int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	if (i < 0 || i > rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr));
	return 0;
}

void r100_pci_gart_fini(struct radeon_device *rdev)
{
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
	radeon_gart_fini(rdev);
}

int r100_irq_set(struct radeon_device *rdev)
{
	uint32_t tmp = 0;

	if (rdev->irq.sw_int) {
		tmp |= RADEON_SW_INT_ENABLE;
	}
	if (rdev->irq.crtc_vblank_int[0]) {
		tmp |= RADEON_CRTC_VBLANK_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1]) {
		tmp |= RADEON_CRTC2_VBLANK_MASK;
	}
	WREG32(RADEON_GEN_INT_CNTL, tmp);
	return 0;
}

void r100_irq_disable(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(R_000040_GEN_INT_CNTL, 0);
	/* Wait and acknowledge irq */
	mdelay(1);
	tmp = RREG32(R_000044_GEN_INT_STATUS);
	WREG32(R_000044_GEN_INT_STATUS, tmp);
}

static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
{
	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
	uint32_t irq_mask = RADEON_SW_INT_TEST | RADEON_CRTC_VBLANK_STAT |
		RADEON_CRTC2_VBLANK_STAT;

	if (irqs) {
		WREG32(RADEON_GEN_INT_STATUS, irqs);
	}
	return irqs & irq_mask;
}

int r100_irq_process(struct radeon_device *rdev)
{
	uint32_t status, msi_rearm;

	status = r100_irq_ack(rdev);
	if (!status) {
		return IRQ_NONE;
	}
	if (rdev->shutdown) {
		return IRQ_NONE;
	}
	while (status) {
		/* SW interrupt */
		if (status & RADEON_SW_INT_TEST) {
			radeon_fence_process(rdev);
		}
		/* Vertical blank interrupts */
		if (status & RADEON_CRTC_VBLANK_STAT) {
			drm_handle_vblank(rdev->ddev, 0);
		}
		if (status & RADEON_CRTC2_VBLANK_STAT) {
			drm_handle_vblank(rdev->ddev, 1);
		}
		status = r100_irq_ack(rdev);
	}
	if (rdev->msi_enabled) {
		switch (rdev->family) {
		case CHIP_RS400:
		case CHIP_RS480:
			msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
			WREG32(RADEON_AIC_CNTL, msi_rearm);
			WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
			break;
		default:
			msi_rearm = RREG32(RADEON_MSI_REARM_EN) & ~RV370_MSI_REARM_EN;
			WREG32(RADEON_MSI_REARM_EN, msi_rearm);
			WREG32(RADEON_MSI_REARM_EN, msi_rearm | RV370_MSI_REARM_EN);
			break;
		}
	}
	return IRQ_HANDLED;
}

u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0)
		return RREG32(RADEON_CRTC_CRNT_FRAME);
	else
		return RREG32(RADEON_CRTC2_CRNT_FRAME);
}

void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Whoever calls radeon_fence_emit should have called ring_lock and
	 * asked for enough space (today the callers are ib scheduling and
	 * buffer moves) */
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}
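/* A note on r100_wb_init() below: the write-back buffer lets the CPU see CP
 * state without register reads. The layout programmed here appears to be:
 * scratch-register write-back at wb.gpu_addr, with the CP read-pointer copy
 * placed 1024 bytes in; writing 0xff to SCRATCH_UMSK unmasks write-back for
 * all eight scratch registers.
 */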
int r100_wb_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->wb.wb_obj == NULL) {
		r = radeon_object_create(rdev, NULL, RADEON_GPU_PAGE_SIZE,
					 true,
					 RADEON_GEM_DOMAIN_GTT,
					 false, &rdev->wb.wb_obj);
		if (r) {
			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_pin(rdev->wb.wb_obj,
				      RADEON_GEM_DOMAIN_GTT,
				      &rdev->wb.gpu_addr);
		if (r) {
			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
		if (r) {
			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
			return r;
		}
	}
	WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr);
	WREG32(R_00070C_CP_RB_RPTR_ADDR,
	       S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + 1024) >> 2));
	WREG32(R_000770_SCRATCH_UMSK, 0xff);
	return 0;
}

void r100_wb_disable(struct radeon_device *rdev)
{
	WREG32(R_000770_SCRATCH_UMSK, 0);
}

void r100_wb_fini(struct radeon_device *rdev)
{
	r100_wb_disable(rdev);
	if (rdev->wb.wb_obj) {
		radeon_object_kunmap(rdev->wb.wb_obj);
		radeon_object_unpin(rdev->wb.wb_obj);
		radeon_object_unref(&rdev->wb.wb_obj);
		rdev->wb.wb = NULL;
		rdev->wb.wb_obj = NULL;
	}
}
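/* Worked example for the pitch math in r100_copy_blit() below, assuming
 * PAGE_SIZE == 4096: stride_bytes = 4096, so pitch = 4096 / 64 = 64 (the 2D
 * engine takes pitch in 64-byte units) and stride_pixels = 4096 / 4 = 1024
 * ARGB8888 pixels per line; each loop iteration then blits up to 8191
 * page-sized lines.
 */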
int r100_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset,
		   uint64_t dst_offset,
		   unsigned num_pages,
		   struct radeon_fence *fence)
{
	uint32_t cur_pages;
	uint32_t stride_bytes = PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;

	/* radeon is limited to a 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is in units of 64 bytes */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
	num_loops = DIV_ROUND_UP(num_pages, 8191);

	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return -EINVAL;
	}
	while (num_pages > 0) {
		cur_pages = num_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_pages -= cur_pages;

		/* pages are in the Y direction - height;
		 * page width is in the X direction - width */
		radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(rdev,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, 0);
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence);
	}
	radeon_ring_unlock_commit(rdev);
	return r;
}

static int r100_cp_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	u32 tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(R_000E40_RBBM_STATUS);
		if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
			return 0;
		}
		udelay(1);
	}
	return -1;
}

void r100_ring_start(struct radeon_device *rdev)
{
	int r;

	r = radeon_ring_lock(rdev, 2);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev);
}


/* Load the microcode for the CP */
static int r100_cp_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *fw_name = NULL;
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}
	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		DRM_INFO("Loading R100 Microcode\n");
		fw_name = FIRMWARE_R100;
	} else if ((rdev->family == CHIP_R200) ||
		   (rdev->family == CHIP_RV250) ||
		   (rdev->family == CHIP_RV280) ||
		   (rdev->family == CHIP_RS300)) {
		DRM_INFO("Loading R200 Microcode\n");
		fw_name = FIRMWARE_R200;
	} else if ((rdev->family == CHIP_R300) ||
		   (rdev->family == CHIP_R350) ||
		   (rdev->family == CHIP_RV350) ||
		   (rdev->family == CHIP_RV380) ||
		   (rdev->family == CHIP_RS400) ||
		   (rdev->family == CHIP_RS480)) {
		DRM_INFO("Loading R300 Microcode\n");
		fw_name = FIRMWARE_R300;
	} else if ((rdev->family == CHIP_R420) ||
		   (rdev->family == CHIP_R423) ||
		   (rdev->family == CHIP_RV410)) {
		DRM_INFO("Loading R400 Microcode\n");
		fw_name = FIRMWARE_R420;
	} else if ((rdev->family == CHIP_RS690) ||
		   (rdev->family == CHIP_RS740)) {
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		fw_name = FIRMWARE_RS690;
	} else if (rdev->family == CHIP_RS600) {
		DRM_INFO("Loading RS600 Microcode\n");
		fw_name = FIRMWARE_RS600;
	} else if ((rdev->family == CHIP_RV515) ||
		   (rdev->family == CHIP_R520) ||
		   (rdev->family == CHIP_RV530) ||
		   (rdev->family == CHIP_R580) ||
		   (rdev->family == CHIP_RV560) ||
		   (rdev->family == CHIP_RV570)) {
		DRM_INFO("Loading R500 Microcode\n");
		fw_name = FIRMWARE_R520;
	}

	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	platform_device_unregister(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
		       fw_name);
	} else if (rdev->me_fw->size % 8) {
		printk(KERN_ERR
		       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
	}
	return err;
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i, size;

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	if (rdev->me_fw) {
		size = rdev->me_fw->size / 4;
		fw_data = (const __be32 *)&rdev->me_fw->data[0];
		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
		for (i = 0; i < size; i += 2) {
			WREG32(RADEON_CP_ME_RAM_DATAH,
			       be32_to_cpup(&fw_data[i]));
			WREG32(RADEON_CP_ME_RAM_DATAL,
			       be32_to_cpup(&fw_data[i + 1]));
		}
	}
}

int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP !\n");
	}
	/* Reset CP */
	tmp = RREG32(RADEON_CP_CSQ_STAT);
	if ((tmp & (1 << 31))) {
		DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		tmp = RREG32(RADEON_CP_CSQ_STAT);
		if ((tmp & (1 << 31))) {
			DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
		}
	} else {
		DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
	}

	if (!rdev->me_fw) {
		r = r100_cp_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Align ring size */
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
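	/* Worked example, assuming drm_order() returns the ceiling log2: a
	 * 1 MiB request gives drm_order(131072) = 17 and ring_size becomes
	 * (1 << 18) * 4 = 1 MiB again, while a 96 KiB request rounds up to
	 * 128 KiB. */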
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring_size);
	if (r) {
		return r;
	}
	/* Each time the cp reads 1024 bytes (16 dword/quadword), update
	 * the rptr copy in system ram */
	rb_blksz = 9;
	/* cp will read 128 bytes at a time (4 dwords) */
	max_fetch = 1;
	rdev->cp.align_mask = 16 - 1;
	/* Writes to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force a CP_RB_WPTR write if it is written more than once before
	 * the delay expires
	 */
	pre_write_limit = 0;
	/* Set up the cp cache like this (cache size is 96 dwords):
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So the ring cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 * the indirect1 cache size is 64 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 * the indirect2 cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)).
	 * The idea is that most of the gpu cmd will go through the indirect1
	 * buffer, so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch) |
	       RADEON_RB_NO_UPDATE);
#ifdef __BIG_ENDIAN
	tmp |= RADEON_BUF_SWAP_32BIT;
#endif
	WREG32(RADEON_CP_RB_CNTL, tmp);

	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
	WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
	/* Force read & write ptr to 0 */
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	WREG32(RADEON_CP_RB_WPTR, 0);
	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
	rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
	/* Set cp mode to bus mastering & enable cp */
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(0x718, 0);
	WREG32(0x744, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
	radeon_ring_start(rdev);
	r = radeon_ring_test(rdev);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	rdev->cp.ready = true;
	return 0;
}

void r100_cp_fini(struct radeon_device *rdev)
{
	if (r100_cp_wait_for_idle(rdev)) {
		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
	}
	/* Disable ring */
	r100_cp_disable(rdev);
	radeon_ring_fini(rdev);
	DRM_INFO("radeon: cp finalized\n");
}

void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
}

int r100_cp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 16))) {
			DRM_INFO("CP reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}

void r100_cp_commit(struct radeon_device *rdev)
{
	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
	(void)RREG32(RADEON_CP_RB_WPTR);
}


/*
 * CS functions
 */
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	/* Check that the register falls into the register range
	 * determined by the number of entries (n) in the
	 * safe register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
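	/* Bitmap indexing sketch: each 32-bit word of the safe-register
	 * bitmap covers 128 bytes of register space (32 registers x 4
	 * bytes), so word j = reg >> 7 and bit m = 1 << ((reg >> 2) & 31)
	 * select the flag that says whether a register must go through the
	 * check() callback. */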
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib->ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
	}
}

/**
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser:	parser structure holding parsing context.
 * @pkt:	where to store packet information
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * is unknown.
 **/
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
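	/* Header layout sketch (an assumption, not taken from this file):
	 * bits 31:30 carry the packet type, bits 29:16 the dword count
	 * minus one, and for type-0 packets bit 15 is the one-reg-wr flag
	 * with bits 12:0 the register dword offset; e.g. a PACKET0 writing
	 * one dword at register 0x1720 would encode as 0x000005C8
	 * (0x1720 >> 2). */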
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * r100_cs_packet_next_vline() - parse userspace VLINE packet
 * @parser:	parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched-off crtc and nulls out the
 * wait in that case.
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib->ptr;

	/* parse the wait until */
	r = r100_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check it's a wait until and only one register is written */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		r = -EINVAL;
		return r;
	}

	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		r = -EINVAL;
		return r;
	}

	/* jump over the NOP */
	r = r100_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = CP_PACKET0_GET_REG(header);
	mutex_lock(&p->rdev->ddev->mode_config.mutex);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		r = -EINVAL;
		goto out;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			r = -EINVAL;
			goto out;
		}
		ib[h_idx] = header;
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}
out:
	mutex_unlock(&p->rdev->ddev->mode_config.mutex);
	return r;
}

/**
 * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3
 * @parser:		parser structure holding parsing context.
 * @data:		pointer to relocation data
 * @offset_start:	starting offset
 * @offset_mask:	offset mask (to align start offset on)
 * @reloc:		reloc information
 *
 * Check that the next packet is a relocation packet3, do bo validation
 * and compute the GPU offset using the provided start.
 **/
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}

static int r100_get_vtx_size(uint32_t vtx_fmt)
{
	int vtx_size;
	vtx_size = 2;
	/* ordered according to bits in spec */
	if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
		vtx_size++;
	/* blend weight */
	if (vtx_fmt & (0x7 << 15))
		vtx_size += (vtx_fmt >> 15) & 0x7;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
		vtx_size++;
	return vtx_size;
}
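/* Example: a vtx_fmt selecting only RADEON_SE_VTX_FMT_Z,
 * RADEON_SE_VTX_FMT_PKCOLOR and RADEON_SE_VTX_FMT_ST0 on top of the
 * implicit 2-dword XY base yields 2 + 1 + 1 + 2 = 6 dwords per vertex.
 */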
static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx, unsigned reg)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	int r;
	int i, face;
	u32 tile_flags = 0;
	u32 idx_value;

	ib = p->ib->ptr;
	track = (struct r100_cs_track *)p->track;

	idx_value = radeon_get_ib_value(p, idx);

	switch (reg) {
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		break;
	/* FIXME: only allow PACKET3 blit? easier to check for out of
	 * range access */
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_reloc_pitch_offset(p, pkt, idx, reg);
		if (r)
			return r;
		break;
	case RADEON_RB3D_DEPTHOFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_RB3D_COLOROFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[0].robj = reloc->robj;
		track->cb[0].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
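	/* The texture register files sit at fixed strides, which is what
	 * the index arithmetic in the cases below relies on:
	 * TXOFFSET/TXFILTER/TXFORMAT repeat every 24 bytes per texture
	 * unit, TEX_SIZE/TEX_PITCH every 8 bytes, and the per-face cubic
	 * offsets every 4 bytes within a unit. */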
	case RADEON_PP_TXOFFSET_0:
	case RADEON_PP_TXOFFSET_1:
	case RADEON_PP_TXOFFSET_2:
		i = (reg - RADEON_PP_TXOFFSET_0) / 24;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[i].robj = reloc->robj;
		break;
	case RADEON_PP_CUBIC_OFFSET_T0_0:
	case RADEON_PP_CUBIC_OFFSET_T0_1:
	case RADEON_PP_CUBIC_OFFSET_T0_2:
	case RADEON_PP_CUBIC_OFFSET_T0_3:
	case RADEON_PP_CUBIC_OFFSET_T0_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[0].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[0].cube_info[i].robj = reloc->robj;
		break;
	case RADEON_PP_CUBIC_OFFSET_T1_0:
	case RADEON_PP_CUBIC_OFFSET_T1_1:
	case RADEON_PP_CUBIC_OFFSET_T1_2:
	case RADEON_PP_CUBIC_OFFSET_T1_3:
	case RADEON_PP_CUBIC_OFFSET_T1_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[1].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[1].cube_info[i].robj = reloc->robj;
		break;
	case RADEON_PP_CUBIC_OFFSET_T2_0:
	case RADEON_PP_CUBIC_OFFSET_T2_1:
	case RADEON_PP_CUBIC_OFFSET_T2_2:
	case RADEON_PP_CUBIC_OFFSET_T2_3:
	case RADEON_PP_CUBIC_OFFSET_T2_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[2].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[2].cube_info[i].robj = reloc->robj;
		break;
	case RADEON_RE_WIDTH_HEIGHT:
		track->maxy = ((idx_value >> 16) & 0x7FF);
		break;
	case RADEON_RB3D_COLORPITCH:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}

		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= RADEON_COLOR_TILE_ENABLE;
		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
			tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

		tmp = idx_value & ~(0x7 << 16);
		tmp |= tile_flags;
		ib[idx] = tmp;

		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
		break;
	case RADEON_RB3D_DEPTHPITCH:
		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
		break;
	case RADEON_RB3D_CNTL:
		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
		case 7:
		case 8:
		case 9:
		case 11:
		case 12:
			track->cb[0].cpp = 1;
			break;
		case 3:
		case 4:
		case 15:
			track->cb[0].cpp = 2;
			break;
		case 6:
			track->cb[0].cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d) !\n",
				  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
			return -EINVAL;
		}
		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
		break;
	case RADEON_RB3D_ZSTENCILCNTL:
		switch (idx_value & 0xf) {
		case 0:
			track->zb.cpp = 2;
			break;
		case 2:
		case 3:
		case 4:
		case 5:
		case 9:
		case 11:
			track->zb.cpp = 4;
			break;
		default:
			break;
		}
		break;
	case RADEON_RB3D_ZPASS_ADDR:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_CNTL:
		{
			uint32_t temp = idx_value >> 4;
			for (i = 0; i < track->num_texture; i++)
				track->textures[i].enabled = !!(temp & (1 << i));
		}
		break;
	case RADEON_SE_VF_CNTL:
		track->vap_vf_cntl = idx_value;
		break;
	case RADEON_SE_VTX_FMT:
		track->vtx_size = r100_get_vtx_size(idx_value);
		break;
	case RADEON_PP_TEX_SIZE_0:
	case RADEON_PP_TEX_SIZE_1:
	case RADEON_PP_TEX_SIZE_2:
		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
		break;
	case RADEON_PP_TEX_PITCH_0:
	case RADEON_PP_TEX_PITCH_1:
	case RADEON_PP_TEX_PITCH_2:
		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
		track->textures[i].pitch = idx_value + 32;
		break;
	case RADEON_PP_TXFILTER_0:
	case RADEON_PP_TXFILTER_1:
	case RADEON_PP_TXFILTER_2:
		i = (reg - RADEON_PP_TXFILTER_0) / 24;
		track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
						 >> RADEON_MAX_MIP_LEVEL_SHIFT);
		tmp = (idx_value >> 23) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_w = false;
		tmp = (idx_value >> 27) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_h = false;
		break;
	case RADEON_PP_TXFORMAT_0:
	case RADEON_PP_TXFORMAT_1:
	case RADEON_PP_TXFORMAT_2:
		i = (reg - RADEON_PP_TXFORMAT_0) / 24;
		if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
			track->textures[i].use_pitch = 1;
		} else {
			track->textures[i].use_pitch = 0;
			track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
			track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
		}
		if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
			track->textures[i].tex_coord_type = 2;
		switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
		case RADEON_TXFORMAT_I8:
		case RADEON_TXFORMAT_RGB332:
		case RADEON_TXFORMAT_Y8:
			track->textures[i].cpp = 1;
			break;
		case RADEON_TXFORMAT_AI88:
		case RADEON_TXFORMAT_ARGB1555:
		case RADEON_TXFORMAT_RGB565:
		case RADEON_TXFORMAT_ARGB4444:
		case RADEON_TXFORMAT_VYUY422:
		case RADEON_TXFORMAT_YVYU422:
		case RADEON_TXFORMAT_DXT1:
		case RADEON_TXFORMAT_SHADOW16:
		case RADEON_TXFORMAT_LDUDV655:
		case RADEON_TXFORMAT_DUDV88:
			track->textures[i].cpp = 2;
			break;
		case RADEON_TXFORMAT_ARGB8888:
		case RADEON_TXFORMAT_RGBA8888:
		case RADEON_TXFORMAT_DXT23:
		case RADEON_TXFORMAT_DXT45:
		case RADEON_TXFORMAT_SHADOW32:
		case RADEON_TXFORMAT_LDUDUV8888:
			track->textures[i].cpp = 4;
			break;
		}
		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
		break;
	case RADEON_PP_CUBIC_FACES_0:
	case RADEON_PP_CUBIC_FACES_1:
	case RADEON_PP_CUBIC_FACES_2:
		tmp = idx_value;
		i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
		for (face = 0; face < 4; face++) {
			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
		}
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}
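/* A note on the check below: the third body dword of PACKET3_INDX_BUFFER
 * (idx + 2) appears to carry the last byte offset the draw will read, so
 * value + 1 is the number of bytes needed from the backing object.
 */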
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj)
{
	unsigned idx;
	u32 value;
	idx = pkt->idx + 1;
	value = radeon_get_ib_value(p, idx + 2);
	if ((value + 1) > radeon_object_size(robj)) {
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
			  "(need %u have %lu) !\n",
			  value + 1,
			  radeon_object_size(robj));
		return -EINVAL;
	}
	return 0;
}

static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	unsigned idx;
	volatile uint32_t *ib;
	int r;

	ib = p->ib->ptr;
	idx = pkt->idx + 1;
	track = (struct r100_cs_track *)p->track;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		r = r100_packet3_load_vbpntr(p, pkt, idx);
		if (r)
			return r;
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset);
		track->num_arrays = 1;
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));

		track->arrays[0].robj = reloc->robj;
		track->arrays[0].esize = track->vtx_size;

		track->max_indx = radeon_get_ib_value(p, idx+1);

		track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	case PACKET3_3D_DRAW_IMMD:
		if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		track->immd_dwords = pkt->count;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r100_cs_track *track;
	int r;

	track = kzalloc(sizeof(*track), GFP_KERNEL);
	if (track == NULL)
		return -ENOMEM;
	r100_cs_track_clear(p->rdev, track);
	p->track = track;
	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			if (p->rdev->family >= CHIP_R200)
				r = r100_cs_parse_packet0(p, &pkt,
							  p->rdev->config.r100.reg_safe_bm,
							  p->rdev->config.r100.reg_safe_bm_size,
							  &r200_packet0_check);
			else
				r = r100_cs_parse_packet0(p, &pkt,
							  p->rdev->config.r100.reg_safe_bm,
							  p->rdev->config.r100.reg_safe_bm_size,
							  &r100_packet0_check);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = r100_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n",
				  pkt.type);
			return -EINVAL;
		}
		if (r) {
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}


/*
 * Global GPU functions
 */
void r100_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
	}

	if (rdev->family == CHIP_RV100 ||
	    rdev->family == CHIP_RS100 ||
	    rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
	}
}

/* Wait for vertical sync on primary CRTC */
void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
{
	uint32_t crtc_gen_cntl, tmp;
	int i;

	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
	if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
	    !(crtc_gen_cntl & RADEON_CRTC_EN)) {
		return;
	}
	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC_STATUS);
		if (tmp & RADEON_CRTC_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}

/* Wait for vertical sync on secondary CRTC */
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
{
	uint32_t crtc2_gen_cntl, tmp;
	int i;

	crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
	if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
	    !(crtc2_gen_cntl & RADEON_CRTC2_EN))
		return;

	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC2_STATUS);
		if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}

int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
		if (tmp >= n) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_gui_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
		printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
		       " Bad things might happen.\n");
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 31))) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}
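/* 0x0150 is the MC_STATUS register on these asics; bit 2 appears to read
 * back as the memory-controller idle flag, hence the poll below.
 */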
int r100_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(0x0150);
		if (tmp & (1 << 2)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

void r100_gpu_init(struct radeon_device *rdev)
{
	/* TODO: anything to do here? pipes? */
	r100_hdp_reset(rdev);
}

void r100_hdp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;

	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}

int r100_rb2d_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	int i;

	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 26))) {
			DRM_INFO("RB2D reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}

int r100_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matters */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* TODO: reset 3D engine */
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}


/*
 * VRAM info
 */
static void r100_vram_get_type(struct radeon_device *rdev)
{
	uint32_t tmp;

	rdev->mc.vram_is_ddr = false;
	if (rdev->flags & RADEON_IS_IGP)
		rdev->mc.vram_is_ddr = true;
	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
		rdev->mc.vram_is_ddr = true;
	if ((rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RV100_HALF_MODE) {
			rdev->mc.vram_width = 32;
		} else {
			rdev->mc.vram_width = 64;
		}
		if (rdev->flags & RADEON_SINGLE_CRTC) {
			rdev->mc.vram_width /= 4;
			rdev->mc.vram_is_ddr = true;
		}
	} else if (rdev->family <= CHIP_RV280) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
			rdev->mc.vram_width = 128;
		} else {
			rdev->mc.vram_width = 64;
		}
	} else {
		/* newer IGPs */
		rdev->mc.vram_width = 128;
	}
}

static u32 r100_get_accessible_vram(struct radeon_device *rdev)
{
	u32 aper_size;
	u8 byte;

	aper_size = RREG32(RADEON_CONFIG_APER_SIZE);

	/* Set HDP_APER_CNTL only on cards that are known not to be broken,
	 * that is, those that have the 2nd generation multifunction PCI
	 * interface
	 */
	if (rdev->family == CHIP_RV280 ||
	    rdev->family >= CHIP_RV350) {
		WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
			 ~RADEON_HDP_APER_CNTL);
		DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
		return aper_size * 2;
	}

	/* Older cards have all sorts of funny issues to deal with. First
	 * check if it's a multifunction card by reading the PCI config
	 * header type... Limit those to one aperture size
	 */
	pci_read_config_byte(rdev->pdev, 0xe, &byte);
	if (byte & 0x80) {
		DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
		DRM_INFO("Limiting VRAM to one aperture\n");
		return aper_size;
	}

	/* Single function older card. We read HDP_APER_CNTL to see how the
	 * BIOS has set it up. We don't write this as it's broken on some
	 * ASICs, but we expect the BIOS to have done the right thing (might
	 * be too optimistic...)
	 */
	if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
		return aper_size * 2;
	return aper_size;
}
void r100_vram_init_sizes(struct radeon_device *rdev)
{
	u64 config_aper_size;
	u32 accessible;

	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);

	if (rdev->flags & RADEON_IS_IGP) {
		uint32_t tom;
		/* read NB_TOM to get the amount of ram stolen for the GPU */
		tom = RREG32(RADEON_NB_TOM);
		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
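		/* Example decode, assuming NB_TOM packs the stolen range in
		 * 64 KiB units: tom = 0x007f0000 gives top 0x007f and
		 * bottom 0x0000, i.e. (0x7f - 0 + 1) << 16 = 8 MiB of
		 * stolen VRAM starting at address 0. */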
		/* for IGPs we need to keep VRAM where it was put by the BIOS */
		rdev->mc.vram_location = (tom & 0xffff) << 16;
		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	} else {
		rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
		/* Some production boards of m6 will report 0
		 * if it's 8 MB
		 */
		if (rdev->mc.real_vram_size == 0) {
			rdev->mc.real_vram_size = 8192 * 1024;
			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		}
		/* let driver place VRAM */
		rdev->mc.vram_location = 0xFFFFFFFFUL;
		/* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
		 * Novell bug 204882, along with lots of ubuntu ones */
		if (config_aper_size > rdev->mc.real_vram_size)
			rdev->mc.mc_vram_size = config_aper_size;
		else
			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	}

	/* work out accessible VRAM */
	accessible = r100_get_accessible_vram(rdev);

	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);

	if (accessible > rdev->mc.aper_size)
		accessible = rdev->mc.aper_size;

	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
		rdev->mc.mc_vram_size = rdev->mc.aper_size;

	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
		rdev->mc.real_vram_size = rdev->mc.aper_size;
}

void r100_vga_set_state(struct radeon_device *rdev, bool state)
{
	uint32_t temp;

	temp = RREG32(RADEON_CONFIG_CNTL);
	if (state == false) {
		temp &= ~(1 << 8);
		temp |= (1 << 9);
	} else {
		temp &= ~(1 << 9);
	}
	WREG32(RADEON_CONFIG_CNTL, temp);
}

void r100_vram_info(struct radeon_device *rdev)
{
	r100_vram_get_type(rdev);

	r100_vram_init_sizes(rdev);
}


/*
 * Indirect registers accessors
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
	if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
		return;
	}
	(void)RREG32(RADEON_CLOCK_CNTL_DATA);
	(void)RREG32(RADEON_CRTC_GEN_CNTL);
}

static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
	/* This workaround is necessary on RV100, RS100 and RS200 chips,
	 * or the chip could hang on a subsequent access
	 */
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
		udelay(5000);
	}

	/* This function is required to workaround a hardware bug in some (all?)
	 * revisions of the R300. This workaround should be called after every
	 * CLOCK_CNTL_INDEX register access. If not, register reads afterward
	 * may not be correct.
	 */
	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
		uint32_t save, tmp;

		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
	}
}
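/* The PLL block is reached through an index/data pair: write the PLL
 * register number to CLOCK_CNTL_INDEX (or'ing in RADEON_PLL_WR_EN for a
 * write), then access CLOCK_CNTL_DATA. The errata helpers above are called
 * around each step because some chips need dummy reads or delays to keep
 * this interface coherent.
 */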
uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t data;

	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
	r100_pll_errata_after_index(rdev);
	data = RREG32(RADEON_CLOCK_CNTL_DATA);
	r100_pll_errata_after_data(rdev);
	return data;
}

void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
	r100_pll_errata_after_index(rdev);
	WREG32(RADEON_CLOCK_CNTL_DATA, v);
	r100_pll_errata_after_data(rdev);
}

void r100_set_safe_registers(struct radeon_device *rdev)
{
	if (ASIC_IS_RN50(rdev)) {
		rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
	} else if (rdev->family < CHIP_R200) {
		rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
	} else {
		r200_set_safe_registers(rdev);
	}
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}

static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & rdev->cp.ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
	}
	return 0;
}
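/* CP_CSQ_STAT and CP_CSQ2_STAT each pack three 10-bit fifo pointers; the
 * shifts below pull out the ring and indirect read/write pointers.
 */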
ib1_wptr, ib2_rptr, ib2_wptr; 1929 unsigned i; 1930 1931 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 1932 seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); 1933 csq_stat = RREG32(RADEON_CP_CSQ_STAT); 1934 csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); 1935 r_rptr = (csq_stat >> 0) & 0x3ff; 1936 r_wptr = (csq_stat >> 10) & 0x3ff; 1937 ib1_rptr = (csq_stat >> 20) & 0x3ff; 1938 ib1_wptr = (csq2_stat >> 0) & 0x3ff; 1939 ib2_rptr = (csq2_stat >> 10) & 0x3ff; 1940 ib2_wptr = (csq2_stat >> 20) & 0x3ff; 1941 seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); 1942 seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); 1943 seq_printf(m, "Ring rptr %u\n", r_rptr); 1944 seq_printf(m, "Ring wptr %u\n", r_wptr); 1945 seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); 1946 seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); 1947 seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); 1948 seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); 1949 /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms 1950 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ 1951 seq_printf(m, "Ring fifo:\n"); 1952 for (i = 0; i < 256; i++) { 1953 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 1954 tmp = RREG32(RADEON_CP_CSQ_DATA); 1955 seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); 1956 } 1957 seq_printf(m, "Indirect1 fifo:\n"); 1958 for (i = 256; i <= 512; i++) { 1959 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 1960 tmp = RREG32(RADEON_CP_CSQ_DATA); 1961 seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); 1962 } 1963 seq_printf(m, "Indirect2 fifo:\n"); 1964 for (i = 640; i < ib1_wptr; i++) { 1965 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 1966 tmp = RREG32(RADEON_CP_CSQ_DATA); 1967 seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); 1968 } 1969 return 0; 1970 } 1971 1972 static int r100_debugfs_mc_info(struct seq_file *m, void *data) 1973 { 1974 struct drm_info_node *node = (struct drm_info_node *) m->private; 1975 struct drm_device *dev = node->minor->dev; 1976 struct radeon_device *rdev = dev->dev_private; 1977 uint32_t tmp; 1978 1979 tmp = RREG32(RADEON_CONFIG_MEMSIZE); 1980 seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); 1981 tmp = RREG32(RADEON_MC_FB_LOCATION); 1982 seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); 1983 tmp = RREG32(RADEON_BUS_CNTL); 1984 seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); 1985 tmp = RREG32(RADEON_MC_AGP_LOCATION); 1986 seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); 1987 tmp = RREG32(RADEON_AGP_BASE); 1988 seq_printf(m, "AGP_BASE 0x%08x\n", tmp); 1989 tmp = RREG32(RADEON_HOST_PATH_CNTL); 1990 seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); 1991 tmp = RREG32(0x01D0); 1992 seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); 1993 tmp = RREG32(RADEON_AIC_LO_ADDR); 1994 seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); 1995 tmp = RREG32(RADEON_AIC_HI_ADDR); 1996 seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); 1997 tmp = RREG32(0x01E4); 1998 seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); 1999 return 0; 2000 } 2001 2002 static struct drm_info_list r100_debugfs_rbbm_list[] = { 2003 {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, 2004 }; 2005 2006 static struct drm_info_list r100_debugfs_cp_list[] = { 2007 {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, 2008 {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, 2009 }; 2010 2011 static struct drm_info_list r100_debugfs_mc_info_list[] = { 2012 {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, 2013 }; 2014 #endif 2015 2016 int r100_debugfs_rbbm_init(struct radeon_device *rdev) 2017 { 2018 #if defined(CONFIG_DEBUG_FS) 2019 return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 
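
/* Reader's note (an assumption based on how drm_info_list entries are
 * normally exposed, not something this file spells out): once registered,
 * the dumps above typically appear as files such as
 * /sys/kernel/debug/dri/<minor>/r100_cp_ring_info and can simply be read
 * with cat while the GPU is running, which is handy when diagnosing a CP
 * lockup.
 */
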
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}

int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}

int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}

int r100_set_surface_reg(struct radeon_device *rdev, int reg,
			 uint32_t tiling_flags, uint32_t pitch,
			 uint32_t offset, uint32_t obj_size)
{
	int surf_index = reg * 16;
	int flags = 0;

	/* r100/r200 divide by 16 */
	if (rdev->family < CHIP_R300)
		flags = pitch / 16;
	else
		flags = pitch / 8;

	if (rdev->family <= CHIP_RS200) {
		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
				== (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
			flags |= RADEON_SURF_TILE_COLOR_BOTH;
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= RADEON_SURF_TILE_COLOR_MACRO;
	} else if (rdev->family <= CHIP_RV280) {
		if (tiling_flags & (RADEON_TILING_MACRO))
			flags |= R200_SURF_TILE_COLOR_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R200_SURF_TILE_COLOR_MICRO;
	} else {
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= R300_SURF_TILE_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R300_SURF_TILE_MICRO;
	}

	if (tiling_flags & RADEON_TILING_SWAP_16BIT)
		flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
	if (tiling_flags & RADEON_TILING_SWAP_32BIT)
		flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;

	DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
	return 0;
}

void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
{
	int surf_index = reg * 16;
	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
}
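
/* Worked example (illustrative numbers, not from the original file): each
 * surface register set is 16 bytes apart, so reg 2 lands on the SURFACE2
 * set (surf_index = 32). For an r100-class part (family < CHIP_R300) with a
 * macro-tiled buffer of pitch 1024 at offset 0 and size 0x100000, the call
 *
 *	r100_set_surface_reg(rdev, 2, RADEON_TILING_MACRO, 1024, 0, 0x100000);
 *
 * programs flags = 1024 / 16 with the macro-tile bit OR'd in, and the
 * bounds registers cover [0x0, 0xFFFFF].
 */
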
void r100_bandwidth_update(struct radeon_device *rdev)
{
	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
	fixed20_12 memtcas_ff[8] = {
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(0),
		fixed_init_half(1),
		fixed_init_half(2),
		fixed_init(0),
	};
	fixed20_12 memtcas_rs480_ff[8] = {
		fixed_init(0),
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(0),
		fixed_init_half(1),
		fixed_init_half(2),
		fixed_init_half(3),
	};
	fixed20_12 memtcas2_ff[8] = {
		fixed_init(0),
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(4),
		fixed_init(5),
		fixed_init(6),
		fixed_init(7),
	};
	fixed20_12 memtrbs[8] = {
		fixed_init(1),
		fixed_init_half(1),
		fixed_init(2),
		fixed_init_half(2),
		fixed_init(3),
		fixed_init_half(3),
		fixed_init(4),
		fixed_init_half(4)
	};
	fixed20_12 memtrbs_r4xx[8] = {
		fixed_init(4),
		fixed_init(5),
		fixed_init(6),
		fixed_init(7),
		fixed_init(8),
		fixed_init(9),
		fixed_init(10),
		fixed_init(11)
	};
	fixed20_12 min_mem_eff;
	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
	fixed20_12 cur_latency_mclk, cur_latency_sclk;
	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
		disp_drain_rate2, read_return_rate;
	fixed20_12 time_disp1_drop_priority;
	int c;
	int cur_size = 16;	/* in octawords */
	int critical_point = 0, critical_point2;
	/* uint32_t read_return_rate, time_disp1_drop_priority; */
	int stop_req, max_stop_req;
	struct drm_display_mode *mode1 = NULL;
	struct drm_display_mode *mode2 = NULL;
	uint32_t pixel_bytes1 = 0;
	uint32_t pixel_bytes2 = 0;

	if (rdev->mode_info.crtcs[0]->base.enabled) {
		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
		pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
	}
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		if (rdev->mode_info.crtcs[1]->base.enabled) {
			mode2 = &rdev->mode_info.crtcs[1]->base.mode;
			pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
		}
	}

	min_mem_eff.full = rfixed_const_8(0);
	/* get modes */
	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
		/* check crtc enables */
		if (mode2)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
		if (mode1)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
	}

	/*
	 * determine if there is enough bandwidth for the current mode
	 */
	mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
	temp_ff.full = rfixed_const(100);
	mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
	sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
	sclk_ff.full = rfixed_div(sclk_ff, temp_ff);

	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
	temp_ff.full = rfixed_const(temp);
	mem_bw.full = rfixed_mul(mclk_ff, temp_ff);

	pix_clk.full = 0;
	pix_clk2.full = 0;
	peak_disp_bw.full = 0;
	if (mode1) {
		temp_ff.full = rfixed_const(1000);
		pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */
		pix_clk.full = rfixed_div(pix_clk, temp_ff);
		temp_ff.full = rfixed_const(pixel_bytes1);
		peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
	}
	if (mode2) {
		temp_ff.full = rfixed_const(1000);
		pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */
		pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
		temp_ff.full = rfixed_const(pixel_bytes2);
		peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
	}

	mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
	if (peak_disp_bw.full >= mem_bw.full) {
		DRM_ERROR("You may not have enough display bandwidth for current mode\n"
			  "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
	}
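
	/* Worked example (illustrative numbers only): a 1280x1024@60 mode has
	 * a pixel clock around 108000 kHz; at 32 bpp that is a peak drain of
	 * roughly 108 MHz * 4 bytes ~= 432 MB/s. With a 200 MHz mclk on a
	 * 128-bit DDR bus, raw bandwidth is 200 MHz * 16 bytes * 2 = 6.4 GB/s
	 * before the minimum efficiency factor is applied, so the warning
	 * above only fires on very constrained configurations.
	 */
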
	/* Get values from the EXT_MEM_CNTL register, converting its contents. */
	temp = RREG32(RADEON_MEM_TIMING_CNTL);
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
		mem_trcd = ((temp >> 2) & 0x3) + 1;
		mem_trp = ((temp & 0x3)) + 1;
		mem_tras = ((temp & 0x70) >> 4) + 1;
	} else if (rdev->family == CHIP_R300 ||
		   rdev->family == CHIP_R350) { /* r300, r350 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 11) & 0xf) + 4;
	} else if (rdev->family == CHIP_RV350 ||
		   rdev->family <= CHIP_RV380) {
		/* rv3x0 */
		mem_trcd = (temp & 0x7) + 3;
		mem_trp = ((temp >> 8) & 0x7) + 3;
		mem_tras = ((temp >> 11) & 0xf) + 6;
	} else if (rdev->family == CHIP_R420 ||
		   rdev->family == CHIP_R423 ||
		   rdev->family == CHIP_RV410) {
		/* r4xx */
		mem_trcd = (temp & 0xf) + 3;
		if (mem_trcd > 15)
			mem_trcd = 15;
		mem_trp = ((temp >> 8) & 0xf) + 3;
		if (mem_trp > 15)
			mem_trp = 15;
		mem_tras = ((temp >> 12) & 0x1f) + 6;
		if (mem_tras > 31)
			mem_tras = 31;
	} else { /* RV200, R200 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 12) & 0xf) + 4;
	}
	/* convert to FF */
	trcd_ff.full = rfixed_const(mem_trcd);
	trp_ff.full = rfixed_const(mem_trp);
	tras_ff.full = rfixed_const(mem_tras);

	/* Get values from the MEM_SDRAM_MODE_REG register, converting its contents. */
	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
	data = (temp & (7 << 20)) >> 20;
	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_RS480) /* don't think rs400 */
			tcas_ff = memtcas_rs480_ff[data];
		else
			tcas_ff = memtcas_ff[data];
	} else
		tcas_ff = memtcas2_ff[data];

	if (rdev->family == CHIP_RS400 ||
	    rdev->family == CHIP_RS480) {
		/* extra cas latency stored in bits 23-25 0-4 clocks */
		data = (temp >> 23) & 0x7;
		if (data < 5)
			tcas_ff.full += rfixed_const(data);
	}

	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
		/* on the R300, Tcas is included in Trbs.
		 */
		temp = RREG32(RADEON_MEM_CNTL);
		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
		if (data == 1) {
			if (R300_MEM_USE_CD_CH_ONLY & temp) {
				temp = RREG32(R300_MC_IND_INDEX);
				temp &= ~R300_MC_IND_ADDR_MASK;
				temp |= R300_MC_READ_CNTL_CD_mcind;
				WREG32(R300_MC_IND_INDEX, temp);
				temp = RREG32(R300_MC_IND_DATA);
				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
			} else {
				temp = RREG32(R300_MC_READ_CNTL_AB);
				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
			}
		} else {
			temp = RREG32(R300_MC_READ_CNTL_AB);
			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
		}
		if (rdev->family == CHIP_RV410 ||
		    rdev->family == CHIP_R420 ||
		    rdev->family == CHIP_R423)
			trbs_ff = memtrbs_r4xx[data];
		else
			trbs_ff = memtrbs[data];
		tcas_ff.full += trbs_ff.full;
	}

	sclk_eff_ff.full = sclk_ff.full;

	if (rdev->flags & RADEON_IS_AGP) {
		fixed20_12 agpmode_ff;
		agpmode_ff.full = rfixed_const(radeon_agpmode);
		temp_ff.full = rfixed_const_666(16);
		sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff);
	}
	/* TODO: PCIE lanes may affect this - agpmode == 16?? */
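
	/* Decode sketch (made-up register value, purely for illustration):
	 * with an R300 whose MEM_TIMING_CNTL reads 0x00002B05, the R300/R350
	 * branch above yields mem_trcd = (temp & 0x7) + 1 = 6,
	 * mem_trp = ((temp >> 8) & 0x7) + 1 = 4 and
	 * mem_tras = ((temp >> 11) & 0xf) + 4 = 9 memory clocks; those feed
	 * the fixed-point latency terms computed below.
	 */
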
	if (ASIC_IS_R300(rdev)) {
		sclk_delay_ff.full = rfixed_const(250);
	} else {
		if ((rdev->family == CHIP_RV100) ||
		    rdev->flags & RADEON_IS_IGP) {
			if (rdev->mc.vram_is_ddr)
				sclk_delay_ff.full = rfixed_const(41);
			else
				sclk_delay_ff.full = rfixed_const(33);
		} else {
			if (rdev->mc.vram_width == 128)
				sclk_delay_ff.full = rfixed_const(57);
			else
				sclk_delay_ff.full = rfixed_const(41);
		}
	}

	mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff);

	if (rdev->mc.vram_is_ddr) {
		if (rdev->mc.vram_width == 32) {
			k1.full = rfixed_const(40);
			c = 3;
		} else {
			k1.full = rfixed_const(20);
			c = 1;
		}
	} else {
		k1.full = rfixed_const(40);
		c = 3;
	}

	temp_ff.full = rfixed_const(2);
	mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff);
	temp_ff.full = rfixed_const(c);
	mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff);
	temp_ff.full = rfixed_const(4);
	mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff);
	mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff);
	mc_latency_mclk.full += k1.full;

	mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff);
	mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff);

	/*
	  HW cursor time assuming worst case of full size colour cursor.
	*/
	temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
	temp_ff.full += trcd_ff.full;
	if (temp_ff.full < tras_ff.full)
		temp_ff.full = tras_ff.full;
	cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff);

	temp_ff.full = rfixed_const(cur_size);
	cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff);
	/*
	  Find the total latency for the display data.
	*/
	disp_latency_overhead.full = rfixed_const(8);
	disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff);
	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

	if (mc_latency_mclk.full > mc_latency_sclk.full)
		disp_latency.full = mc_latency_mclk.full;
	else
		disp_latency.full = mc_latency_sclk.full;

	/* setup Max GRPH_STOP_REQ default value */
	if (ASIC_IS_RV100(rdev))
		max_stop_req = 0x5c;
	else
		max_stop_req = 0x7c;

	if (mode1) {
		/* CRTC1
		   Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
		   GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
		*/
		stop_req = mode1->hdisplay * pixel_bytes1 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		  Find the drain rate of the display buffer.
		*/
		temp_ff.full = rfixed_const((16/pixel_bytes1));
		disp_drain_rate.full = rfixed_div(pix_clk, temp_ff);

		/*
		  Find the critical point of the display buffer.
		*/
		crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency);
		crit_point_ff.full += rfixed_const_half(0);

		critical_point = rfixed_trunc(crit_point_ff);

		if (rdev->disp_priority == 2) {
			critical_point = 0;
		}
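
		/* Arithmetic sketch (illustrative numbers, not from the
		 * original file): with a 108 MHz pixel clock at 32 bpp,
		 * disp_drain_rate = pix_clk / (16 / 4) = 27 M-octawords/s.
		 * If the total display latency works out to about one
		 * microsecond, the critical point lands near 27 octawords,
		 * i.e. the FIFO level at which display requests must be
		 * promoted so the FIFO never runs dry mid-scanline.
		 */
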
		/*
		  The critical point should never be above max_stop_req-4.  Setting
		  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
		*/
		if (max_stop_req - critical_point < 4)
			critical_point = 0;

		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled. */
			critical_point = 0x10;
		}

		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		temp &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		temp |= RADEON_GRPH_BUFFER_SIZE;
		temp &= ~(RADEON_GRPH_CRITICAL_CNTL |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);
		/*
		  Write the result into the register.
		*/
		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
			/* attempt to program RS400 disp regs correctly ??? */
			temp = RREG32(RS400_DISP1_REG_CNTL);
			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DMIF_MEM_CNTL1);
			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
		}
#endif

		DRM_DEBUG("GRPH_BUFFER_CNTL now %x\n",
			  /* (unsigned int)info->SavedReg->grph_buffer_cntl, */
			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
	}

	if (mode2) {
		u32 grph2_cntl;
		stop_req = mode2->hdisplay * pixel_bytes2 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		  Find the drain rate of the display buffer.
		*/
		temp_ff.full = rfixed_const((16/pixel_bytes2));
		disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff);

		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL |
				RADEON_GRPH_CRITICAL_AT_SOF |
				RADEON_GRPH_STOP_CNTL);

		if ((rdev->family == CHIP_RS100) ||
		    (rdev->family == CHIP_RS200))
			critical_point2 = 0;
		else {
			temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
			temp_ff.full = rfixed_const(temp);
			temp_ff.full = rfixed_mul(mclk_ff, temp_ff);
			if (sclk_ff.full < temp_ff.full)
				temp_ff.full = sclk_ff.full;

			read_return_rate.full = temp_ff.full;

			if (mode1) {
				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
				time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff);
			} else {
				time_disp1_drop_priority.full = 0;
			}
			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
			crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2);
			crit_point_ff.full += rfixed_const_half(0);

			critical_point2 = rfixed_trunc(crit_point_ff);

			if (rdev->disp_priority == 2) {
				critical_point2 = 0;
			}

			if (max_stop_req - critical_point2 < 4)
				critical_point2 = 0;

		}

		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0 */
			critical_point2 = 0x10;
		}

		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
#if 0
			/* attempt to program RS400 disp2 regs correctly ??? */
			temp = RREG32(RS400_DISP2_REQ_CNTL1);
			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DISP2_REQ_CNTL2);
			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
			WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC);
			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
		}

		DRM_DEBUG("GRPH2_BUFFER_CNTL now %x\n",
			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
	}
}

static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
{
	DRM_ERROR("pitch %d\n", t->pitch);
	DRM_ERROR("use_pitch %d\n", t->use_pitch);
	DRM_ERROR("width %d\n", t->width);
	DRM_ERROR("width_11 %d\n", t->width_11);
	DRM_ERROR("height %d\n", t->height);
	DRM_ERROR("height_11 %d\n", t->height_11);
	DRM_ERROR("num levels %d\n", t->num_levels);
	DRM_ERROR("depth %d\n", t->txdepth);
	DRM_ERROR("bpp %d\n", t->cpp);
	DRM_ERROR("coordinate type %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2 %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}

static int r100_cs_track_cube(struct radeon_device *rdev,
			      struct r100_cs_track *track, unsigned idx)
{
	unsigned face, w, h;
	struct radeon_object *cube_robj;
	unsigned long size;

	for (face = 0; face < 5; face++) {
		cube_robj = track->textures[idx].cube_info[face].robj;
		w = track->textures[idx].cube_info[face].width;
		h = track->textures[idx].cube_info[face].height;

		size = w * h;
		size *= track->textures[idx].cpp;

		size += track->textures[idx].cube_info[face].offset;

		if (size > radeon_object_size(cube_robj)) {
			DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
				  size, radeon_object_size(cube_robj));
			r100_cs_track_texture_print(&track->textures[idx]);
			return -1;
		}
	}
	return 0;
}
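
/* Size-check sketch (illustrative numbers, not from the original file): for
 * a 256x256 texture at 4 bytes per texel with a full mip chain, the loop in
 * r100_cs_track_texture_check() below sums 256*256 + 128*128 + ... texels
 * over the levels before multiplying by cpp, so the backing object must
 * hold roughly 256*256*4 * 4/3 ~= 349526 bytes; anything smaller is
 * rejected before the command stream ever reaches the GPU.
 */
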
static int r100_cs_track_texture_check(struct radeon_device *rdev,
				       struct r100_cs_track *track)
{
	struct radeon_object *robj;
	unsigned long size;
	unsigned u, i, w, h;
	int ret;

	for (u = 0; u < track->num_texture; u++) {
		if (!track->textures[u].enabled)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				if (rdev->family < CHIP_R300)
					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
				else
					w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width;
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				w = w / (1 << i);
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height;
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			h = h / (1 << i);
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			size += w * h;
		}
		size *= track->textures[u].cpp;
		switch (track->textures[u].tex_coord_type) {
		case 0:
			break;
		case 1:
			size *= (1 << track->textures[u].txdepth);
			break;
		case 2:
			if (track->separate_cube) {
				ret = r100_cs_track_cube(rdev, track, u);
				if (ret)
					return ret;
			} else
				size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_object_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_object_size(robj));
			r100_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}

int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
{
	unsigned i;
	unsigned long size;
	unsigned prim_walk;
	unsigned nverts;

	for (i = 0; i < track->num_cb; i++) {
		if (track->cb[i].robj == NULL) {
			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
			return -EINVAL;
		}
		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
		size += track->cb[i].offset;
		if (size > radeon_object_size(track->cb[i].robj)) {
			DRM_ERROR("[drm] Buffer too small for color buffer %d "
				  "(need %lu have %lu) !\n", i, size,
				  radeon_object_size(track->cb[i].robj));
			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
				  i, track->cb[i].pitch, track->cb[i].cpp,
				  track->cb[i].offset, track->maxy);
			return -EINVAL;
		}
	}
	if (track->z_enabled) {
		if (track->zb.robj == NULL) {
			DRM_ERROR("[drm] No buffer for z buffer !\n");
			return -EINVAL;
		}
		size = track->zb.pitch * track->zb.cpp * track->maxy;
		size += track->zb.offset;
		if (size > radeon_object_size(track->zb.robj)) {
			DRM_ERROR("[drm] Buffer too small for z buffer "
				  "(need %lu have %lu) !\n", size,
				  radeon_object_size(track->zb.robj));
			DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
				  track->zb.pitch, track->zb.cpp,
				  track->zb.offset, track->maxy);
			return -EINVAL;
		}
	}
	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
	nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
	switch (prim_walk) {
	case 1:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * track->max_indx * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_object_size(track->arrays[i].robj)) {
				DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
					  "have %lu dwords\n", prim_walk, i,
					  size >> 2,
					  radeon_object_size(track->arrays[i].robj) >> 2);
				DRM_ERROR("Max indices %u\n", track->max_indx);
				return -EINVAL;
			}
		}
		break;
	case 2:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * (nverts - 1) * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_object_size(track->arrays[i].robj)) {
				DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
					  "have %lu dwords\n", prim_walk, i, size >> 2,
					  radeon_object_size(track->arrays[i].robj) >> 2);
				return -EINVAL;
			}
		}
		break;
	case 3:
		size = track->vtx_size * nverts;
		if (size != track->immd_dwords) {
			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
				  track->immd_dwords, size);
			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
				  nverts, track->vtx_size);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
			  prim_walk);
		return -EINVAL;
	}
	return r100_cs_track_texture_check(rdev, track);
}

void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
{
	unsigned i, face;

	if (rdev->family < CHIP_R300) {
		track->num_cb = 1;
		if (rdev->family <= CHIP_RS200)
			track->num_texture = 3;
		else
			track->num_texture = 6;
		track->maxy = 2048;
		track->separate_cube = 1;
	} else {
		track->num_cb = 4;
		track->num_texture = 16;
		track->maxy = 4096;
		track->separate_cube = 0;
	}

	for (i = 0; i < track->num_cb; i++) {
		track->cb[i].robj = NULL;
		track->cb[i].pitch = 8192;
		track->cb[i].cpp = 16;
		track->cb[i].offset = 0;
	}
	track->z_enabled = true;
	track->zb.robj = NULL;
	track->zb.pitch = 8192;
	track->zb.cpp = 4;
	track->zb.offset = 0;
	track->vtx_size = 0x7F;
	track->immd_dwords = 0xFFFFFFFFUL;
	track->num_arrays = 11;
	track->max_indx = 0x00FFFFFFUL;
	for (i = 0; i < track->num_arrays; i++) {
		track->arrays[i].robj = NULL;
		track->arrays[i].esize = 0x7F;
	}
	for (i = 0; i < track->num_texture; i++) {
		track->textures[i].pitch = 16536;
		track->textures[i].width = 16536;
		track->textures[i].height = 16536;
		track->textures[i].width_11 = 1 << 11;
		track->textures[i].height_11 = 1 << 11;
		track->textures[i].num_levels = 12;
		if (rdev->family <= CHIP_RS200) {
			track->textures[i].tex_coord_type = 0;
			track->textures[i].txdepth = 0;
		} else {
			track->textures[i].txdepth = 16;
			track->textures[i].tex_coord_type = 1;
		}
		track->textures[i].cpp = 64;
		track->textures[i].robj = NULL;
		/* CS IB emission code makes sure texture units are disabled */
		track->textures[i].enabled = false;
		track->textures[i].roundup_w = true;
		track->textures[i].roundup_h = true;
		if (track->separate_cube)
			for (face = 0; face < 5; face++) {
				track->textures[i].cube_info[face].robj = NULL;
				track->textures[i].cube_info[face].width = 16536;
				track->textures[i].cube_info[face].height = 16536;
				track->textures[i].cube_info[face].offset = 0;
			}
	}
}
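
/* The ring test below follows a simple handshake: seed a scratch register
 * with 0xCAFEDEAD, emit a one-register PACKET0 write of 0xDEADBEEF to that
 * scratch register through the CP ring, then poll until the CP has fetched
 * and executed the packet. If the scratch register never flips, the CP is
 * not fetching from the ring.
 */
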
int r100_ring_test(struct radeon_device *rdev)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, 2);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(rdev, PACKET0(scratch, 0));
	radeon_ring_write(rdev, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test succeeded in %d usecs\n", i);
	} else {
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
	radeon_ring_write(rdev, ib->gpu_addr);
	radeon_ring_write(rdev, ib->length_dw);
}

int r100_ib_test(struct radeon_device *rdev)
{
	struct radeon_ib *ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, &ib);
	if (r) {
		return r;
	}
	ib->ptr[0] = PACKET0(scratch, 0);
	ib->ptr[1] = 0xDEADBEEF;
	ib->ptr[2] = PACKET2(0);
	ib->ptr[3] = PACKET2(0);
	ib->ptr[4] = PACKET2(0);
	ib->ptr[5] = PACKET2(0);
	ib->ptr[6] = PACKET2(0);
	ib->ptr[7] = PACKET2(0);
	ib->length_dw = 8;
	r = radeon_ib_schedule(rdev, ib);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	r = radeon_fence_wait(ib->fence, false);
	if (r) {
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}

void r100_ib_fini(struct radeon_device *rdev)
{
	radeon_ib_pool_fini(rdev);
}

int r100_ib_init(struct radeon_device *rdev)
{
	int r;

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing IB pool (%d).\n", r);
		r100_ib_fini(rdev);
		return r;
	}
	r = r100_ib_test(rdev);
	if (r) {
		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
		r100_ib_fini(rdev);
		return r;
	}
	return 0;
}
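
/* r100_mc_stop() and r100_mc_resume() are meant to be used as a pair around
 * memory-controller reprogramming: stop quiesces the CP and display requests
 * and captures the CRTC state into *save, and resume rewrites the display
 * base addresses and restores that captured state afterwards.
 */
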
void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Shutdown CP we shouldn't need to do that but better be safe than
	 * sorry
	 */
	rdev->cp.ready = false;
	WREG32(R_000740_CP_CSQ_CNTL, 0);

	/* Save few CRTC registers */
	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
	}

	/* Disable VGA aperture access */
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
	/* Disable cursor, overlay, crtc */
	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
					S_000054_CRTC_DISPLAY_DIS(1));
	WREG32(R_000050_CRTC_GEN_CNTL,
			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
			S_000050_CRTC_DISP_REQ_EN_B(1));
	WREG32(R_000420_OV0_SCALE_CNTL,
		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
						S_000360_CUR2_LOCK(1));
		WREG32(R_0003F8_CRTC2_GEN_CNTL,
			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
			S_0003F8_CRTC2_DISPLAY_DIS(1) |
			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
		WREG32(R_000360_CUR2_OFFSET,
			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
	}
}

void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Update base address for crtc */
	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_location);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR,
				rdev->mc.vram_location);
	}
	/* Restore CRTC registers */
	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
	}
}

void r100_vga_render_disable(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG8(R_0003C2_GENMO_WT);
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
}

static void r100_debugfs(struct radeon_device *rdev)
{
	int r;

	r = r100_debugfs_mc_info_init(rdev);
	if (r)
		dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
}

static void r100_mc_program(struct radeon_device *rdev)
{
	struct r100_mc_save save;

	/* Stop all mc clients */
	r100_mc_stop(rdev, &save);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(R_00014C_MC_AGP_LOCATION,
			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2,
				upper_32_bits(rdev->mc.agp_base) & 0xff);
	} else {
		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(R_000170_AGP_BASE, 0);
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2, 0);
	}
	/* Wait for mc idle */
	if (r100_mc_wait_for_idle(rdev))
		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
	/* Program MC; the address space should be limited to 32 bits */
	WREG32(R_000148_MC_FB_LOCATION,
		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
	r100_mc_resume(rdev, &save);
}

void r100_clock_startup(struct radeon_device *rdev)
{
	u32 tmp;

	if (radeon_dynclks != -1 && radeon_dynclks)
		radeon_legacy_set_clock_gating(rdev, 1);
	/* We need to force on some of the blocks */
	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
}
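
/* Bring-up order below (derived from the code itself, not separately
 * documented): program the MC window first, ungate clocks, then configure
 * the GPU and GART before enabling interrupts, the CP ring, writeback and
 * finally the IB pool, since each step assumes the previous one is
 * functional.
 */
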
static int r100_startup(struct radeon_device *rdev)
{
	int r;

	r100_mc_program(rdev);
	/* Resume clock */
	r100_clock_startup(rdev);
	/* Initialize GPU configuration (# pipes, ...) */
	r100_gpu_init(rdev);
	/* Initialize GART (initialize after TTM so we can allocate
	 * memory through TTM but finalize after TTM) */
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_enable(rdev);
		if (r)
			return r;
	}
	/* Enable IRQ */
	rdev->irq.sw_int = true;
	r100_irq_set(rdev);
	/* 1M ring buffer */
	r = r100_cp_init(rdev, 1024 * 1024);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
		return r;
	}
	r = r100_wb_init(rdev);
	if (r)
		dev_err(rdev->dev, "failed initializing WB (%d).\n", r);
	r = r100_ib_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
		return r;
	}
	return 0;
}

int r100_resume(struct radeon_device *rdev)
{
	/* Make sure GART is not working */
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	/* Resume clock before doing reset */
	r100_clock_startup(rdev);
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_gpu_reset(rdev)) {
		dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* post */
	radeon_combios_asic_init(rdev->ddev);
	/* Resume clock after posting */
	r100_clock_startup(rdev);
	return r100_startup(rdev);
}

int r100_suspend(struct radeon_device *rdev)
{
	r100_cp_disable(rdev);
	r100_wb_disable(rdev);
	r100_irq_disable(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	return 0;
}

void r100_fini(struct radeon_device *rdev)
{
	r100_suspend(rdev);
	r100_cp_fini(rdev);
	r100_wb_fini(rdev);
	r100_ib_fini(rdev);
	radeon_gem_fini(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_object_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

int r100_mc_init(struct radeon_device *rdev)
{
	int r;
	u32 tmp;

	/* Setup GPU memory space */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_IGP) {
		tmp = G_00015C_MC_FB_START(RREG32(R_00015C_NB_TOM));
		rdev->mc.vram_location = tmp << 16;
	}
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r)
		return r;
	return 0;
}

int r100_init(struct radeon_device *rdev)
{
	int r;

	/* Register debugfs file specific to this group of asics */
	r100_debugfs(rdev);
	/* Disable VGA */
	r100_vga_render_disable(rdev);
	/* Initialize scratch registers */
	radeon_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* TODO: disable VGA need to use VGA request */
	/* BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	if (rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
		return -EINVAL;
	} else {
		r = radeon_combios_init(rdev);
		if (r)
			return r;
	}
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_gpu_reset(rdev)) {
		dev_warn(rdev->dev,
			"GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* check if the card is posted or not */
	if (!radeon_card_posted(rdev) && rdev->bios) {
		DRM_INFO("GPU not posted. posting now...\n");
		radeon_combios_asic_init(rdev->ddev);
	}
	/* Set asic errata */
	r100_errata(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Get vram information */
	r100_vram_info(rdev);
	/* Initialize memory controller (also test AGP) */
	r = r100_mc_init(rdev);
	if (r)
		return r;
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_object_init(rdev);
	if (r)
		return r;
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_init(rdev);
		if (r)
			return r;
	}
	r100_set_safe_registers(rdev);
	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		/* Something went wrong with the accel init, so stop accel */
		dev_err(rdev->dev, "Disabling GPU acceleration\n");
		r100_suspend(rdev);
		r100_cp_fini(rdev);
		r100_wb_fini(rdev);
		r100_ib_fini(rdev);
		if (rdev->flags & RADEON_IS_PCI)
			r100_pci_gart_fini(rdev);
		radeon_irq_kms_fini(rdev);
		rdev->accel_working = false;
	}
	return 0;
}