/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_microcode.h"
#include "radeon_reg.h"
#include "radeon.h"

/* This file gathers functions specific to:
 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280
 *
 * Some of these functions might be used by newer ASICs.
 */
void r100_hdp_reset(struct radeon_device *rdev);
void r100_gpu_init(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_mc_wait_for_idle(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
int r100_debugfs_mc_info_init(struct radeon_device *rdev);


/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here ? */
	/* It seems the hw only caches one entry, so we should discard this
	 * entry; otherwise, if the first GPU GART read hits this entry it
	 * could end up at the wrong address.
	 */
}
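
/*
 * Rough bring-up sequence for the PCI GART below (summarized from the code,
 * not from hardware documentation): allocate the page table in system RAM,
 * program the translated address range (AIC_LO_ADDR/AIC_HI_ADDR), enable bus
 * mastering, point AIC_PT_BASE at the table, then set the translate-enable
 * bit in AIC_CNTL and flush the TLB.
 */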
int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	if (rdev->gart.table.ram.ptr == NULL) {
		rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
		r = radeon_gart_table_ram_alloc(rdev);
		if (r) {
			return r;
		}
	}
	/* discard memory requests outside of the configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translation */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
	WREG32(RADEON_AIC_HI_ADDR, tmp);
	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	rdev->gart.ready = true;
	return 0;
}

void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory requests outside of the configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}

int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	/* reject out-of-range page indices (i == num_gpu_pages is past the end) */
	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	rdev->gart.table.ram.ptr[i] = cpu_to_le32((uint32_t)addr);
	return 0;
}

int r100_gart_enable(struct radeon_device *rdev)
{
	if (rdev->flags & RADEON_IS_AGP) {
		r100_pci_gart_disable(rdev);
		return 0;
	}
	return r100_pci_gart_enable(rdev);
}


/*
 * MC
 */
void r100_mc_disable_clients(struct radeon_device *rdev)
{
	uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;

	/* FIXME: is this function correct for rs100,rs200,rs300 ? */
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	/* stop display and memory access */
	ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
	WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
	crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
	WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);

	r100_gpu_wait_for_vsync(rdev);

	WREG32(RADEON_CRTC_GEN_CNTL,
	       (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
	       RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);

	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);

		r100_gpu_wait_for_vsync2(rdev);
		WREG32(RADEON_CRTC2_GEN_CNTL,
		       (crtc2_gen_cntl &
			~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
		       RADEON_CRTC2_DISP_REQ_EN_B);
	}

	udelay(500);
}

void r100_mc_setup(struct radeon_device *rdev)
{
	uint32_t tmp;
	int r;

	r = r100_debugfs_mc_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
	}
	/* Write VRAM size in case we are limiting it */
	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
	tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
	tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
	WREG32(RADEON_MC_FB_LOCATION, tmp);

	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);

	if (rdev->flags & RADEON_IS_AGP) {
		tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
		tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
		tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
		WREG32(RADEON_MC_AGP_LOCATION, tmp);
		WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
	} else {
		WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(RADEON_AGP_BASE, 0);
	}

	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}

int r100_mc_init(struct radeon_device *rdev)
{
	int r;

	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
	}

	r100_gpu_init(rdev);
	/* Disable the GART, which also disables out-of-GART access */
	r100_pci_gart_disable(rdev);

	/* Setup GPU memory space */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	r100_mc_disable_clients(rdev);
	if (r100_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	r100_mc_setup(rdev);
	return 0;
}

void r100_mc_fini(struct radeon_device *rdev)
{
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
	radeon_gart_fini(rdev);
}


/*
 * Fence emission
 */
void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Whoever calls radeon_fence_emit should call ring_lock and ask
	 * for enough space (today the callers are the ib scheduler and
	 * buffer moves) */
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}


/*
 * Writeback
 */
int r100_wb_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->wb.wb_obj == NULL) {
		r = radeon_object_create(rdev, NULL, 4096,
					 true,
					 RADEON_GEM_DOMAIN_GTT,
					 false, &rdev->wb.wb_obj);
		if (r) {
			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_pin(rdev->wb.wb_obj,
				      RADEON_GEM_DOMAIN_GTT,
				      &rdev->wb.gpu_addr);
		if (r) {
			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
		if (r) {
			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
			return r;
		}
	}
	/* scratch register write-back base address (likely SCRATCH_ADDR) */
	WREG32(0x774, rdev->wb.gpu_addr);
	/* ring rptr write-back address (likely CP_RB_RPTR_ADDR) */
	WREG32(0x70C, rdev->wb.gpu_addr + 1024);
	/* enable write-back for all scratch registers (likely SCRATCH_UMSK) */
	WREG32(0x770, 0xff);
	return 0;
}

void r100_wb_fini(struct radeon_device *rdev)
{
	if (rdev->wb.wb_obj) {
		radeon_object_kunmap(rdev->wb.wb_obj);
		radeon_object_unpin(rdev->wb.wb_obj);
		radeon_object_unref(&rdev->wb.wb_obj);
		rdev->wb.wb = NULL;
		rdev->wb.wb_obj = NULL;
	}
}

int r100_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset,
		   uint64_t dst_offset,
		   unsigned num_pages,
		   struct radeon_fence *fence)
{
	uint32_t cur_pages;
	uint32_t stride_bytes = PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;

	/* radeon limited to 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is /64 */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
	num_loops = DIV_ROUND_UP(num_pages, 8191);

	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return -EINVAL;
	}
	while (num_pages > 0) {
		cur_pages = num_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_pages -= cur_pages;

		/* pages are laid out in the Y direction (height),
		 * page width is in the X direction (width) */
		radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(rdev,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, 0);
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence);
	}
	radeon_ring_unlock_commit(rdev);
	return r;
}


/*
 * CP
 */
void r100_ring_start(struct radeon_device *rdev)
{
	int r;

	r = radeon_ring_lock(rdev, 2);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev);
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	WREG32(RADEON_CP_ME_RAM_ADDR, 0);
	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		DRM_INFO("Loading R100 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R100_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R100_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R200) ||
		   (rdev->family == CHIP_RV250) ||
		   (rdev->family == CHIP_RV280) ||
		   (rdev->family == CHIP_RS300)) {
		DRM_INFO("Loading R200 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R200_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R200_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R300) ||
		   (rdev->family == CHIP_R350) ||
		   (rdev->family == CHIP_RV350) ||
		   (rdev->family == CHIP_RV380) ||
		   (rdev->family == CHIP_RS400) ||
		   (rdev->family == CHIP_RS480)) {
		DRM_INFO("Loading R300 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R300_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R300_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R420) ||
		   (rdev->family == CHIP_R423) ||
		   (rdev->family == CHIP_RV410)) {
		DRM_INFO("Loading R400 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R420_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R420_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_RS690) ||
		   (rdev->family == CHIP_RS740)) {
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, RS690_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, RS690_cp_microcode[i][0]);
		}
	} else if (rdev->family == CHIP_RS600) {
		DRM_INFO("Loading RS600 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, RS600_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, RS600_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_RV515) ||
		   (rdev->family == CHIP_R520) ||
		   (rdev->family == CHIP_RV530) ||
		   (rdev->family == CHIP_R580) ||
		   (rdev->family == CHIP_RV560) ||
		   (rdev->family == CHIP_RV570)) {
		DRM_INFO("Loading R500 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R520_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R520_cp_microcode[i][0]);
		}
	}
}

int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP !\n");
	}
	/* Reset CP */
	tmp = RREG32(RADEON_CP_CSQ_STAT);
	if ((tmp & (1 << 31))) {
		DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		tmp = RREG32(RADEON_CP_CSQ_STAT);
		if ((tmp & (1 << 31))) {
			DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
		}
	} else {
		DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
	}
	/* Align ring size */
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring_size);
	if (r) {
		return r;
	}
	/* Each time the CP has read 1024 bytes (16 dword/quadword blocks),
	 * update the rptr copy in system ram */
	rb_blksz = 9;
	/* cp will read 128 bytes at a time (4 dwords) */
	max_fetch = 1;
	rdev->cp.align_mask = 16 - 1;
	/* Writes to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force a CP_RB_WPTR write if it is written more than once before
	 * the delay expires
	 */
	pre_write_limit = 0;
	/* Set up the cp cache like this (cache size is 96 dwords):
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So the ring cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 *    the indirect1 cache size is 64 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 *    the indirect2 cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)).
	 * The idea is that most of the GPU commands go through the indirect1
	 * buffer, so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	WREG32(RADEON_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
	       RADEON_BUF_SWAP_32BIT |
#endif
	       REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch) |
	       RADEON_RB_NO_UPDATE);
	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
	WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
	/* Force read & write ptr to 0 */
	tmp = RREG32(RADEON_CP_RB_CNTL);
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	WREG32(RADEON_CP_RB_WPTR, 0);
	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
	rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
	/* Set cp mode to bus mastering & enable cp */
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(0x718, 0);
	WREG32(0x744, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
	radeon_ring_start(rdev);
	r = radeon_ring_test(rdev);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	rdev->cp.ready = true;
	return 0;
}

void r100_cp_fini(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	radeon_ring_fini(rdev);
	DRM_INFO("radeon: cp finalized\n");
}

void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
}

int r100_cp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 16))) {
			DRM_INFO("CP reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}


/*
 * CS functions
 */
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	/* Check that the register falls into the register range
	 * determined by the number of entries (n) in the
	 * safe register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
	}
}

/**
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser:	parser structure holding parsing context.
 * @pkt:	where to store packet information
 *
 * Assumes that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * type is unknown.
 **/
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	/* only read the header once the index is known to be in range */
	header = ib_chunk->kdata[idx];
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * r100_cs_packet_next_reloc() - parse next packet, which should be a reloc packet3
 * @parser:		parser structure holding parsing context.
 * @data:		pointer to relocation data
 * @offset_start:	starting offset
 * @offset_mask:	offset mask (to align start offset on)
 * @reloc:		reloc information
 *
 * Check that the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset using the provided start.
 **/
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = ib_chunk->kdata[p3reloc.idx + 1];
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}

static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	volatile uint32_t *ib;
	uint32_t tmp;
	unsigned reg;
	unsigned i;
	unsigned idx;
	bool onereg;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	reg = pkt->reg;
	onereg = false;
	if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) {
		onereg = true;
	}
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		switch (reg) {
		/* FIXME: only allow PACKET3 blit? It is easier to check
		 * for out-of-range access */
		case RADEON_DST_PITCH_OFFSET:
		case RADEON_SRC_PITCH_OFFSET:
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			tmp = ib_chunk->kdata[idx] & 0x003fffff;
			tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
			ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
			break;
		case RADEON_RB3D_DEPTHOFFSET:
		case RADEON_RB3D_COLOROFFSET:
		case R300_RB3D_COLOROFFSET0:
		case R300_ZB_DEPTHOFFSET:
		case R200_PP_TXOFFSET_0:
		case R200_PP_TXOFFSET_1:
		case R200_PP_TXOFFSET_2:
		case R200_PP_TXOFFSET_3:
		case R200_PP_TXOFFSET_4:
		case R200_PP_TXOFFSET_5:
		case RADEON_PP_TXOFFSET_0:
		case RADEON_PP_TXOFFSET_1:
		case RADEON_PP_TXOFFSET_2:
		case R300_TX_OFFSET_0:
		case R300_TX_OFFSET_0+4:
		case R300_TX_OFFSET_0+8:
		case R300_TX_OFFSET_0+12:
		case R300_TX_OFFSET_0+16:
		case R300_TX_OFFSET_0+20:
		case R300_TX_OFFSET_0+24:
		case R300_TX_OFFSET_0+28:
		case R300_TX_OFFSET_0+32:
		case R300_TX_OFFSET_0+36:
		case R300_TX_OFFSET_0+40:
		case R300_TX_OFFSET_0+44:
		case R300_TX_OFFSET_0+48:
		case R300_TX_OFFSET_0+52:
		case R300_TX_OFFSET_0+56:
		case R300_TX_OFFSET_0+60:
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
			break;
		default:
			/* FIXME: we don't want to allow any other packets */
			break;
		}
		if (onereg) {
			/* FIXME: forbid onereg writes to a register being relocated */
			break;
		}
	}
	return 0;
}

int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj)
{
	struct radeon_cs_chunk *ib_chunk;
	unsigned idx;

	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) {
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
			  "(need %u have %lu) !\n",
			  ib_chunk->kdata[idx+2] + 1,
			  radeon_object_size(robj));
		return -EINVAL;
	}
	return 0;
}

static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	unsigned idx;
	unsigned i, c;
	volatile uint32_t *ib;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		c = ib_chunk->kdata[idx++];
		for (i = 0; i < (c - 1); i += 2, idx += 3) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
		}
		if (c & 1) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		}
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* FIXME: cleanup */
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case PACKET3_3D_DRAW_IMMD:
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_VBUF_2:
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX_2:
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_VBUF:
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX:
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r;

	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			r = r100_packet0_check(p, &pkt);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = r100_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n",
				  pkt.type);
			return -EINVAL;
		}
		if (r) {
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}


/*
 * Global GPU functions
 */
void r100_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
	}

	if (rdev->family == CHIP_RV100 ||
	    rdev->family == CHIP_RS100 ||
	    rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
	}
}

/* Wait for vertical sync on primary CRTC */
void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
{
	uint32_t crtc_gen_cntl, tmp;
	int i;

	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
	if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
	    !(crtc_gen_cntl & RADEON_CRTC_EN)) {
		return;
	}
	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC_STATUS);
		if (tmp & RADEON_CRTC_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}

/* Wait for vertical sync on secondary CRTC */
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
{
	uint32_t crtc2_gen_cntl, tmp;
	int i;

	crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
	if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
	    !(crtc2_gen_cntl & RADEON_CRTC2_EN))
		return;

	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC2_STATUS);
		if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}

int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
		if (tmp >= n) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_gui_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
		printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
		       " Bad things might happen.\n");
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 31))) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(0x0150);
		if (tmp & (1 << 2)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

void r100_gpu_init(struct radeon_device *rdev)
{
	/* TODO: anything to do here ? pipes ? */
	r100_hdp_reset(rdev);
}

void r100_hdp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;

	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}

int r100_rb2d_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	int i;

	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 26))) {
			DRM_INFO("RB2D reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}

int r100_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matters */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* TODO: reset 3D engine */
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}


/*
 * VRAM info
 */
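/*
 * Summary of the probing below (derived from the code, not from hardware
 * documentation): bus width and DDR/SDR type come from MEM_CNTL and
 * MEM_SDRAM_MODE_REG; integrated chips carve the framebuffer out of system
 * RAM, so the size is computed from NB_TOM, while discrete chips report it
 * in CONFIG_MEMSIZE (with a fallback for boards that report 0 for 8 MB).
 */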
static void r100_vram_get_type(struct radeon_device *rdev)
{
	uint32_t tmp;

	rdev->mc.vram_is_ddr = false;
	if (rdev->flags & RADEON_IS_IGP)
		rdev->mc.vram_is_ddr = true;
	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
		rdev->mc.vram_is_ddr = true;
	if ((rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RV100_HALF_MODE) {
			rdev->mc.vram_width = 32;
		} else {
			rdev->mc.vram_width = 64;
		}
		if (rdev->flags & RADEON_SINGLE_CRTC) {
			rdev->mc.vram_width /= 4;
			rdev->mc.vram_is_ddr = true;
		}
	} else if (rdev->family <= CHIP_RV280) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
			rdev->mc.vram_width = 128;
		} else {
			rdev->mc.vram_width = 64;
		}
	} else {
		/* newer IGPs */
		rdev->mc.vram_width = 128;
	}
}

void r100_vram_info(struct radeon_device *rdev)
{
	r100_vram_get_type(rdev);

	if (rdev->flags & RADEON_IS_IGP) {
		uint32_t tom;
		/* read NB_TOM to get the amount of ram stolen for the GPU */
		tom = RREG32(RADEON_NB_TOM);
		rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
	} else {
		rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
		/* Some production boards of M6 will report 0
		 * if it's 8 MB
		 */
		if (rdev->mc.vram_size == 0) {
			rdev->mc.vram_size = 8192 * 1024;
			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
		}
	}

	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
}


/*
 * Indirect registers accessors
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
	if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
		return;
	}
	(void)RREG32(RADEON_CLOCK_CNTL_DATA);
	(void)RREG32(RADEON_CRTC_GEN_CNTL);
}

static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
	/* This workaround is necessary on RV100, RS100 and RS200 chips,
	 * or the chip could hang on a subsequent access
	 */
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
		udelay(5000);
	}

	/* This function is required to work around a hardware bug in some (all?)
	 * revisions of the R300. This workaround should be called after every
	 * CLOCK_CNTL_INDEX register access. If not, register reads afterward
	 * may not be correct.
	 */
	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
		uint32_t save, tmp;

		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
	}
}

uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t data;

	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
	r100_pll_errata_after_index(rdev);
	data = RREG32(RADEON_CLOCK_CNTL_DATA);
	r100_pll_errata_after_data(rdev);
	return data;
}

void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
	r100_pll_errata_after_index(rdev);
	WREG32(RADEON_CLOCK_CNTL_DATA, v);
	r100_pll_errata_after_data(rdev);
}

uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
{
	if (reg < 0x10000)
		return readl(((void __iomem *)rdev->rmmio) + reg);
	else {
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	}
}

void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	if (reg < 0x10000)
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
	else {
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	}
}

int r100_init(struct radeon_device *rdev)
{
	return 0;
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}

static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & rdev->cp.ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
	}
	return 0;
}


static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t csq_stat, csq2_stat, tmp;
	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
	unsigned i;

	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
	r_rptr = (csq_stat >> 0) & 0x3ff;
	r_wptr = (csq_stat >> 10) & 0x3ff;
	ib1_rptr = (csq_stat >> 20) & 0x3ff;
	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
	seq_printf(m, "Ring rptr %u\n", r_rptr);
	seq_printf(m, "Ring wptr %u\n", r_wptr);
	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
	/* FIXME: 0, 128, 640 depend on the fifo setup, see cp_init_kms:
	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
	seq_printf(m, "Ring fifo:\n");
	for (i = 0; i < 256; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect1 fifo:\n");
	for (i = 256; i <= 512; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect2 fifo:\n");
	for (i = 640; i < ib1_wptr; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
	}
	return 0;
}

static int r100_debugfs_mc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_FB_LOCATION);
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_BUS_CNTL);
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AGP_BASE);
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
	tmp = RREG32(0x01D0);
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_LO_ADDR);
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_HI_ADDR);
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
	tmp = RREG32(0x01E4);
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
#endif

int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}

int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}

int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}