1 /* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <deathsimple@vodafone.de> 29 */ 30 31 #include <linux/firmware.h> 32 #include <linux/module.h> 33 34 #include <drm/drm.h> 35 36 #include "radeon.h" 37 #include "radeon_ucode.h" 38 #include "r600d.h" 39 40 /* 1 second timeout */ 41 #define UVD_IDLE_TIMEOUT_MS 1000 42 43 /* Firmware Names */ 44 #define FIRMWARE_R600 "radeon/R600_uvd.bin" 45 #define FIRMWARE_RS780 "radeon/RS780_uvd.bin" 46 #define FIRMWARE_RV770 "radeon/RV770_uvd.bin" 47 #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" 48 #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" 49 #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" 50 #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" 51 #define FIRMWARE_BONAIRE_LEGACY "radeon/BONAIRE_uvd.bin" 52 #define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin" 53 54 MODULE_FIRMWARE(FIRMWARE_R600); 55 MODULE_FIRMWARE(FIRMWARE_RS780); 56 MODULE_FIRMWARE(FIRMWARE_RV770); 57 MODULE_FIRMWARE(FIRMWARE_RV710); 58 MODULE_FIRMWARE(FIRMWARE_CYPRESS); 59 MODULE_FIRMWARE(FIRMWARE_SUMO); 60 MODULE_FIRMWARE(FIRMWARE_TAHITI); 61 MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY); 62 MODULE_FIRMWARE(FIRMWARE_BONAIRE); 63 64 static void radeon_uvd_idle_work_handler(struct work_struct *work); 65 66 int radeon_uvd_init(struct radeon_device *rdev) 67 { 68 unsigned long bo_size; 69 const char *fw_name = NULL, *legacy_fw_name = NULL; 70 int i, r; 71 72 INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); 73 74 switch (rdev->family) { 75 case CHIP_RV610: 76 case CHIP_RV630: 77 case CHIP_RV670: 78 case CHIP_RV620: 79 case CHIP_RV635: 80 legacy_fw_name = FIRMWARE_R600; 81 break; 82 83 case CHIP_RS780: 84 case CHIP_RS880: 85 legacy_fw_name = FIRMWARE_RS780; 86 break; 87 88 case CHIP_RV770: 89 legacy_fw_name = FIRMWARE_RV770; 90 break; 91 92 case CHIP_RV710: 93 case CHIP_RV730: 94 case CHIP_RV740: 95 legacy_fw_name = FIRMWARE_RV710; 96 break; 97 98 case CHIP_CYPRESS: 99 case CHIP_HEMLOCK: 100 case CHIP_JUNIPER: 101 case CHIP_REDWOOD: 102 case CHIP_CEDAR: 103 legacy_fw_name = FIRMWARE_CYPRESS; 104 break; 105 106 case CHIP_SUMO: 107 case CHIP_SUMO2: 108 case CHIP_PALM: 109 case CHIP_CAYMAN: 110 case CHIP_BARTS: 111 case CHIP_TURKS: 112 case CHIP_CAICOS: 113 legacy_fw_name = FIRMWARE_SUMO; 114 break; 115 116 case CHIP_TAHITI: 117 case CHIP_VERDE: 118 case CHIP_PITCAIRN: 119 case CHIP_ARUBA: 120 case CHIP_OLAND: 121 legacy_fw_name = FIRMWARE_TAHITI; 122 break; 123 124 case CHIP_BONAIRE: 125 case CHIP_KABINI: 126 case CHIP_KAVERI: 127 case CHIP_HAWAII: 128 case CHIP_MULLINS: 129 legacy_fw_name = FIRMWARE_BONAIRE_LEGACY; 130 fw_name = FIRMWARE_BONAIRE; 131 break; 132 133 default: 134 return -EINVAL; 135 } 136 137 rdev->uvd.fw_header_present = false; 138 rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES; 139 if (fw_name) { 140 /* Let's try to load the newer firmware first */ 141 r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); 142 if (r) { 143 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 144 fw_name); 145 } else { 146 struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data; 147 unsigned version_major, version_minor, family_id; 148 149 r = radeon_ucode_validate(rdev->uvd_fw); 150 if (r) 151 return r; 152 153 rdev->uvd.fw_header_present = true; 154 155 family_id = (__force u32)(hdr->ucode_version) & 0xff; 156 version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version)) 157 >> 24) & 0xff; 158 version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version)) 159 >> 8) & 0xff; 160 DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n", 161 version_major, version_minor, family_id); 162 163 /* 164 * Limit the number of UVD handles depending on 165 * microcode major and minor versions. 166 */ 167 if ((version_major >= 0x01) && (version_minor >= 0x37)) 168 rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES; 169 } 170 } 171 172 /* 173 * In case there is only legacy firmware, or we encounter an error 174 * while loading the new firmware, we fall back to loading the legacy 175 * firmware now. 176 */ 177 if (!fw_name || r) { 178 r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev); 179 if (r) { 180 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 181 legacy_fw_name); 182 return r; 183 } 184 } 185 186 bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + 187 RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + 188 RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles; 189 r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, 190 RADEON_GEM_DOMAIN_VRAM, 0, NULL, 191 NULL, &rdev->uvd.vcpu_bo); 192 if (r) { 193 dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); 194 return r; 195 } 196 197 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 198 if (r) { 199 radeon_bo_unref(&rdev->uvd.vcpu_bo); 200 dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); 201 return r; 202 } 203 204 r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 205 &rdev->uvd.gpu_addr); 206 if (r) { 207 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 208 radeon_bo_unref(&rdev->uvd.vcpu_bo); 209 dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); 210 return r; 211 } 212 213 r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); 214 if (r) { 215 dev_err(rdev->dev, "(%d) UVD map failed\n", r); 216 return r; 217 } 218 219 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 220 221 for (i = 0; i < rdev->uvd.max_handles; ++i) { 222 atomic_set(&rdev->uvd.handles[i], 0); 223 rdev->uvd.filp[i] = NULL; 224 rdev->uvd.img_size[i] = 0; 225 } 226 227 return 0; 228 } 229 230 void radeon_uvd_fini(struct radeon_device *rdev) 231 { 232 int r; 233 234 if (rdev->uvd.vcpu_bo == NULL) 235 return; 236 237 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 238 if (!r) { 239 radeon_bo_kunmap(rdev->uvd.vcpu_bo); 240 radeon_bo_unpin(rdev->uvd.vcpu_bo); 241 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 242 } 243 244 radeon_bo_unref(&rdev->uvd.vcpu_bo); 245 246 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]); 247 248 release_firmware(rdev->uvd_fw); 249 } 250 251 int radeon_uvd_suspend(struct radeon_device *rdev) 252 { 253 int i, r; 254 255 if (rdev->uvd.vcpu_bo == NULL) 256 return 0; 257 258 for (i = 0; i < rdev->uvd.max_handles; ++i) { 259 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 260 if (handle != 0) { 261 struct radeon_fence *fence; 262 263 radeon_uvd_note_usage(rdev); 264 265 r = radeon_uvd_get_destroy_msg(rdev, 266 R600_RING_TYPE_UVD_INDEX, handle, &fence); 267 if (r) { 268 DRM_ERROR("Error destroying UVD (%d)!\n", r); 269 continue; 270 } 271 272 radeon_fence_wait(fence, false); 273 radeon_fence_unref(&fence); 274 275 rdev->uvd.filp[i] = NULL; 276 atomic_set(&rdev->uvd.handles[i], 0); 277 } 278 } 279 280 return 0; 281 } 282 283 int radeon_uvd_resume(struct radeon_device *rdev) 284 { 285 unsigned size; 286 void *ptr; 287 288 if (rdev->uvd.vcpu_bo == NULL) 289 return -EINVAL; 290 291 memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); 292 293 size = radeon_bo_size(rdev->uvd.vcpu_bo); 294 size -= rdev->uvd_fw->size; 295 296 ptr = rdev->uvd.cpu_addr; 297 ptr += rdev->uvd_fw->size; 298 299 memset_io((void __iomem *)ptr, 0, size); 300 301 return 0; 302 } 303 304 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, 305 uint32_t allowed_domains) 306 { 307 int i; 308 309 for (i = 0; i < rbo->placement.num_placement; ++i) { 310 rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; 311 rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 312 } 313 314 /* If it must be in VRAM it must be in the first segment as well */ 315 if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) 316 return; 317 318 /* abort if we already have more than one placement */ 319 if (rbo->placement.num_placement > 1) 320 return; 321 322 /* add another 256MB segment */ 323 rbo->placements[1] = rbo->placements[0]; 324 rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 325 rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 326 rbo->placement.num_placement++; 327 rbo->placement.num_busy_placement++; 328 } 329 330 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) 331 { 332 int i, r; 333 for (i = 0; i < rdev->uvd.max_handles; ++i) { 334 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 335 if (handle != 0 && rdev->uvd.filp[i] == filp) { 336 struct radeon_fence *fence; 337 338 radeon_uvd_note_usage(rdev); 339 340 r = radeon_uvd_get_destroy_msg(rdev, 341 R600_RING_TYPE_UVD_INDEX, handle, &fence); 342 if (r) { 343 DRM_ERROR("Error destroying UVD (%d)!\n", r); 344 continue; 345 } 346 347 radeon_fence_wait(fence, false); 348 radeon_fence_unref(&fence); 349 350 rdev->uvd.filp[i] = NULL; 351 atomic_set(&rdev->uvd.handles[i], 0); 352 } 353 } 354 } 355 356 static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) 357 { 358 unsigned stream_type = msg[4]; 359 unsigned width = msg[6]; 360 unsigned height = msg[7]; 361 unsigned dpb_size = msg[9]; 362 unsigned pitch = msg[28]; 363 364 unsigned width_in_mb = width / 16; 365 unsigned height_in_mb = ALIGN(height / 16, 2); 366 367 unsigned image_size, tmp, min_dpb_size; 368 369 image_size = width * height; 370 image_size += image_size / 2; 371 image_size = ALIGN(image_size, 1024); 372 373 switch (stream_type) { 374 case 0: /* H264 */ 375 376 /* reference picture buffer */ 377 min_dpb_size = image_size * 17; 378 379 /* macroblock context buffer */ 380 min_dpb_size += width_in_mb * height_in_mb * 17 * 192; 381 382 /* IT surface buffer */ 383 min_dpb_size += width_in_mb * height_in_mb * 32; 384 break; 385 386 case 1: /* VC1 */ 387 388 /* reference picture buffer */ 389 min_dpb_size = image_size * 3; 390 391 /* CONTEXT_BUFFER */ 392 min_dpb_size += width_in_mb * height_in_mb * 128; 393 394 /* IT surface buffer */ 395 min_dpb_size += width_in_mb * 64; 396 397 /* DB surface buffer */ 398 min_dpb_size += width_in_mb * 128; 399 400 /* BP */ 401 tmp = max(width_in_mb, height_in_mb); 402 min_dpb_size += ALIGN(tmp * 7 * 16, 64); 403 break; 404 405 case 3: /* MPEG2 */ 406 407 /* reference picture buffer */ 408 min_dpb_size = image_size * 3; 409 break; 410 411 case 4: /* MPEG4 */ 412 413 /* reference picture buffer */ 414 min_dpb_size = image_size * 3; 415 416 /* CM */ 417 min_dpb_size += width_in_mb * height_in_mb * 64; 418 419 /* IT surface buffer */ 420 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); 421 break; 422 423 default: 424 DRM_ERROR("UVD codec not handled %d!\n", stream_type); 425 return -EINVAL; 426 } 427 428 if (width > pitch) { 429 DRM_ERROR("Invalid UVD decoding target pitch!\n"); 430 return -EINVAL; 431 } 432 433 if (dpb_size < min_dpb_size) { 434 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n", 435 dpb_size, min_dpb_size); 436 return -EINVAL; 437 } 438 439 buf_sizes[0x1] = dpb_size; 440 buf_sizes[0x2] = image_size; 441 return 0; 442 } 443 444 static int radeon_uvd_validate_codec(struct radeon_cs_parser *p, 445 unsigned stream_type) 446 { 447 switch (stream_type) { 448 case 0: /* H264 */ 449 case 1: /* VC1 */ 450 /* always supported */ 451 return 0; 452 453 case 3: /* MPEG2 */ 454 case 4: /* MPEG4 */ 455 /* only since UVD 3 */ 456 if (p->rdev->family >= CHIP_PALM) 457 return 0; 458 459 fallthrough; 460 default: 461 DRM_ERROR("UVD codec not supported by hardware %d!\n", 462 stream_type); 463 return -EINVAL; 464 } 465 } 466 467 static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, 468 unsigned offset, unsigned buf_sizes[]) 469 { 470 int32_t *msg, msg_type, handle; 471 unsigned img_size = 0; 472 void *ptr; 473 long r; 474 int i; 475 476 if (offset & 0x3F) { 477 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 478 return -EINVAL; 479 } 480 481 r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, 482 MAX_SCHEDULE_TIMEOUT); 483 if (r <= 0) { 484 DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r); 485 return r ? r : -ETIME; 486 } 487 488 r = radeon_bo_kmap(bo, &ptr); 489 if (r) { 490 DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); 491 return r; 492 } 493 494 msg = ptr + offset; 495 496 msg_type = msg[1]; 497 handle = msg[2]; 498 499 if (handle == 0) { 500 radeon_bo_kunmap(bo); 501 DRM_ERROR("Invalid UVD handle!\n"); 502 return -EINVAL; 503 } 504 505 switch (msg_type) { 506 case 0: 507 /* it's a create msg, calc image size (width * height) */ 508 img_size = msg[7] * msg[8]; 509 510 r = radeon_uvd_validate_codec(p, msg[4]); 511 radeon_bo_kunmap(bo); 512 if (r) 513 return r; 514 515 /* try to alloc a new handle */ 516 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 517 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 518 DRM_ERROR("Handle 0x%x already in use!\n", handle); 519 return -EINVAL; 520 } 521 522 if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { 523 p->rdev->uvd.filp[i] = p->filp; 524 p->rdev->uvd.img_size[i] = img_size; 525 return 0; 526 } 527 } 528 529 DRM_ERROR("No more free UVD handles!\n"); 530 return -EINVAL; 531 532 case 1: 533 /* it's a decode msg, validate codec and calc buffer sizes */ 534 r = radeon_uvd_validate_codec(p, msg[4]); 535 if (!r) 536 r = radeon_uvd_cs_msg_decode(msg, buf_sizes); 537 radeon_bo_kunmap(bo); 538 if (r) 539 return r; 540 541 /* validate the handle */ 542 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 543 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 544 if (p->rdev->uvd.filp[i] != p->filp) { 545 DRM_ERROR("UVD handle collision detected!\n"); 546 return -EINVAL; 547 } 548 return 0; 549 } 550 } 551 552 DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); 553 return -ENOENT; 554 555 case 2: 556 /* it's a destroy msg, free the handle */ 557 for (i = 0; i < p->rdev->uvd.max_handles; ++i) 558 atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); 559 radeon_bo_kunmap(bo); 560 return 0; 561 562 default: 563 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 564 } 565 566 radeon_bo_kunmap(bo); 567 return -EINVAL; 568 } 569 570 static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, 571 int data0, int data1, 572 unsigned buf_sizes[], bool *has_msg_cmd) 573 { 574 struct radeon_cs_chunk *relocs_chunk; 575 struct radeon_bo_list *reloc; 576 unsigned idx, cmd, offset; 577 uint64_t start, end; 578 int r; 579 580 relocs_chunk = p->chunk_relocs; 581 offset = radeon_get_ib_value(p, data0); 582 idx = radeon_get_ib_value(p, data1); 583 if (idx >= relocs_chunk->length_dw) { 584 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 585 idx, relocs_chunk->length_dw); 586 return -EINVAL; 587 } 588 589 reloc = &p->relocs[(idx / 4)]; 590 start = reloc->gpu_offset; 591 end = start + radeon_bo_size(reloc->robj); 592 start += offset; 593 594 p->ib.ptr[data0] = start & 0xFFFFFFFF; 595 p->ib.ptr[data1] = start >> 32; 596 597 cmd = radeon_get_ib_value(p, p->idx) >> 1; 598 599 if (cmd < 0x4) { 600 if (end <= start) { 601 DRM_ERROR("invalid reloc offset %X!\n", offset); 602 return -EINVAL; 603 } 604 if ((end - start) < buf_sizes[cmd]) { 605 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 606 (unsigned)(end - start), buf_sizes[cmd]); 607 return -EINVAL; 608 } 609 610 } else if (cmd != 0x100) { 611 DRM_ERROR("invalid UVD command %X!\n", cmd); 612 return -EINVAL; 613 } 614 615 if ((start >> 28) != ((end - 1) >> 28)) { 616 DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", 617 start, end); 618 return -EINVAL; 619 } 620 621 /* TODO: is this still necessary on NI+ ? */ 622 if ((cmd == 0 || cmd == 0x3) && 623 (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { 624 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 625 start, end); 626 return -EINVAL; 627 } 628 629 if (cmd == 0) { 630 if (*has_msg_cmd) { 631 DRM_ERROR("More than one message in a UVD-IB!\n"); 632 return -EINVAL; 633 } 634 *has_msg_cmd = true; 635 r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes); 636 if (r) 637 return r; 638 } else if (!*has_msg_cmd) { 639 DRM_ERROR("Message needed before other commands are send!\n"); 640 return -EINVAL; 641 } 642 643 return 0; 644 } 645 646 static int radeon_uvd_cs_reg(struct radeon_cs_parser *p, 647 struct radeon_cs_packet *pkt, 648 int *data0, int *data1, 649 unsigned buf_sizes[], 650 bool *has_msg_cmd) 651 { 652 int i, r; 653 654 p->idx++; 655 for (i = 0; i <= pkt->count; ++i) { 656 switch (pkt->reg + i*4) { 657 case UVD_GPCOM_VCPU_DATA0: 658 *data0 = p->idx; 659 break; 660 case UVD_GPCOM_VCPU_DATA1: 661 *data1 = p->idx; 662 break; 663 case UVD_GPCOM_VCPU_CMD: 664 r = radeon_uvd_cs_reloc(p, *data0, *data1, 665 buf_sizes, has_msg_cmd); 666 if (r) 667 return r; 668 break; 669 case UVD_ENGINE_CNTL: 670 case UVD_NO_OP: 671 break; 672 default: 673 DRM_ERROR("Invalid reg 0x%X!\n", 674 pkt->reg + i*4); 675 return -EINVAL; 676 } 677 p->idx++; 678 } 679 return 0; 680 } 681 682 int radeon_uvd_cs_parse(struct radeon_cs_parser *p) 683 { 684 struct radeon_cs_packet pkt; 685 int r, data0 = 0, data1 = 0; 686 687 /* does the IB has a msg command */ 688 bool has_msg_cmd = false; 689 690 /* minimum buffer sizes */ 691 unsigned buf_sizes[] = { 692 [0x00000000] = 2048, 693 [0x00000001] = 32 * 1024 * 1024, 694 [0x00000002] = 2048 * 1152 * 3, 695 [0x00000003] = 2048, 696 }; 697 698 if (p->chunk_ib->length_dw % 16) { 699 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", 700 p->chunk_ib->length_dw); 701 return -EINVAL; 702 } 703 704 if (p->chunk_relocs == NULL) { 705 DRM_ERROR("No relocation chunk !\n"); 706 return -EINVAL; 707 } 708 709 710 do { 711 r = radeon_cs_packet_parse(p, &pkt, p->idx); 712 if (r) 713 return r; 714 switch (pkt.type) { 715 case RADEON_PACKET_TYPE0: 716 r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1, 717 buf_sizes, &has_msg_cmd); 718 if (r) 719 return r; 720 break; 721 case RADEON_PACKET_TYPE2: 722 p->idx += pkt.count + 2; 723 break; 724 default: 725 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 726 return -EINVAL; 727 } 728 } while (p->idx < p->chunk_ib->length_dw); 729 730 if (!has_msg_cmd) { 731 DRM_ERROR("UVD-IBs need a msg command!\n"); 732 return -EINVAL; 733 } 734 735 return 0; 736 } 737 738 static int radeon_uvd_send_msg(struct radeon_device *rdev, 739 int ring, uint64_t addr, 740 struct radeon_fence **fence) 741 { 742 struct radeon_ib ib; 743 int i, r; 744 745 r = radeon_ib_get(rdev, ring, &ib, NULL, 64); 746 if (r) 747 return r; 748 749 ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); 750 ib.ptr[1] = addr; 751 ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); 752 ib.ptr[3] = addr >> 32; 753 ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); 754 ib.ptr[5] = 0; 755 for (i = 6; i < 16; i += 2) { 756 ib.ptr[i] = PACKET0(UVD_NO_OP, 0); 757 ib.ptr[i+1] = 0; 758 } 759 ib.length_dw = 16; 760 761 r = radeon_ib_schedule(rdev, &ib, NULL, false); 762 763 if (fence) 764 *fence = radeon_fence_ref(ib.fence); 765 766 radeon_ib_free(rdev, &ib); 767 return r; 768 } 769 770 /* 771 * multiple fence commands without any stream commands in between can 772 * crash the vcpu so just try to emmit a dummy create/destroy msg to 773 * avoid this 774 */ 775 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, 776 uint32_t handle, struct radeon_fence **fence) 777 { 778 /* we use the last page of the vcpu bo for the UVD message */ 779 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 780 RADEON_GPU_PAGE_SIZE; 781 782 uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs); 783 uint64_t addr = rdev->uvd.gpu_addr + offs; 784 785 int r, i; 786 787 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 788 if (r) 789 return r; 790 791 /* stitch together an UVD create msg */ 792 writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]); 793 writel(0x0, (void __iomem *)&msg[1]); 794 writel((__force u32)cpu_to_le32(handle), &msg[2]); 795 writel(0x0, &msg[3]); 796 writel(0x0, &msg[4]); 797 writel(0x0, &msg[5]); 798 writel(0x0, &msg[6]); 799 writel((__force u32)cpu_to_le32(0x00000780), &msg[7]); 800 writel((__force u32)cpu_to_le32(0x00000440), &msg[8]); 801 writel(0x0, &msg[9]); 802 writel((__force u32)cpu_to_le32(0x01b37000), &msg[10]); 803 for (i = 11; i < 1024; ++i) 804 writel(0x0, &msg[i]); 805 806 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 807 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 808 return r; 809 } 810 811 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, 812 uint32_t handle, struct radeon_fence **fence) 813 { 814 /* we use the last page of the vcpu bo for the UVD message */ 815 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 816 RADEON_GPU_PAGE_SIZE; 817 818 uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs); 819 uint64_t addr = rdev->uvd.gpu_addr + offs; 820 821 int r, i; 822 823 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 824 if (r) 825 return r; 826 827 /* stitch together an UVD destroy msg */ 828 writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]); 829 writel((__force u32)cpu_to_le32(0x00000002), &msg[1]); 830 writel((__force u32)cpu_to_le32(handle), &msg[2]); 831 writel(0x0, &msg[3]); 832 for (i = 4; i < 1024; ++i) 833 writel(0x0, &msg[i]); 834 835 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 836 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 837 return r; 838 } 839 840 /** 841 * radeon_uvd_count_handles - count number of open streams 842 * 843 * @rdev: radeon_device pointer 844 * @sd: number of SD streams 845 * @hd: number of HD streams 846 * 847 * Count the number of open SD/HD streams as a hint for power mangement 848 */ 849 static void radeon_uvd_count_handles(struct radeon_device *rdev, 850 unsigned *sd, unsigned *hd) 851 { 852 unsigned i; 853 854 *sd = 0; 855 *hd = 0; 856 857 for (i = 0; i < rdev->uvd.max_handles; ++i) { 858 if (!atomic_read(&rdev->uvd.handles[i])) 859 continue; 860 861 if (rdev->uvd.img_size[i] >= 720*576) 862 ++(*hd); 863 else 864 ++(*sd); 865 } 866 } 867 868 static void radeon_uvd_idle_work_handler(struct work_struct *work) 869 { 870 struct radeon_device *rdev = 871 container_of(work, struct radeon_device, uvd.idle_work.work); 872 873 if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { 874 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 875 radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd, 876 &rdev->pm.dpm.hd); 877 radeon_dpm_enable_uvd(rdev, false); 878 } else { 879 radeon_set_uvd_clocks(rdev, 0, 0); 880 } 881 } else { 882 schedule_delayed_work(&rdev->uvd.idle_work, 883 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 884 } 885 } 886 887 void radeon_uvd_note_usage(struct radeon_device *rdev) 888 { 889 bool streams_changed = false; 890 bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work); 891 set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, 892 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 893 894 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 895 unsigned hd = 0, sd = 0; 896 radeon_uvd_count_handles(rdev, &sd, &hd); 897 if ((rdev->pm.dpm.sd != sd) || 898 (rdev->pm.dpm.hd != hd)) { 899 rdev->pm.dpm.sd = sd; 900 rdev->pm.dpm.hd = hd; 901 /* disable this for now */ 902 /*streams_changed = true;*/ 903 } 904 } 905 906 if (set_clocks || streams_changed) { 907 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 908 radeon_dpm_enable_uvd(rdev, true); 909 } else { 910 radeon_set_uvd_clocks(rdev, 53300, 40000); 911 } 912 } 913 } 914 915 static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq, 916 unsigned target_freq, 917 unsigned pd_min, 918 unsigned pd_even) 919 { 920 unsigned post_div = vco_freq / target_freq; 921 922 /* adjust to post divider minimum value */ 923 if (post_div < pd_min) 924 post_div = pd_min; 925 926 /* we alway need a frequency less than or equal the target */ 927 if ((vco_freq / post_div) > target_freq) 928 post_div += 1; 929 930 /* post dividers above a certain value must be even */ 931 if (post_div > pd_even && post_div % 2) 932 post_div += 1; 933 934 return post_div; 935 } 936 937 /** 938 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers 939 * 940 * @rdev: radeon_device pointer 941 * @vclk: wanted VCLK 942 * @dclk: wanted DCLK 943 * @vco_min: minimum VCO frequency 944 * @vco_max: maximum VCO frequency 945 * @fb_factor: factor to multiply vco freq with 946 * @fb_mask: limit and bitmask for feedback divider 947 * @pd_min: post divider minimum 948 * @pd_max: post divider maximum 949 * @pd_even: post divider must be even above this value 950 * @optimal_fb_div: resulting feedback divider 951 * @optimal_vclk_div: resulting vclk post divider 952 * @optimal_dclk_div: resulting dclk post divider 953 * 954 * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs). 955 * Returns zero on success -EINVAL on error. 956 */ 957 int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, 958 unsigned vclk, unsigned dclk, 959 unsigned vco_min, unsigned vco_max, 960 unsigned fb_factor, unsigned fb_mask, 961 unsigned pd_min, unsigned pd_max, 962 unsigned pd_even, 963 unsigned *optimal_fb_div, 964 unsigned *optimal_vclk_div, 965 unsigned *optimal_dclk_div) 966 { 967 unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq; 968 969 /* start off with something large */ 970 unsigned optimal_score = ~0; 971 972 /* loop through vco from low to high */ 973 vco_min = max(max(vco_min, vclk), dclk); 974 for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { 975 976 uint64_t fb_div = (uint64_t)vco_freq * fb_factor; 977 unsigned vclk_div, dclk_div, score; 978 979 do_div(fb_div, ref_freq); 980 981 /* fb div out of range ? */ 982 if (fb_div > fb_mask) 983 break; /* it can oly get worse */ 984 985 fb_div &= fb_mask; 986 987 /* calc vclk divider with current vco freq */ 988 vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk, 989 pd_min, pd_even); 990 if (vclk_div > pd_max) 991 break; /* vco is too big, it has to stop */ 992 993 /* calc dclk divider with current vco freq */ 994 dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, 995 pd_min, pd_even); 996 if (dclk_div > pd_max) 997 break; /* vco is too big, it has to stop */ 998 999 /* calc score with current vco freq */ 1000 score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); 1001 1002 /* determine if this vco setting is better than current optimal settings */ 1003 if (score < optimal_score) { 1004 *optimal_fb_div = fb_div; 1005 *optimal_vclk_div = vclk_div; 1006 *optimal_dclk_div = dclk_div; 1007 optimal_score = score; 1008 if (optimal_score == 0) 1009 break; /* it can't get better than this */ 1010 } 1011 } 1012 1013 /* did we found a valid setup ? */ 1014 if (optimal_score == ~0) 1015 return -EINVAL; 1016 1017 return 0; 1018 } 1019 1020 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, 1021 unsigned cg_upll_func_cntl) 1022 { 1023 unsigned i; 1024 1025 /* make sure UPLL_CTLREQ is deasserted */ 1026 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1027 1028 mdelay(10); 1029 1030 /* assert UPLL_CTLREQ */ 1031 WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); 1032 1033 /* wait for CTLACK and CTLACK2 to get asserted */ 1034 for (i = 0; i < 100; ++i) { 1035 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; 1036 if ((RREG32(cg_upll_func_cntl) & mask) == mask) 1037 break; 1038 mdelay(10); 1039 } 1040 1041 /* deassert UPLL_CTLREQ */ 1042 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1043 1044 if (i == 100) { 1045 DRM_ERROR("Timeout setting UVD clocks!\n"); 1046 return -ETIMEDOUT; 1047 } 1048 1049 return 0; 1050 } 1051