/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "radeon.h"
#include "radeon_ucode.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeon/R600_uvd.bin"
#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE_LEGACY	"radeon/BONAIRE_uvd.bin"
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);

int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name = NULL, *legacy_fw_name = NULL;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		legacy_fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		legacy_fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		legacy_fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		legacy_fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		legacy_fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		legacy_fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		legacy_fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	rdev->uvd.fw_header_present = false;
	rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
	if (fw_name) {
		/* Let's try to load the newer firmware first */
		r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				fw_name);
		} else {
			struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data;
			unsigned version_major, version_minor, family_id;

			r = radeon_ucode_validate(rdev->uvd_fw);
			if (r)
				return r;

			rdev->uvd.fw_header_present = true;

			family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
			version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
			version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
			DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
				 version_major, version_minor, family_id);

			/*
			 * Limit the number of UVD handles depending on
			 * microcode major and minor versions.
			 */
			if ((version_major >= 0x01) && (version_minor >= 0x37))
				rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
		}
	}

	/*
	 * In case there is only legacy firmware, or we encounter an error
	 * while loading the new firmware, we fall back to loading the legacy
	 * firmware now.
	 */
	if (!fw_name || r) {
		r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				legacy_fw_name);
			return r;
		}
	}
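
	/*
	 * VCPU BO sizing (a summary, not from the original source): the
	 * firmware image is placed at the start of the BO (see
	 * radeon_uvd_resume()), with room after it for the stack, heap and
	 * one session area per handle; the last page of the BO doubles as
	 * scratch space for the create/destroy messages built further down
	 * in this file.
	 */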
	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}

void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

int radeon_uvd_suspend(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}

	return 0;
}

int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	void *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->size;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->size;

	memset(ptr, 0, size);

	return 0;
}
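
/*
 * UVD on these chips can only address 256MB segments (see the boundary
 * check in radeon_uvd_cs_reloc() below), so the helper clamps every
 * placement to the first 256MB of its domain and, when the buffer is not
 * required to be in VRAM, offers the following 256MB window as an
 * alternative placement.
 */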
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
	rbo->placement.num_busy_placement++;
}

void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}
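
/*
 * Worked example for the H264 sizing below (illustrative numbers, not from
 * a real stream): a 1920x1088 decode gives image_size =
 * ALIGN(1920 * 1088 * 3 / 2, 1024) = 3133440 bytes, so the reference
 * picture buffer alone needs 17 * image_size (~51MB), plus the macroblock
 * context and IT surface buffers derived from the 120x68 macroblock grid.
 */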
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
				     unsigned stream_type)
{
	switch (stream_type) {
	case 0: /* H264 */
	case 1: /* VC1 */
		/* always supported */
		return 0;

	case 3: /* MPEG2 */
	case 4: /* MPEG4 */
		/* only since UVD 3 */
		if (p->rdev->family >= CHIP_PALM)
			return 0;

		/* fall through */
	default:
		DRM_ERROR("UVD codec not supported by hardware %d!\n",
			  stream_type);
		return -EINVAL;
	}
}
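
/*
 * The message handler below inspects msg[1] (the message type: 0 = create,
 * 1 = decode, 2 = destroy) and msg[2] (the session handle) and enforces the
 * handle lifecycle: create claims a free slot, decode checks that the
 * handle belongs to the submitting file, destroy releases the slot again.
 */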
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	struct fence *f;
	void *ptr;

	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	f = reservation_object_get_excl(bo->tbo.resv);
	if (f) {
		r = radeon_fence_wait((struct radeon_fence *)f, false);
		if (r) {
			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
			return r;
		}
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		radeon_bo_kunmap(bo);
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];

		r = radeon_uvd_validate_codec(p, msg[4]);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* try to alloc a new handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
				p->rdev->uvd.filp[i] = p->filp;
				p->rdev->uvd.img_size[i] = img_size;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, validate codec and calc buffer sizes */
		r = radeon_uvd_validate_codec(p, msg[4]);
		if (!r)
			r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				if (p->rdev->uvd.filp[i] != p->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;

	default:
		radeon_bo_kunmap(bo);
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
		return -EINVAL;
	}

	BUG();
	return -EINVAL;
}

static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_bo_list *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = p->chunk_relocs;
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = &p->relocs[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}
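
/*
 * A type-0 packet in a UVD IB may only write the four registers handled
 * below: DATA0/DATA1 record where the two halves of a buffer address sit
 * in the IB, the CMD write triggers the relocation handling above, and
 * UVD_ENGINE_CNTL is accepted as-is.
 */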
static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command? */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000] = 2048,
		[0x00000001] = 32 * 1024 * 1024,
		[0x00000002] = 2048 * 1152 * 3,
		[0x00000003] = 2048,
	};

	if (p->chunk_ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  p->chunk_ib->length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunk_ib->length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}
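
/*
 * radeon_uvd_send_msg builds a minimal 16-dword IB: three register writes
 * hand the message address (low dword, high dword) and a zero command to
 * the VCPU; the remaining dwords are padded with type-2 NOP packets.
 */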
static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; ++i)
		ib.ptr[i] = PACKET2(0);
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * multiple fence commands without any stream commands in between can
 * crash the vcpu so just try to emit a dummy create/destroy msg to
 * avoid this
 */
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t *msg = rdev->uvd.cpu_addr + offs;
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t *msg = rdev->uvd.cpu_addr + offs;
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}
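
/*
 * Idle handling: every submission re-arms the delayed work below through
 * radeon_uvd_note_usage(); once the work fires with no UVD fences still
 * pending, the UVD clocks are dropped (or DPM is told the block is idle).
 */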
static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;
		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}

static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}
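
/*
 * Illustrative example for the divider search below (numbers assumed, not
 * taken from real hardware): with ref_freq = 27000, fb_factor = 16 and a
 * wanted vclk of 54000, a candidate vco_freq of 108000 yields
 * fb_div = 108000 * 16 / 27000 = 64 and, assuming pd_min <= 2 <= pd_max,
 * a vclk post divider of 2 that hits the target exactly and contributes
 * 0 to the score.
 */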

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range ? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}