/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "radeon.h"
#include "radeon_ucode.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeon/R600_uvd.bin"
#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE_LEGACY	"radeon/BONAIRE_uvd.bin"
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);
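
/**
 * radeon_uvd_init - init UVD state and load microcode
 *
 * @rdev: radeon_device pointer
 *
 * Select and load the UVD firmware for the chip family, allocate and
 * pin the VCPU buffer object in VRAM and clear the handle bookkeeping.
 * Returns 0 on success, negative error code on failure.
 */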
int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name = NULL, *legacy_fw_name = NULL;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		legacy_fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		legacy_fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		legacy_fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		legacy_fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		legacy_fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		legacy_fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		legacy_fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	rdev->uvd.fw_header_present = false;
	rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
	if (fw_name) {
		/* Let's try to load the newer firmware first */
		r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				fw_name);
		} else {
			struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data;
			unsigned version_major, version_minor, family_id;

			r = radeon_ucode_validate(rdev->uvd_fw);
			if (r)
				return r;

			rdev->uvd.fw_header_present = true;

			family_id = (__force u32)(hdr->ucode_version) & 0xff;
			version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version))
					 >> 24) & 0xff;
			version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version))
					 >> 8) & 0xff;
			DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
				 version_major, version_minor, family_id);

			/*
			 * Limit the number of UVD handles depending on
			 * microcode major and minor versions.
			 */
			if ((version_major >= 0x01) && (version_minor >= 0x37))
				rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
		}
	}

	/*
	 * In case there is only legacy firmware, or we encounter an error
	 * while loading the new firmware, we fall back to loading the legacy
	 * firmware now.
	 */
	if (!fw_name || r) {
		r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				legacy_fw_name);
			return r;
		}
	}

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}
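
/**
 * radeon_uvd_fini - tear down UVD state
 *
 * @rdev: radeon_device pointer
 *
 * Unmap, unpin and free the VCPU buffer object, tear down the UVD ring
 * and release the firmware. Does nothing if init never completed.
 */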
void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}
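
/**
 * radeon_uvd_suspend - destroy all open UVD handles
 *
 * @rdev: radeon_device pointer
 *
 * Send a destroy message for every open handle and wait on the
 * resulting fences, so the VCPU is idle before suspend. Returns 0.
 */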
int radeon_uvd_suspend(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);

		if (handle != 0) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}

	return 0;
}

int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	void *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->size;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->size;

	memset_io((void __iomem *)ptr, 0, size);

	return 0;
}

void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
}

void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);

		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}
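
/**
 * radeon_uvd_cs_msg_decode - check a decode message and calc buffer sizes
 *
 * @msg: pointer to the decode message
 * @buf_sizes: minimum buffer sizes, indexed by command
 *
 * Calculate the minimum DPB size for the stream type and dimensions in
 * the message and check it against the announced dpb_size and pitch.
 * On success the DPB and image sizes are stored in @buf_sizes.
 */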
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
				     unsigned stream_type)
{
	switch (stream_type) {
	case 0: /* H264 */
	case 1: /* VC1 */
		/* always supported */
		return 0;

	case 3: /* MPEG2 */
	case 4: /* MPEG4 */
		/* only since UVD 3 */
		if (p->rdev->family >= CHIP_PALM)
			return 0;

		fallthrough;
	default:
		DRM_ERROR("UVD codec not supported by hardware %d!\n",
			  stream_type);
		return -EINVAL;
	}
}

static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	void *ptr;
	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		radeon_bo_kunmap(bo);
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];

		r = radeon_uvd_validate_codec(p, msg[4]);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* try to alloc a new handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
				p->rdev->uvd.filp[i] = p->filp;
				p->rdev->uvd.img_size[i] = img_size;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, validate codec and calc buffer sizes */
		r = radeon_uvd_validate_codec(p, msg[4]);
		if (!r)
			r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				if (p->rdev->uvd.filp[i] != p->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
	}

	radeon_bo_kunmap(bo);
	return -EINVAL;
}
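
/**
 * radeon_uvd_cs_reloc - patch and check a single relocation
 *
 * @p: parser context
 * @data0: IB index of the UVD_GPCOM_VCPU_DATA0 value
 * @data1: IB index of the UVD_GPCOM_VCPU_DATA1 value
 * @buf_sizes: minimum buffer sizes, indexed by command
 * @has_msg_cmd: whether a message command was seen already
 *
 * Write the buffer's GPU address into the IB, check the buffer size and
 * the 256MB segment restrictions, and hand message buffers to
 * radeon_uvd_cs_msg() for further validation.
 */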
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_bo_list *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = p->chunk_relocs;
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = &p->relocs[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
		case UVD_NO_OP:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}
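
/**
 * radeon_uvd_cs_parse - parse and validate a UVD command stream
 *
 * @p: parser context
 *
 * Walk all packets of the IB, let radeon_uvd_cs_reg() handle the
 * register writes and make sure exactly one message command is present.
 */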
int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000]	=	2048,
		[0x00000001]	=	32 * 1024 * 1024,
		[0x00000002]	=	2048 * 1152 * 3,
		[0x00000003]	=	2048,
	};

	if (p->chunk_ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  p->chunk_ib->length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk!\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunk_ib->length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib.ptr[i] = PACKET0(UVD_NO_OP, 0);
		ib.ptr[i+1] = 0;
	}
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * multiple fence commands without any stream commands in between can
 * crash the vcpu so just try to emit a dummy create/destroy msg to
 * avoid this
 */
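/**
 * radeon_uvd_get_create_msg - send a dummy create message
 *
 * @rdev: radeon_device pointer
 * @ring: ring index to send the message on
 * @handle: session handle to use
 * @fence: optional fence to return
 *
 * Write a dummy create message into the last page of the VCPU bo and
 * submit it to the ring, optionally returning a fence for it.
 */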
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel(0x0, (void __iomem *)&msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	writel(0x0, &msg[4]);
	writel(0x0, &msg[5]);
	writel(0x0, &msg[6]);
	writel((__force u32)cpu_to_le32(0x00000780), &msg[7]);
	writel((__force u32)cpu_to_le32(0x00000440), &msg[8]);
	writel(0x0, &msg[9]);
	writel((__force u32)cpu_to_le32(0x01b37000), &msg[10]);
	for (i = 11; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel((__force u32)cpu_to_le32(0x00000002), &msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	for (i = 4; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);

	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;

		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}
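
/**
 * radeon_uvd_calc_upll_post_div - calc UPLL post divider
 *
 * @vco_freq: VCO frequency
 * @target_freq: wanted output frequency
 * @pd_min: post divider minimum
 * @pd_even: post divider must be even above this value
 *
 * Calculate the post divider for @target_freq, honoring the minimum
 * value and the even-only constraint, so that the resulting frequency
 * never exceeds the target.
 */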
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;

		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}