// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#ifdef CONFIG_ARM_ARCH_TIMER
#include <asm/arch_timer.h>
#endif

#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
#include <linux/iopoll.h>
#include <linux/iosys-map.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>

#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"

#define CSF_FW_NAME "mali_csffw.bin"

#define PING_INTERVAL_MS			12000
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
#define IDLE_HYSTERESIS_US			800
#define PWROFF_HYSTERESIS_US			10000

/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/** @major: Major FW version. */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/** @size: FW binary size. */
	u32 size;
};

/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
	 * the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};
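
/*
 * Each entry in the FW binary starts with a 32-bit header word, decoded by
 * the macros below:
 *
 *   bits  7:0  entry type (enum panthor_fw_binary_entry_type)
 *   bits 15:8  entry size in bytes, including the header word itself
 *   bit  30    entry updates a previously parsed entry
 *   bit  31    entry is optional and can be skipped if not understood
 *
 * For instance, ehdr == 0x80000c00 would describe an optional, 12-byte
 * IFACE entry (type 0) with an 8-byte payload following the header word.
 */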

#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)		((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)		(((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE		BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL		BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX					BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT	(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT	(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK			GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT				BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO				BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS		\
	(CSF_FW_BINARY_IFACE_ENTRY_RD_RD |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_WR |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_EX |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK |		\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_PROT |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)

/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags. */
	u32 flags;

	/** @va: MCU virtual range to map this binary section to. */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/** @data: Data to initialize the FW section with. */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};

/**
 * struct panthor_fw_build_info_hdr - Describes the build info metadata entry
 */
struct panthor_fw_build_info_hdr {
	/** @meta_start: Offset of the build info data in the FW binary. */
	u32 meta_start;

	/** @meta_size: Size of the build info data in the FW binary. */
	u32 meta_size;
};

/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: FW binary size. */
	size_t size;

	/** @offset: Iterator offset. */
	size_t offset;
};

/**
 * struct panthor_fw_section - FW section
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/** @mem: Section memory. */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};
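
/*
 * The shared section is a 64MB region at a fixed MCU VA. All the host <-> FW
 * interfaces (the global, group and stream control structures) are found in
 * this section.
 */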

#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3
#define MAX_CSG_PRIO				0xf

#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)

#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
#define CSF_UNPRESERVED_REG_COUNT		4

/**
 * struct panthor_fw_iface - FW interfaces
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};

/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/** @shared_section: The section containing the FW interfaces. */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/** @booted: True if the FW is booted. */
	bool booted;

	/**
	 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
	 *
	 * A fast reset is just a reset where the driver doesn't reload the FW sections.
	 *
	 * Any time the firmware is properly suspended, a fast reset can take place.
	 * On the other hand, if the halt operation failed, the driver will reload
	 * all sections to make sure we start from a fresh state.
	 */
	bool fast_reset;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};

struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
{
	return ptdev->fw->vm;
}

/**
 * panthor_fw_get_glb_iface() - Get the global interface
 * @ptdev: Device.
 *
 * Return: The global interface.
 */
struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev)
{
	return &ptdev->fw->iface.global;
}

/**
 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 *
 * Return: The command stream group slot interface.
 */
struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
		return NULL;

	return &ptdev->fw->iface.groups[csg_slot];
}

/**
 * panthor_fw_get_cs_iface() - Get a command stream slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 * @cs_slot: Index of the command stream slot.
 *
 * Return: The command stream slot interface.
 */
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
		return NULL;

	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
}

/**
 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
 * @ptdev: Device.
 * @timeout_us: Timeout expressed in micro-seconds.
 *
 * The FW has two timer sources: the GPU counter or the arch-timer. We need
 * to express timeouts in terms of a number of cycles and specify which
 * timer source should be used.
 *
 * Return: A value suitable for timeout fields in the global interface.
 */
static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
{
	bool use_cycle_counter = false;
	u32 timer_rate = 0;
	u64 mod_cycles;

#ifdef CONFIG_ARM_ARCH_TIMER
	timer_rate = arch_timer_get_cntfrq();
#endif

	if (!timer_rate) {
		use_cycle_counter = true;
		timer_rate = clk_get_rate(ptdev->clks.core);
	}

	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
		/* We couldn't get a valid clock rate, let's just pick the
		 * maximum value so the FW still handles the core
		 * power on/off requests.
		 */
		return GLB_TIMER_VAL(~0) |
		       GLB_TIMER_SOURCE_GPU_COUNTER;
	}

	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
				      1000000ull << 10);
	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
		mod_cycles = GLB_TIMER_VAL(~0);

	return GLB_TIMER_VAL(mod_cycles) |
	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
}
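
/*
 * Worked example for panthor_fw_conv_timeout(): assume a 19.2 MHz arch-timer
 * and timeout_us = PWROFF_HYSTERESIS_US = 10000. The raw count is
 * 10000 * 19200000 / 1000000 = 192000 timer cycles, which is pre-divided by
 * 2^10, giving DIV_ROUND_UP(10000 * 19200000, 1000000 << 10) = 188. The FW
 * field is then programmed with GLB_TIMER_VAL(188), leaving the GPU-counter
 * source flag cleared since the arch-timer is the source.
 */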

static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
				       struct panthor_fw_binary_iter *iter,
				       void *out, size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware too small\n");
		return -EINVAL;
	}

	memcpy(out, iter->data + iter->offset, size);
	iter->offset = new_offset;
	return 0;
}

static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
					   struct panthor_fw_binary_iter *iter,
					   struct panthor_fw_binary_iter *sub_iter,
					   size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware entry too long\n");
		return -EINVAL;
	}

	sub_iter->offset = 0;
	sub_iter->data = iter->data + iter->offset;
	sub_iter->size = size;
	iter->offset = new_offset;
	return 0;
}

static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
					struct panthor_fw_section *section)
{
	bool was_mapped = !!section->mem->kmap;
	int ret;

	if (!section->data.size &&
	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
		return;

	ret = panthor_kernel_bo_vmap(section->mem);
	if (drm_WARN_ON(&ptdev->base, ret))
		return;

	memcpy(section->mem->kmap, section->data.buf, section->data.size);
	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
		memset(section->mem->kmap + section->data.size, 0,
		       panthor_kernel_bo_size(section->mem) - section->data.size);
	}

	if (!was_mapped)
		panthor_kernel_bo_vunmap(section->mem);
}

/**
 * panthor_fw_alloc_queue_iface_mem() - Allocate ring-buffer interfaces.
 * @ptdev: Device.
 * @input: Pointer holding the input interface on success.
 * Should be ignored on failure.
 * @output: Pointer holding the output interface on success.
 * Should be ignored on failure.
 * @input_fw_va: Pointer holding the input interface FW VA on success.
 * Should be ignored on failure.
 * @output_fw_va: Pointer holding the output interface FW VA on success.
 * Should be ignored on failure.
 *
 * Allocates panthor_fw_ringbuf_{input,output}_iface interfaces. The input
 * interface is at offset 0, and the output interface at offset 4096.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
				 struct panthor_fw_ringbuf_input_iface **input,
				 const struct panthor_fw_ringbuf_output_iface **output,
				 u32 *input_fw_va, u32 *output_fw_va)
{
	struct panthor_kernel_bo *mem;
	int ret;

	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
				       DRM_PANTHOR_BO_NO_MMAP,
				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
				       PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(mem))
		return mem;

	ret = panthor_kernel_bo_vmap(mem);
	if (ret) {
		panthor_kernel_bo_destroy(mem);
		return ERR_PTR(ret);
	}

	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
	*input = mem->kmap;
	*output = mem->kmap + SZ_4K;
	*input_fw_va = panthor_kernel_bo_gpuva(mem);
	*output_fw_va = *input_fw_va + SZ_4K;

	return mem;
}
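
/*
 * Note on the layout above: the input and output interfaces live in two
 * separate 4k pages of a single 8k buffer. The host is the writer of the
 * input page, while the output page is written by the FW, which is why the
 * output pointer handed back to the caller is const.
 */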

/**
 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
 * @ptdev: Device.
 * @size: Size of the suspend buffer.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
{
	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
					DRM_PANTHOR_BO_NO_MMAP,
					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					PANTHOR_VM_KERNEL_AUTO_VA);
}

static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
					 const struct firmware *fw,
					 struct panthor_fw_binary_iter *iter,
					 u32 ehdr)
{
	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
	struct panthor_fw_binary_section_entry_hdr hdr;
	struct panthor_fw_section *section;
	u32 section_size;
	u32 name_len;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.data.end < hdr.data.start) {
		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
			hdr.data.end, hdr.data.start);
		return -EINVAL;
	}

	if (hdr.va.end < hdr.va.start) {
		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
			hdr.va.end, hdr.va.start);
		return -EINVAL;
	}

	if (hdr.data.end > fw->size) {
		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
			hdr.data.end, fw->size);
		return -EINVAL;
	}

	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
			hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
			hdr.flags);
		return -EINVAL;
	}

	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
		drm_warn(&ptdev->base,
			 "Firmware protected mode entry not supported, ignoring");
		return 0;
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
		drm_err(&ptdev->base,
			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
		return -EINVAL;
	}

	name_len = iter->size - iter->offset;

	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
	if (!section)
		return -ENOMEM;

	list_add_tail(&section->node, &ptdev->fw->sections);
	section->flags = hdr.flags;
	section->data.size = hdr.data.end - hdr.data.start;

	if (section->data.size > 0) {
		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		memcpy(data, fw->data + hdr.data.start, section->data.size);
		section->data.buf = data;
	}

	if (name_len > 0) {
		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);

		if (!name)
			return -ENOMEM;

		memcpy(name, iter->data + iter->offset, name_len);
		name[name_len] = '\0';
		section->name = name;
	}

	section_size = hdr.va.end - hdr.va.start;
	if (section_size) {
		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
		struct panthor_gem_object *bo;
		u32 vm_map_flags = 0;
		struct sg_table *sgt;
		u64 va = hdr.va.start;
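
		/*
		 * Translate the section protection bits from the FW binary
		 * into VM bind flags below: no WR bit means the FW must not
		 * write the section (map it read-only), and no EX bit means
		 * it must not execute from it (map it no-exec).
		 */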
		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;

		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
		 * non-cacheable for now. We might want to introduce a new
		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
		 * memory and is currently not used by our driver) for
		 * AS_MEMATTR_AARCH64_SHARED memory, so we can benefit
		 * from IO-coherent systems.
		 */
		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;

		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
							section_size,
							DRM_PANTHOR_BO_NO_MMAP,
							vm_map_flags, va);
		if (IS_ERR(section->mem))
			return PTR_ERR(section->mem);

		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
			return -EINVAL;

		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
			ret = panthor_kernel_bo_vmap(section->mem);
			if (ret)
				return ret;
		}

		panthor_fw_init_section_mem(ptdev, section);

		bo = to_panthor_bo(section->mem->obj);
		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;
}

static int panthor_fw_read_build_info(struct panthor_device *ptdev,
				      const struct firmware *fw,
				      struct panthor_fw_binary_iter *iter,
				      u32 ehdr)
{
	struct panthor_fw_build_info_hdr hdr;
	char header[9];
	const char git_sha_header[sizeof(header)] = "git_sha: ";
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.meta_start > fw->size ||
	    hdr.meta_start + hdr.meta_size > fw->size) {
		drm_err(&ptdev->base, "Firmware build info corrupt\n");
		/* We don't need the build info, so continue */
		return 0;
	}

	if (memcmp(git_sha_header, fw->data + hdr.meta_start,
		   sizeof(git_sha_header))) {
		/* Not the expected header, this isn't metadata we understand */
		return 0;
	}

	/* Check that the git SHA is NULL terminated as expected */
	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
		/* Don't treat as fatal */
		return 0;
	}

	drm_info(&ptdev->base, "Firmware git sha: %s\n",
		 fw->data + hdr.meta_start + sizeof(git_sha_header));

	return 0;
}
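
/*
 * On a fast reset, only the sections the MCU can write (those mapped with
 * the WR flag) need to be re-initialized: read-only sections still hold the
 * data loaded at probe time. A full reload is only requested when the FW
 * couldn't be cleanly halted (see panthor_fw_post_reset()).
 */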

static void
panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
{
	struct panthor_fw_section *section;

	list_for_each_entry(section, &ptdev->fw->sections, node) {
		struct sg_table *sgt;

		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
			continue;

		panthor_fw_init_section_mem(ptdev, section);
		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}
}

static int panthor_fw_load_entry(struct panthor_device *ptdev,
				 const struct firmware *fw,
				 struct panthor_fw_binary_iter *iter)
{
	struct panthor_fw_binary_iter eiter;
	u32 ehdr;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
	if (ret)
		return ret;

	if ((iter->offset % sizeof(u32)) ||
	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
		return -EINVAL;
	}

	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
		return -EINVAL;

	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);

	/* FIXME: handle those entry types? */
	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
		return 0;
	default:
		break;
	}

	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
		return 0;

	drm_err(&ptdev->base,
		"Unsupported non-optional entry type %u in firmware\n",
		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
	return -EINVAL;
}

static int panthor_fw_load(struct panthor_device *ptdev)
{
	const struct firmware *fw = NULL;
	struct panthor_fw_binary_iter iter = {};
	struct panthor_fw_binary_hdr hdr;
	char fw_path[128];
	int ret;

	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
		 CSF_FW_NAME);

	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
	if (ret) {
		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
			CSF_FW_NAME);
		return ret;
	}

	iter.data = fw->data;
	iter.size = fw->size;
	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
	if (ret)
		goto out;

	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Invalid firmware magic\n");
		goto out;
	}

	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
		goto out;
	}

	if (hdr.size > iter.size) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Firmware image is truncated\n");
		goto out;
	}

	iter.size = hdr.size;

	while (iter.offset < hdr.size) {
		ret = panthor_fw_load_entry(ptdev, fw, &iter);
		if (ret)
			goto out;
	}

	if (!ptdev->fw->shared_section) {
		drm_err(&ptdev->base, "Shared interface region not found\n");
		ret = -EINVAL;
		goto out;
	}

out:
	release_firmware(fw);
	return ret;
}
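
/*
 * Example of a resolved firmware path: on a GPU reporting arch 10.8 (e.g. a
 * Mali-G610), the snprintf() above produces "arm/mali/arch10.8/mali_csffw.bin",
 * matching the MODULE_FIRMWARE() entry at the bottom of this file.
 */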

/**
 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
 * @ptdev: Device.
 * @mcu_va: MCU address.
 *
 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
 */
static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
{
	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
	u64 shared_mem_end = shared_mem_start +
			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);

	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
		return NULL;

	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
}
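
/*
 * Layout of the control structures inside the shared section, as parsed by
 * the init functions below: the global control interface sits at offset 0,
 * group (CSG) controls start at CSF_GROUP_CONTROL_OFFSET and are
 * group_stride bytes apart, and each group embeds its stream (CS) controls
 * starting at CSF_STREAM_CONTROL_OFFSET, stream_stride bytes apart. The
 * strides are reported by the FW itself. For instance, with
 * group_stride = 0x100 and stream_stride = 0x20 (made-up values), CS 1 of
 * CSG 2 would live at 0x1000 + 2 * 0x100 + 0x40 + 1 * 0x20 = 0x1260.
 */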

static int panthor_init_cs_iface(struct panthor_device *ptdev,
				 unsigned int csg_idx, unsigned int cs_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
			   (csg_idx * glb_iface->control->group_stride) +
			   CSF_STREAM_CONTROL_OFFSET +
			   (cs_idx * csg_iface->control->stream_stride);
	struct panthor_fw_cs_iface *first_cs_iface =
		panthor_fw_get_cs_iface(ptdev, 0, 0);

	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&cs_iface->lock);
	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);

	if (!cs_iface->input || !cs_iface->output) {
		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
		return -EINVAL;
	}

	if (cs_iface != first_cs_iface) {
		if (cs_iface->control->features != first_cs_iface->control->features) {
			drm_err(&ptdev->base, "Expecting identical CS slots");
			return -EINVAL;
		}
	} else {
		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);

		ptdev->csif_info.cs_reg_count = reg_count;
		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
	}

	return 0;
}

static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
			const struct panthor_fw_csg_control_iface *b)
{
	if (a->features != b->features)
		return false;
	if (a->suspend_size != b->suspend_size)
		return false;
	if (a->protm_suspend_size != b->protm_suspend_size)
		return false;
	if (a->stream_num != b->stream_num)
		return false;
	return true;
}

static int panthor_init_csg_iface(struct panthor_device *ptdev,
				  unsigned int csg_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
	unsigned int i;

	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&csg_iface->lock);
	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);

	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
		return -EINVAL;

	if (!csg_iface->input || !csg_iface->output) {
		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
		return -EINVAL;
	}

	if (csg_idx > 0) {
		struct panthor_fw_csg_iface *first_csg_iface =
			panthor_fw_get_csg_iface(ptdev, 0);

		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
			drm_err(&ptdev->base, "Expecting identical CSG slots");
			return -EINVAL;
		}
	}

	for (i = 0; i < csg_iface->control->stream_num; i++) {
		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);

		if (ret)
			return ret;
	}

	return 0;
}

static u32 panthor_get_instr_features(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
		return 0;

	return glb_iface->control->instr_features;
}

static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
	unsigned int i;

	if (!ptdev->fw->shared_section->mem->kmap)
		return -EINVAL;

	spin_lock_init(&glb_iface->lock);
	glb_iface->control = ptdev->fw->shared_section->mem->kmap;

	if (!glb_iface->control->version) {
		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
		return -EINVAL;
	}

	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
	if (!glb_iface->input || !glb_iface->output) {
		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
		return -EINVAL;
	}

	if (glb_iface->control->group_num > MAX_CSGS ||
	    glb_iface->control->group_num < MIN_CSGS) {
		drm_err(&ptdev->base, "Invalid number of control groups");
		return -EINVAL;
	}

	for (i = 0; i < glb_iface->control->group_num; i++) {
		int ret = panthor_init_csg_iface(ptdev, i);

		if (ret)
			return ret;
	}

	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
		 glb_iface->control->features,
		 panthor_get_instr_features(ptdev));
	return 0;
}
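
/*
 * Global interface programming done below: enable all shader cores, convert
 * the idle/power-off hysteresis values into FW timer cycles (see
 * panthor_fw_conv_timeout()), pre-scale the progress timeout by
 * 2^PROGRESS_TIMEOUT_SCALE_SHIFT, unmask the interrupts we care about, and
 * ring the global doorbell so the FW picks the new configuration up.
 */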
static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}

static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
		ptdev->fw->booted = true;

	wake_up_all(&ptdev->fw->req_waitqueue);

	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
	if (!ptdev->fw->booted)
		return;

	panthor_sched_report_fw_events(ptdev, status);
}
PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
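
/*
 * Boot detection: writing MCU_CONTROL_AUTO starts the MCU, and the FW raises
 * a global interface interrupt (JOB_INT_GLOBAL_IF) once it's up, which the
 * handler above turns into fw->booted = true. panthor_fw_start() waits up to
 * a second for that flag, then re-checks JOB_INT_STAT directly to catch the
 * case where the interrupt fired but the flag wasn't observed.
 */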
static int panthor_fw_start(struct panthor_device *ptdev)
{
	bool timedout = false;

	ptdev->fw->booted = false;
	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);

	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
				ptdev->fw->booted,
				msecs_to_jiffies(1000))) {
		if (!ptdev->fw->booted &&
		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
			timedout = true;
	}

	if (timedout) {
		static const char * const status_str[] = {
			[MCU_STATUS_DISABLED] = "disabled",
			[MCU_STATUS_ENABLED] = "enabled",
			[MCU_STATUS_HALT] = "halt",
			[MCU_STATUS_FATAL] = "fatal",
		};
		u32 status = gpu_read(ptdev, MCU_STATUS);

		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
		return -ETIMEDOUT;
	}

	return 0;
}

static void panthor_fw_stop(struct panthor_device *ptdev)
{
	u32 status;

	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
	if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
			       status == MCU_STATUS_DISABLED, 10, 100000))
		drm_err(&ptdev->base, "Failed to stop MCU");
}

/**
 * panthor_fw_pre_reset() - Call before a reset.
 * @ptdev: Device.
 * @on_hang: true if the reset was triggered on a GPU hang.
 *
 * If the reset is not triggered on a hang, we try to gracefully halt the
 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
 */
void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
{
	/* Make sure we won't be woken up by a ping. */
	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	ptdev->fw->fast_reset = false;

	if (!on_hang) {
		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
		u32 status;

		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
					status == MCU_STATUS_HALT, 10, 100000) &&
		    glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
			ptdev->fw->fast_reset = true;
		} else {
			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
		}

		/* The FW detects 0 -> 1 transitions. Make sure we reset
		 * the HALT bit before the FW is rebooted.
		 */
		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
	}

	panthor_job_irq_suspend(&ptdev->fw->irq);
}

/**
 * panthor_fw_post_reset() - Call after a reset.
 * @ptdev: Device.
 *
 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
 * make sure we can recover from a memory corruption.
 */
int panthor_fw_post_reset(struct panthor_device *ptdev)
{
	int ret;

	/* Make the MCU VM active. */
	ret = panthor_vm_active(ptdev->fw->vm);
	if (ret)
		return ret;

	/* If this is a fast reset, try to start the MCU without reloading
	 * the FW sections. If it fails, go for a full reset.
	 */
	if (ptdev->fw->fast_reset) {
		ret = panthor_fw_start(ptdev);
		if (!ret)
			goto out;

		/* Forcibly reset the MCU and force a slow reset, so we get a
		 * fresh boot on the next panthor_fw_start() call.
		 */
		panthor_fw_stop(ptdev);
		ptdev->fw->fast_reset = false;
		drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");

		ret = panthor_vm_flush_all(ptdev->fw->vm);
		if (ret) {
			drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)");
			return ret;
		}
	}

	/* Reload all sections, including RO ones. We're not supposed
	 * to end up here anyway, let's just assume the overhead of
	 * reloading everything is acceptable.
	 */
	panthor_reload_fw_sections(ptdev, true);

	ret = panthor_fw_start(ptdev);
	if (ret) {
		drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW)");
		return ret;
	}

out:
	/* We must re-initialize the global interface even on fast-reset. */
	panthor_fw_init_global_iface(ptdev);
	return 0;
}

/**
 * panthor_fw_unplug() - Called when the device is unplugged.
 * @ptdev: Device.
 *
 * This function must make sure all pending operations are flushed before it
 * releases device resources, thus preventing any interaction with the HW.
 *
 * If there is FW-related work still running after this function returns, it
 * must use drm_dev_{enter,exit}() and skip any HW access when
 * drm_dev_enter() returns false.
 */
void panthor_fw_unplug(struct panthor_device *ptdev)
{
	struct panthor_fw_section *section;

	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	/* Make sure the IRQ handler cannot be called after that point. */
	if (ptdev->fw->irq.irq)
		panthor_job_irq_suspend(&ptdev->fw->irq);

	panthor_fw_stop(ptdev);

	list_for_each_entry(section, &ptdev->fw->sections, node)
		panthor_kernel_bo_destroy(section->mem);

	/* We intentionally don't call panthor_vm_idle() and let
	 * panthor_mmu_unplug() release the AS we acquired with
	 * panthor_vm_active() so we don't have to track the VM active/idle
	 * state to keep the active_refcnt balanced.
	 */
	panthor_vm_put(ptdev->fw->vm);
	ptdev->fw->vm = NULL;

	panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
}
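
/*
 * Request/acknowledge protocol in a nutshell: every request is a bit in the
 * req register that the host toggles, and the FW acknowledges it by making
 * the corresponding bit in the ack register match req again. A request is
 * therefore pending whenever (req ^ ack) & req_mask is non-zero, which is
 * why panthor_fw_wait_acks() reports *acked = ~(req ^ ack) & req_mask on
 * timeout.
 */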
/**
 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
 * @req_ptr: Pointer to the req register.
 * @ack_ptr: Pointer to the ack register.
 * @wq: Wait queue to use for the sleeping wait.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
				wait_queue_head_t *wq,
				u32 req_mask, u32 *acked,
				u32 timeout_ms)
{
	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
	int ret;

	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
	*acked = req_mask;
	ret = read_poll_timeout_atomic(READ_ONCE, ack,
				       (ack & req_mask) == req,
				       0, 10, 0,
				       *ack_ptr);
	if (!ret)
		return 0;

	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
			       msecs_to_jiffies(timeout_ms)))
		return 0;

	/* Check one last time, in case we were not woken up for some reason. */
	ack = READ_ONCE(*ack_ptr);
	if ((ack & req_mask) == req)
		return 0;

	*acked = ~(req ^ ack) & req_mask;
	return -ETIMEDOUT;
}

/**
 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
 * @ptdev: Device.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
			     u32 req_mask, u32 *acked,
			     u32 timeout_ms)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* GLB_HALT doesn't get acked through the FW interface. */
	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
		return -EINVAL;

	return panthor_fw_wait_acks(&glb_iface->input->req,
				    &glb_iface->output->ack,
				    &ptdev->fw->req_waitqueue,
				    req_mask, acked, timeout_ms);
}

/**
 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
 * @ptdev: Device.
 * @csg_slot: CSG slot ID.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
			     u32 req_mask, u32 *acked, u32 timeout_ms)
{
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
	int ret;

	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
		return -EINVAL;

	ret = panthor_fw_wait_acks(&csg_iface->input->req,
				   &csg_iface->output->ack,
				   &ptdev->fw->req_waitqueue,
				   req_mask, acked, timeout_ms);

	/*
	 * Check that all bits in the state field were updated, if any mismatch
	 * then clear all bits in the state field. This allows code to do
	 * (acked & CSG_STATE_MASK) and get the right value.
	 */
	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
		*acked &= ~CSG_STATE_MASK;

	return ret;
}
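
/*
 * The same toggle scheme is used for doorbells: panthor_fw_toggle_reqs()
 * flips the requested bits in doorbell_req, and the FW treats any bit where
 * doorbell_req != doorbell_ack as a rung doorbell once the global doorbell
 * register is written.
 */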
/**
 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
 * @ptdev: Device.
 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
 *
 * This function toggles bits in the doorbell_req register and rings the
 * global doorbell. It doesn't require a user doorbell to be attached to
 * the group.
 */
void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}

static void panthor_fw_ping_work(struct work_struct *work)
{
	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
	struct panthor_device *ptdev = fw->irq.ptdev;
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	u32 acked;
	int ret;

	if (panthor_device_reset_is_pending(ptdev))
		return;

	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
	if (ret) {
		panthor_device_schedule_reset(ptdev);
		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
	} else {
		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
				 msecs_to_jiffies(PING_INTERVAL_MS));
	}
}

/**
 * panthor_fw_init() - Initialize FW related data.
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_fw_init(struct panthor_device *ptdev)
{
	struct panthor_fw *fw;
	int ret, irq;

	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
	if (!fw)
		return -ENOMEM;

	ptdev->fw = fw;
	init_waitqueue_head(&fw->req_waitqueue);
	INIT_LIST_HEAD(&fw->sections);
	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);

	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
	if (irq <= 0)
		return -ENODEV;

	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
	if (ret) {
		drm_err(&ptdev->base, "failed to request job irq");
		return ret;
	}

	ret = panthor_gpu_l2_power_on(ptdev);
	if (ret)
		return ret;

	fw->vm = panthor_vm_create(ptdev, true,
				   0, SZ_4G,
				   CSF_MCU_SHARED_REGION_START,
				   CSF_MCU_SHARED_REGION_SIZE);
	if (IS_ERR(fw->vm)) {
		ret = PTR_ERR(fw->vm);
		fw->vm = NULL;
		goto err_unplug_fw;
	}

	ret = panthor_fw_load(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_vm_active(fw->vm);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_start(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_init_ifaces(ptdev);
	if (ret)
		goto err_unplug_fw;

	panthor_fw_init_global_iface(ptdev);
	return 0;

err_unplug_fw:
	panthor_fw_unplug(ptdev);
	return ret;
}

MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");