1 // SPDX-License-Identifier: GPL-2.0 or MIT 2 /* Copyright 2023 Collabora ltd. */ 3 4 #ifdef CONFIG_ARM_ARCH_TIMER 5 #include <asm/arch_timer.h> 6 #endif 7 8 #include <linux/clk.h> 9 #include <linux/dma-mapping.h> 10 #include <linux/firmware.h> 11 #include <linux/iopoll.h> 12 #include <linux/iosys-map.h> 13 #include <linux/mutex.h> 14 #include <linux/platform_device.h> 15 #include <linux/pm_runtime.h> 16 17 #include <drm/drm_drv.h> 18 #include <drm/drm_managed.h> 19 #include <drm/drm_print.h> 20 21 #include "panthor_device.h" 22 #include "panthor_fw.h" 23 #include "panthor_fw_regs.h" 24 #include "panthor_gem.h" 25 #include "panthor_gpu.h" 26 #include "panthor_hw.h" 27 #include "panthor_mmu.h" 28 #include "panthor_sched.h" 29 #include "panthor_trace.h" 30 31 #define CSF_FW_NAME "mali_csffw.bin" 32 33 #define PING_INTERVAL_MS 12000 34 #define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024) 35 #define PROGRESS_TIMEOUT_SCALE_SHIFT 10 36 #define IDLE_HYSTERESIS_US 800 37 #define PWROFF_HYSTERESIS_US 10000 38 #define MCU_HALT_TIMEOUT_US (1ULL * USEC_PER_SEC) 39 40 /** 41 * struct panthor_fw_binary_hdr - Firmware binary header. 42 */ 43 struct panthor_fw_binary_hdr { 44 /** @magic: Magic value to check binary validity. */ 45 u32 magic; 46 #define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e 47 48 /** @minor: Minor FW version. */ 49 u8 minor; 50 51 /** @major: Major FW version. */ 52 u8 major; 53 #define CSF_FW_BINARY_HEADER_MAJOR_MAX 0 54 55 /** @padding1: MBZ. */ 56 u16 padding1; 57 58 /** @version_hash: FW version hash. */ 59 u32 version_hash; 60 61 /** @padding2: MBZ. */ 62 u32 padding2; 63 64 /** @size: FW binary size. */ 65 u32 size; 66 }; 67 68 /** 69 * enum panthor_fw_binary_entry_type - Firmware binary entry type 70 */ 71 enum panthor_fw_binary_entry_type { 72 /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */ 73 CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0, 74 75 /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */ 76 CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1, 77 78 /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */ 79 CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2, 80 81 /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */ 82 CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3, 83 84 /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */ 85 CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4, 86 87 /** 88 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how 89 * the FW binary was built. 90 */ 91 CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6 92 }; 93 94 #define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff) 95 #define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff) 96 #define CSF_FW_BINARY_ENTRY_UPDATE BIT(30) 97 #define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31) 98 99 #define CSF_FW_BINARY_IFACE_ENTRY_RD BIT(0) 100 #define CSF_FW_BINARY_IFACE_ENTRY_WR BIT(1) 101 #define CSF_FW_BINARY_IFACE_ENTRY_EX BIT(2) 102 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE (0 << 3) 103 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED (1 << 3) 104 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT (2 << 3) 105 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT (3 << 3) 106 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK GENMASK(4, 3) 107 #define CSF_FW_BINARY_IFACE_ENTRY_PROT BIT(5) 108 #define CSF_FW_BINARY_IFACE_ENTRY_SHARED BIT(30) 109 #define CSF_FW_BINARY_IFACE_ENTRY_ZERO BIT(31) 110 111 #define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS \ 112 (CSF_FW_BINARY_IFACE_ENTRY_RD | \ 113 CSF_FW_BINARY_IFACE_ENTRY_WR | \ 114 CSF_FW_BINARY_IFACE_ENTRY_EX | \ 115 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK | \ 116 CSF_FW_BINARY_IFACE_ENTRY_PROT | \ 117 CSF_FW_BINARY_IFACE_ENTRY_SHARED | \ 118 CSF_FW_BINARY_IFACE_ENTRY_ZERO) 119 120 /** 121 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary 122 */ 123 struct panthor_fw_binary_section_entry_hdr { 124 /** @flags: Section flags. */ 125 u32 flags; 126 127 /** @va: MCU virtual range to map this binary section to. */ 128 struct { 129 /** @start: Start address. */ 130 u32 start; 131 132 /** @end: End address. */ 133 u32 end; 134 } va; 135 136 /** @data: Data to initialize the FW section with. */ 137 struct { 138 /** @start: Start offset in the FW binary. */ 139 u32 start; 140 141 /** @end: End offset in the FW binary. */ 142 u32 end; 143 } data; 144 }; 145 146 struct panthor_fw_build_info_hdr { 147 /** @meta_start: Offset of the build info data in the FW binary */ 148 u32 meta_start; 149 /** @meta_size: Size of the build info data in the FW binary */ 150 u32 meta_size; 151 }; 152 153 /** 154 * struct panthor_fw_binary_iter - Firmware binary iterator 155 * 156 * Used to parse a firmware binary. 157 */ 158 struct panthor_fw_binary_iter { 159 /** @data: FW binary data. */ 160 const void *data; 161 162 /** @size: FW binary size. */ 163 size_t size; 164 165 /** @offset: Iterator offset. */ 166 size_t offset; 167 }; 168 169 /** 170 * struct panthor_fw_section - FW section 171 */ 172 struct panthor_fw_section { 173 /** @node: Used to keep track of FW sections. */ 174 struct list_head node; 175 176 /** @flags: Section flags, as encoded in the FW binary. */ 177 u32 flags; 178 179 /** @mem: Section memory. */ 180 struct panthor_kernel_bo *mem; 181 182 /** 183 * @name: Name of the section, as specified in the binary. 184 * 185 * Can be NULL. 186 */ 187 const char *name; 188 189 /** 190 * @data: Initial data copied to the FW memory. 191 * 192 * We keep data around so we can reload sections after a reset. 193 */ 194 struct { 195 /** @buf: Buffed used to store init data. */ 196 const void *buf; 197 198 /** @size: Size of @buf in bytes. */ 199 size_t size; 200 } data; 201 }; 202 203 #define CSF_MCU_SHARED_REGION_START 0x04000000ULL 204 #define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL 205 206 #define MIN_CS_PER_CSG 8 207 #define MIN_CSGS 3 208 209 #define CSF_IFACE_VERSION(major, minor, patch) \ 210 (((major) << 24) | ((minor) << 16) | (patch)) 211 #define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24) 212 #define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff) 213 #define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff) 214 215 #define CSF_GROUP_CONTROL_OFFSET 0x1000 216 #define CSF_STREAM_CONTROL_OFFSET 0x40 217 #define CSF_UNPRESERVED_REG_COUNT 4 218 219 /** 220 * struct panthor_fw_iface - FW interfaces 221 */ 222 struct panthor_fw_iface { 223 /** @global: Global interface. */ 224 struct panthor_fw_global_iface global; 225 226 /** @groups: Group slot interfaces. */ 227 struct panthor_fw_csg_iface groups[MAX_CSGS]; 228 229 /** @streams: Command stream slot interfaces. */ 230 struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG]; 231 }; 232 233 /** 234 * struct panthor_fw - Firmware management 235 */ 236 struct panthor_fw { 237 /** @iomem: CPU mapping of MCU_CONTROL iomem region */ 238 void __iomem *iomem; 239 240 /** @vm: MCU VM. */ 241 struct panthor_vm *vm; 242 243 /** @sections: List of FW sections. */ 244 struct list_head sections; 245 246 /** @shared_section: The section containing the FW interfaces. */ 247 struct panthor_fw_section *shared_section; 248 249 /** @iface: FW interfaces. */ 250 struct panthor_fw_iface iface; 251 252 /** @watchdog: Collection of fields relating to the FW watchdog. */ 253 struct { 254 /** @ping_work: Delayed work used to ping the FW. */ 255 struct delayed_work ping_work; 256 } watchdog; 257 258 /** 259 * @req_waitqueue: FW request waitqueue. 260 * 261 * Everytime a request is sent to a command stream group or the global 262 * interface, the caller will first busy wait for the request to be 263 * acknowledged, and then fallback to a sleeping wait. 264 * 265 * This wait queue is here to support the sleeping wait flavor. 266 */ 267 wait_queue_head_t req_waitqueue; 268 269 /** @booted: True is the FW is booted */ 270 bool booted; 271 272 /** @irq: Job irq data. */ 273 struct panthor_irq irq; 274 }; 275 276 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev) 277 { 278 return ptdev->fw->vm; 279 } 280 281 /** 282 * panthor_fw_get_glb_iface() - Get the global interface 283 * @ptdev: Device. 284 * 285 * Return: The global interface. 286 */ 287 struct panthor_fw_global_iface * 288 panthor_fw_get_glb_iface(struct panthor_device *ptdev) 289 { 290 return &ptdev->fw->iface.global; 291 } 292 293 /** 294 * panthor_fw_get_csg_iface() - Get a command stream group slot interface 295 * @ptdev: Device. 296 * @csg_slot: Index of the command stream group slot. 297 * 298 * Return: The command stream group slot interface. 299 */ 300 struct panthor_fw_csg_iface * 301 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot) 302 { 303 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS)) 304 return NULL; 305 306 return &ptdev->fw->iface.groups[csg_slot]; 307 } 308 309 /** 310 * panthor_fw_get_cs_iface() - Get a command stream slot interface 311 * @ptdev: Device. 312 * @csg_slot: Index of the command stream group slot. 313 * @cs_slot: Index of the command stream slot. 314 * 315 * Return: The command stream slot interface. 316 */ 317 struct panthor_fw_cs_iface * 318 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) 319 { 320 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG)) 321 return NULL; 322 323 return &ptdev->fw->iface.streams[csg_slot][cs_slot]; 324 } 325 326 static bool panthor_fw_has_glb_state(struct panthor_device *ptdev) 327 { 328 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 329 330 return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0); 331 } 332 333 static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev) 334 { 335 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 336 337 return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0); 338 } 339 340 u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev, 341 struct panthor_fw_csg_iface *csg_iface) 342 { 343 if (panthor_fw_has_64bit_ep_req(ptdev)) 344 return csg_iface->input->endpoint_req2; 345 else 346 return csg_iface->input->endpoint_req; 347 } 348 349 void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev, 350 struct panthor_fw_csg_iface *csg_iface, u64 value) 351 { 352 if (panthor_fw_has_64bit_ep_req(ptdev)) 353 csg_iface->input->endpoint_req2 = value; 354 else 355 csg_iface->input->endpoint_req = lower_32_bits(value); 356 } 357 358 void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev, 359 struct panthor_fw_csg_iface *csg_iface, u64 value, 360 u64 mask) 361 { 362 if (panthor_fw_has_64bit_ep_req(ptdev)) 363 panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask); 364 else 365 panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value), 366 lower_32_bits(mask)); 367 } 368 369 /** 370 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count 371 * @ptdev: Device. 372 * @timeout_us: Timeout expressed in micro-seconds. 373 * 374 * The FW has two timer sources: the GPU counter or arch-timer. We need 375 * to express timeouts in term of number of cycles and specify which 376 * timer source should be used. 377 * 378 * Return: A value suitable for timeout fields in the global interface. 379 */ 380 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us) 381 { 382 bool use_cycle_counter = false; 383 u32 timer_rate = 0; 384 u64 mod_cycles; 385 386 #ifdef CONFIG_ARM_ARCH_TIMER 387 timer_rate = arch_timer_get_cntfrq(); 388 #endif 389 390 if (!timer_rate) { 391 use_cycle_counter = true; 392 timer_rate = clk_get_rate(ptdev->clks.core); 393 } 394 395 if (drm_WARN_ON(&ptdev->base, !timer_rate)) { 396 /* We couldn't get a valid clock rate, let's just pick the 397 * maximum value so the FW still handles the core 398 * power on/off requests. 399 */ 400 return GLB_TIMER_VAL(~0) | 401 GLB_TIMER_SOURCE_GPU_COUNTER; 402 } 403 404 mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate, 405 1000000ull << 10); 406 if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0))) 407 mod_cycles = GLB_TIMER_VAL(~0); 408 409 return GLB_TIMER_VAL(mod_cycles) | 410 (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0); 411 } 412 413 static int panthor_fw_binary_iter_read(struct panthor_device *ptdev, 414 struct panthor_fw_binary_iter *iter, 415 void *out, size_t size) 416 { 417 size_t new_offset = iter->offset + size; 418 419 if (new_offset > iter->size || new_offset < iter->offset) { 420 drm_err(&ptdev->base, "Firmware too small\n"); 421 return -EINVAL; 422 } 423 424 memcpy(out, iter->data + iter->offset, size); 425 iter->offset = new_offset; 426 return 0; 427 } 428 429 static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev, 430 struct panthor_fw_binary_iter *iter, 431 struct panthor_fw_binary_iter *sub_iter, 432 size_t size) 433 { 434 size_t new_offset = iter->offset + size; 435 436 if (new_offset > iter->size || new_offset < iter->offset) { 437 drm_err(&ptdev->base, "Firmware entry too long\n"); 438 return -EINVAL; 439 } 440 441 sub_iter->offset = 0; 442 sub_iter->data = iter->data + iter->offset; 443 sub_iter->size = size; 444 iter->offset = new_offset; 445 return 0; 446 } 447 448 static void panthor_fw_init_section_mem(struct panthor_device *ptdev, 449 struct panthor_fw_section *section) 450 { 451 bool was_mapped = !!section->mem->kmap; 452 int ret; 453 454 if (!section->data.size && 455 !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO)) 456 return; 457 458 ret = panthor_kernel_bo_vmap(section->mem); 459 if (drm_WARN_ON(&ptdev->base, ret)) 460 return; 461 462 memcpy(section->mem->kmap, section->data.buf, section->data.size); 463 if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) { 464 memset(section->mem->kmap + section->data.size, 0, 465 panthor_kernel_bo_size(section->mem) - section->data.size); 466 } 467 468 if (!was_mapped) 469 panthor_kernel_bo_vunmap(section->mem); 470 } 471 472 /** 473 * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces. 474 * @ptdev: Device. 475 * @input: Pointer holding the input interface on success. 476 * Should be ignored on failure. 477 * @output: Pointer holding the output interface on success. 478 * Should be ignored on failure. 479 * @input_fw_va: Pointer holding the input interface FW VA on success. 480 * Should be ignored on failure. 481 * @output_fw_va: Pointer holding the output interface FW VA on success. 482 * Should be ignored on failure. 483 * 484 * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input 485 * interface is at offset 0, and the output interface at offset 4096. 486 * 487 * Return: A valid pointer in case of success, an ERR_PTR() otherwise. 488 */ 489 struct panthor_kernel_bo * 490 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, 491 struct panthor_fw_ringbuf_input_iface **input, 492 const struct panthor_fw_ringbuf_output_iface **output, 493 u32 *input_fw_va, u32 *output_fw_va) 494 { 495 struct panthor_kernel_bo *mem; 496 int ret; 497 498 mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K, 499 DRM_PANTHOR_BO_NO_MMAP, 500 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | 501 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, 502 PANTHOR_VM_KERNEL_AUTO_VA, 503 "Queue FW interface"); 504 if (IS_ERR(mem)) 505 return mem; 506 507 ret = panthor_kernel_bo_vmap(mem); 508 if (ret) { 509 panthor_kernel_bo_destroy(mem); 510 return ERR_PTR(ret); 511 } 512 513 memset(mem->kmap, 0, panthor_kernel_bo_size(mem)); 514 *input = mem->kmap; 515 *output = mem->kmap + SZ_4K; 516 *input_fw_va = panthor_kernel_bo_gpuva(mem); 517 *output_fw_va = *input_fw_va + SZ_4K; 518 519 return mem; 520 } 521 522 /** 523 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group. 524 * @ptdev: Device. 525 * @size: Size of the suspend buffer. 526 * 527 * Return: A valid pointer in case of success, an ERR_PTR() otherwise. 528 */ 529 struct panthor_kernel_bo * 530 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size) 531 { 532 return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size, 533 DRM_PANTHOR_BO_NO_MMAP, 534 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, 535 PANTHOR_VM_KERNEL_AUTO_VA, 536 "FW suspend buffer"); 537 } 538 539 static int panthor_fw_load_section_entry(struct panthor_device *ptdev, 540 const struct firmware *fw, 541 struct panthor_fw_binary_iter *iter, 542 u32 ehdr) 543 { 544 ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm); 545 struct panthor_fw_binary_section_entry_hdr hdr; 546 struct panthor_fw_section *section; 547 u32 section_size; 548 u32 name_len; 549 int ret; 550 551 ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); 552 if (ret) 553 return ret; 554 555 if (hdr.data.end < hdr.data.start) { 556 drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n", 557 hdr.data.end, hdr.data.start); 558 return -EINVAL; 559 } 560 561 if (hdr.va.end < hdr.va.start) { 562 drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n", 563 hdr.va.end, hdr.va.start); 564 return -EINVAL; 565 } 566 567 if (hdr.data.end > fw->size) { 568 drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n", 569 hdr.data.end, fw->size); 570 return -EINVAL; 571 } 572 573 if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) { 574 drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", 575 hdr.va.start, hdr.va.end); 576 return -EINVAL; 577 } 578 579 if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) { 580 drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", 581 hdr.flags); 582 return -EINVAL; 583 } 584 585 if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) { 586 drm_warn(&ptdev->base, 587 "Firmware protected mode entry is not supported, ignoring"); 588 return 0; 589 } 590 591 if (hdr.va.start == CSF_MCU_SHARED_REGION_START && 592 !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) { 593 drm_err(&ptdev->base, 594 "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); 595 return -EINVAL; 596 } 597 598 name_len = iter->size - iter->offset; 599 600 section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL); 601 if (!section) 602 return -ENOMEM; 603 604 list_add_tail(§ion->node, &ptdev->fw->sections); 605 section->flags = hdr.flags; 606 section->data.size = hdr.data.end - hdr.data.start; 607 608 if (section->data.size > 0) { 609 void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL); 610 611 if (!data) 612 return -ENOMEM; 613 614 memcpy(data, fw->data + hdr.data.start, section->data.size); 615 section->data.buf = data; 616 } 617 618 if (name_len > 0) { 619 char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL); 620 621 if (!name) 622 return -ENOMEM; 623 624 memcpy(name, iter->data + iter->offset, name_len); 625 name[name_len] = '\0'; 626 section->name = name; 627 } 628 629 section_size = hdr.va.end - hdr.va.start; 630 if (section_size) { 631 u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK; 632 struct panthor_gem_object *bo; 633 u32 vm_map_flags = 0; 634 u64 va = hdr.va.start; 635 636 if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR)) 637 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; 638 639 if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX)) 640 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; 641 642 /* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to 643 * non-cacheable for now. We might want to introduce a new 644 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device 645 * memory and is currently not used by our driver) for 646 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit 647 * of IO-coherent systems. 648 */ 649 if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED) 650 vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; 651 652 section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), 653 section_size, 654 DRM_PANTHOR_BO_NO_MMAP, 655 vm_map_flags, va, "FW section"); 656 if (IS_ERR(section->mem)) 657 return PTR_ERR(section->mem); 658 659 if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) 660 return -EINVAL; 661 662 if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) { 663 ret = panthor_kernel_bo_vmap(section->mem); 664 if (ret) 665 return ret; 666 } 667 668 panthor_fw_init_section_mem(ptdev, section); 669 670 bo = to_panthor_bo(section->mem->obj); 671 672 /* An sgt should have been requested when the kernel BO was GPU-mapped. */ 673 if (drm_WARN_ON_ONCE(&ptdev->base, !bo->dmap.sgt)) 674 return -EINVAL; 675 676 dma_sync_sgtable_for_device(ptdev->base.dev, bo->dmap.sgt, DMA_TO_DEVICE); 677 } 678 679 if (hdr.va.start == CSF_MCU_SHARED_REGION_START) 680 ptdev->fw->shared_section = section; 681 682 return 0; 683 } 684 685 static int panthor_fw_read_build_info(struct panthor_device *ptdev, 686 const struct firmware *fw, 687 struct panthor_fw_binary_iter *iter, 688 u32 ehdr) 689 { 690 struct panthor_fw_build_info_hdr hdr; 691 static const char git_sha_header[] = "git_sha: "; 692 const int header_len = sizeof(git_sha_header) - 1; 693 int ret; 694 695 ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); 696 if (ret) 697 return ret; 698 699 if (hdr.meta_start > fw->size || 700 hdr.meta_start + hdr.meta_size > fw->size) { 701 drm_err(&ptdev->base, "Firmware build info corrupt\n"); 702 /* We don't need the build info, so continue */ 703 return 0; 704 } 705 706 if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) { 707 /* Not the expected header, this isn't metadata we understand */ 708 return 0; 709 } 710 711 /* Check that the git SHA is NULL terminated as expected */ 712 if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') { 713 drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n"); 714 /* Don't treat as fatal */ 715 return 0; 716 } 717 718 drm_info(&ptdev->base, "Firmware git sha: %s\n", 719 fw->data + hdr.meta_start + header_len); 720 721 return 0; 722 } 723 724 static void 725 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) 726 { 727 struct panthor_fw_section *section; 728 729 list_for_each_entry(section, &ptdev->fw->sections, node) { 730 struct sg_table *sgt; 731 732 if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR)) 733 continue; 734 735 panthor_fw_init_section_mem(ptdev, section); 736 737 /* An sgt should have been requested when the kernel BO was GPU-mapped. */ 738 sgt = to_panthor_bo(section->mem->obj)->dmap.sgt; 739 if (!drm_WARN_ON_ONCE(&ptdev->base, !sgt)) 740 dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); 741 } 742 } 743 744 static int panthor_fw_load_entry(struct panthor_device *ptdev, 745 const struct firmware *fw, 746 struct panthor_fw_binary_iter *iter) 747 { 748 struct panthor_fw_binary_iter eiter; 749 u32 ehdr; 750 int ret; 751 752 ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr)); 753 if (ret) 754 return ret; 755 756 if ((iter->offset % sizeof(u32)) || 757 (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) { 758 drm_err(&ptdev->base, "Firmware entry is not 32-bit aligned, offset=0x%x size=0x%x\n", 759 (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr)); 760 return -EINVAL; 761 } 762 763 if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter, 764 CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr))) 765 return -EINVAL; 766 767 switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { 768 case CSF_FW_BINARY_ENTRY_TYPE_IFACE: 769 return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); 770 case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: 771 return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr); 772 773 /* FIXME: handle those entry types? */ 774 case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: 775 case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: 776 case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: 777 case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: 778 return 0; 779 default: 780 break; 781 } 782 783 if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) 784 return 0; 785 786 drm_err(&ptdev->base, 787 "Unsupported non-optional entry type %u in firmware\n", 788 CSF_FW_BINARY_ENTRY_TYPE(ehdr)); 789 return -EINVAL; 790 } 791 792 static int panthor_fw_load(struct panthor_device *ptdev) 793 { 794 const struct firmware *fw = NULL; 795 struct panthor_fw_binary_iter iter = {}; 796 struct panthor_fw_binary_hdr hdr; 797 char fw_path[128]; 798 int ret; 799 800 snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s", 801 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id), 802 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id), 803 CSF_FW_NAME); 804 805 ret = request_firmware(&fw, fw_path, ptdev->base.dev); 806 if (ret) { 807 drm_err(&ptdev->base, "Failed to load firmware image '%s'\n", 808 CSF_FW_NAME); 809 return ret; 810 } 811 812 iter.data = fw->data; 813 iter.size = fw->size; 814 ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr)); 815 if (ret) 816 goto out; 817 818 if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) { 819 ret = -EINVAL; 820 drm_err(&ptdev->base, "Invalid firmware magic\n"); 821 goto out; 822 } 823 824 if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) { 825 ret = -EINVAL; 826 drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n", 827 hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX); 828 goto out; 829 } 830 831 if (hdr.size > iter.size) { 832 drm_err(&ptdev->base, "Firmware image is truncated\n"); 833 goto out; 834 } 835 836 iter.size = hdr.size; 837 838 while (iter.offset < hdr.size) { 839 ret = panthor_fw_load_entry(ptdev, fw, &iter); 840 if (ret) 841 goto out; 842 } 843 844 if (!ptdev->fw->shared_section) { 845 drm_err(&ptdev->base, "Shared interface region not found\n"); 846 ret = -EINVAL; 847 goto out; 848 } 849 850 out: 851 release_firmware(fw); 852 return ret; 853 } 854 855 /** 856 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address 857 * @ptdev: Device. 858 * @mcu_va: MCU address. 859 * 860 * Return: NULL if the address is not part of the shared section, non-NULL otherwise. 861 */ 862 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va) 863 { 864 u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem); 865 u64 shared_mem_end = shared_mem_start + 866 panthor_kernel_bo_size(ptdev->fw->shared_section->mem); 867 if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end) 868 return NULL; 869 870 return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start); 871 } 872 873 static int panthor_init_cs_iface(struct panthor_device *ptdev, 874 unsigned int csg_idx, unsigned int cs_idx) 875 { 876 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 877 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx); 878 struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx]; 879 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); 880 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + 881 (csg_idx * glb_iface->control->group_stride) + 882 CSF_STREAM_CONTROL_OFFSET + 883 (cs_idx * csg_iface->control->stream_stride); 884 struct panthor_fw_cs_iface *first_cs_iface = 885 panthor_fw_get_cs_iface(ptdev, 0, 0); 886 887 if (iface_offset + sizeof(*cs_iface) >= shared_section_sz) 888 return -EINVAL; 889 890 spin_lock_init(&cs_iface->lock); 891 cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; 892 cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va); 893 cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va); 894 895 if (!cs_iface->input || !cs_iface->output) { 896 drm_err(&ptdev->base, "Invalid stream control interface input/output VA"); 897 return -EINVAL; 898 } 899 900 if (cs_iface != first_cs_iface) { 901 if (cs_iface->control->features != first_cs_iface->control->features) { 902 drm_err(&ptdev->base, "Expecting identical CS slots"); 903 return -EINVAL; 904 } 905 } else { 906 u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features); 907 908 ptdev->csif_info.cs_reg_count = reg_count; 909 ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT; 910 } 911 912 return 0; 913 } 914 915 static bool compare_csg(const struct panthor_fw_csg_control_iface *a, 916 const struct panthor_fw_csg_control_iface *b) 917 { 918 if (a->features != b->features) 919 return false; 920 if (a->suspend_size != b->suspend_size) 921 return false; 922 if (a->protm_suspend_size != b->protm_suspend_size) 923 return false; 924 if (a->stream_num != b->stream_num) 925 return false; 926 return true; 927 } 928 929 static int panthor_init_csg_iface(struct panthor_device *ptdev, 930 unsigned int csg_idx) 931 { 932 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 933 struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx]; 934 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); 935 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride); 936 unsigned int i; 937 938 if (iface_offset + sizeof(*csg_iface) >= shared_section_sz) 939 return -EINVAL; 940 941 spin_lock_init(&csg_iface->lock); 942 csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; 943 csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va); 944 csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va); 945 946 if (csg_iface->control->stream_num < MIN_CS_PER_CSG || 947 csg_iface->control->stream_num > MAX_CS_PER_CSG) 948 return -EINVAL; 949 950 if (!csg_iface->input || !csg_iface->output) { 951 drm_err(&ptdev->base, "Invalid group control interface input/output VA"); 952 return -EINVAL; 953 } 954 955 if (csg_idx > 0) { 956 struct panthor_fw_csg_iface *first_csg_iface = 957 panthor_fw_get_csg_iface(ptdev, 0); 958 959 if (!compare_csg(first_csg_iface->control, csg_iface->control)) { 960 drm_err(&ptdev->base, "Expecting identical CSG slots"); 961 return -EINVAL; 962 } 963 } 964 965 for (i = 0; i < csg_iface->control->stream_num; i++) { 966 int ret = panthor_init_cs_iface(ptdev, csg_idx, i); 967 968 if (ret) 969 return ret; 970 } 971 972 return 0; 973 } 974 975 static u32 panthor_get_instr_features(struct panthor_device *ptdev) 976 { 977 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 978 979 if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0)) 980 return 0; 981 982 return glb_iface->control->instr_features; 983 } 984 985 static int panthor_fw_init_ifaces(struct panthor_device *ptdev) 986 { 987 struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global; 988 unsigned int i; 989 990 if (!ptdev->fw->shared_section->mem->kmap) 991 return -EINVAL; 992 993 spin_lock_init(&glb_iface->lock); 994 glb_iface->control = ptdev->fw->shared_section->mem->kmap; 995 996 if (!glb_iface->control->version) { 997 drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot"); 998 return -EINVAL; 999 } 1000 1001 glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va); 1002 glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va); 1003 if (!glb_iface->input || !glb_iface->output) { 1004 drm_err(&ptdev->base, "Invalid global control interface input/output VA"); 1005 return -EINVAL; 1006 } 1007 1008 if (glb_iface->control->group_num > MAX_CSGS || 1009 glb_iface->control->group_num < MIN_CSGS) { 1010 drm_err(&ptdev->base, "Invalid number of control groups"); 1011 return -EINVAL; 1012 } 1013 1014 for (i = 0; i < glb_iface->control->group_num; i++) { 1015 int ret = panthor_init_csg_iface(ptdev, i); 1016 1017 if (ret) 1018 return ret; 1019 } 1020 1021 drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x", 1022 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), 1023 CSF_IFACE_VERSION_MINOR(glb_iface->control->version), 1024 CSF_IFACE_VERSION_PATCH(glb_iface->control->version), 1025 glb_iface->control->features, 1026 panthor_get_instr_features(ptdev)); 1027 return 0; 1028 } 1029 1030 static void panthor_fw_init_global_iface(struct panthor_device *ptdev) 1031 { 1032 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1033 1034 /* Enable all cores. */ 1035 glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present; 1036 1037 /* Setup timers. */ 1038 glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US); 1039 glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; 1040 glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US); 1041 1042 /* Enable interrupts we care about. */ 1043 glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN | 1044 GLB_PING | 1045 GLB_CFG_PROGRESS_TIMER | 1046 GLB_CFG_POWEROFF_TIMER | 1047 GLB_IDLE_EN | 1048 GLB_IDLE; 1049 1050 if (panthor_fw_has_glb_state(ptdev)) 1051 glb_iface->input->ack_irq_mask |= GLB_STATE_MASK; 1052 1053 panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN | GLB_COUNTER_EN, 1054 GLB_IDLE_EN | GLB_COUNTER_EN); 1055 panthor_fw_toggle_reqs(glb_iface, req, ack, 1056 GLB_CFG_ALLOC_EN | 1057 GLB_CFG_POWEROFF_TIMER | 1058 GLB_CFG_PROGRESS_TIMER); 1059 1060 panthor_fw_ring_doorbell(ptdev, CSF_GLB_DOORBELL_ID); 1061 1062 /* Kick the watchdog. */ 1063 mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work, 1064 msecs_to_jiffies(PING_INTERVAL_MS)); 1065 } 1066 1067 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) 1068 { 1069 u32 duration; 1070 u64 start = 0; 1071 1072 if (tracepoint_enabled(gpu_job_irq)) 1073 start = ktime_get_ns(); 1074 1075 gpu_write(ptdev->fw->irq.iomem, INT_CLEAR, status); 1076 1077 if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) 1078 ptdev->fw->booted = true; 1079 1080 wake_up_all(&ptdev->fw->req_waitqueue); 1081 1082 /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */ 1083 if (!ptdev->fw->booted) 1084 return; 1085 1086 panthor_sched_report_fw_events(ptdev, status); 1087 1088 if (tracepoint_enabled(gpu_job_irq) && start) { 1089 if (check_sub_overflow(ktime_get_ns(), start, &duration)) 1090 duration = U32_MAX; 1091 trace_gpu_job_irq(ptdev->base.dev, status, duration); 1092 } 1093 } 1094 PANTHOR_IRQ_HANDLER(job, panthor_job_irq_handler); 1095 1096 static int panthor_fw_start(struct panthor_device *ptdev) 1097 { 1098 struct panthor_fw *fw = ptdev->fw; 1099 bool timedout = false; 1100 1101 ptdev->fw->booted = false; 1102 panthor_job_irq_enable_events(&ptdev->fw->irq, ~0); 1103 panthor_job_irq_resume(&ptdev->fw->irq); 1104 gpu_write(fw->iomem, MCU_CONTROL, MCU_CONTROL_AUTO); 1105 1106 if (!wait_event_timeout(ptdev->fw->req_waitqueue, 1107 ptdev->fw->booted, 1108 msecs_to_jiffies(1000))) { 1109 if (!ptdev->fw->booted && 1110 !(gpu_read(fw->irq.iomem, INT_STAT) & JOB_INT_GLOBAL_IF)) 1111 timedout = true; 1112 } 1113 1114 if (timedout) { 1115 static const char * const status_str[] = { 1116 [MCU_STATUS_DISABLED] = "disabled", 1117 [MCU_STATUS_ENABLED] = "enabled", 1118 [MCU_STATUS_HALT] = "halt", 1119 [MCU_STATUS_FATAL] = "fatal", 1120 }; 1121 u32 status = gpu_read(fw->iomem, MCU_STATUS); 1122 1123 drm_err(&ptdev->base, "Failed to boot MCU (status=%s)", 1124 status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown"); 1125 return -ETIMEDOUT; 1126 } 1127 1128 return 0; 1129 } 1130 1131 static void panthor_fw_stop(struct panthor_device *ptdev) 1132 { 1133 struct panthor_fw *fw = ptdev->fw; 1134 u32 status; 1135 1136 gpu_write(fw->iomem, MCU_CONTROL, MCU_CONTROL_DISABLE); 1137 if (gpu_read_poll_timeout(fw->iomem, MCU_STATUS, status, 1138 status == MCU_STATUS_DISABLED, 10, 100000)) 1139 drm_err(&ptdev->base, "Failed to stop MCU"); 1140 } 1141 1142 static bool panthor_fw_mcu_halted(struct panthor_device *ptdev) 1143 { 1144 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1145 bool halted; 1146 1147 halted = gpu_read(ptdev->fw->iomem, MCU_STATUS) == MCU_STATUS_HALT; 1148 1149 if (panthor_fw_has_glb_state(ptdev)) 1150 halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT); 1151 1152 return halted; 1153 } 1154 1155 static void panthor_fw_halt_mcu(struct panthor_device *ptdev) 1156 { 1157 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1158 1159 if (panthor_fw_has_glb_state(ptdev)) 1160 panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK); 1161 else 1162 panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); 1163 1164 panthor_fw_ring_doorbell(ptdev, CSF_GLB_DOORBELL_ID); 1165 } 1166 1167 static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev) 1168 { 1169 bool halted = false; 1170 1171 if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10, 1172 MCU_HALT_TIMEOUT_US, 0, ptdev)) { 1173 drm_warn(&ptdev->base, "Timed out waiting for MCU to halt"); 1174 return false; 1175 } 1176 1177 return true; 1178 } 1179 1180 static void panthor_fw_mcu_set_active(struct panthor_device *ptdev) 1181 { 1182 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1183 1184 if (panthor_fw_has_glb_state(ptdev)) 1185 panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK); 1186 else 1187 panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); 1188 } 1189 1190 /** 1191 * panthor_fw_pre_reset() - Call before a reset. 1192 * @ptdev: Device. 1193 * @on_hang: true if the reset was triggered on a GPU hang. 1194 * 1195 * If the reset is not triggered on a hang, we try to gracefully halt the 1196 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called. 1197 */ 1198 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) 1199 { 1200 /* Make sure we won't be woken up by a ping. */ 1201 cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); 1202 1203 ptdev->reset.fast = false; 1204 1205 if (!on_hang) { 1206 panthor_fw_halt_mcu(ptdev); 1207 if (!panthor_fw_wait_mcu_halted(ptdev)) 1208 drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); 1209 else 1210 ptdev->reset.fast = true; 1211 } 1212 1213 panthor_job_irq_suspend(&ptdev->fw->irq); 1214 panthor_fw_stop(ptdev); 1215 } 1216 1217 /** 1218 * panthor_fw_post_reset() - Call after a reset. 1219 * @ptdev: Device. 1220 * 1221 * Start the FW. If this is not a fast reset, all FW sections are reloaded to 1222 * make sure we can recover from a memory corruption. 1223 */ 1224 int panthor_fw_post_reset(struct panthor_device *ptdev) 1225 { 1226 int ret; 1227 1228 /* Make the MCU VM active. */ 1229 ret = panthor_vm_active(ptdev->fw->vm); 1230 if (ret) 1231 return ret; 1232 1233 if (!ptdev->reset.fast) { 1234 /* On a slow reset, reload all sections, including RO ones. 1235 * We're not supposed to end up here anyway, let's just assume 1236 * the overhead of reloading everything is acceptable. 1237 */ 1238 panthor_reload_fw_sections(ptdev, true); 1239 } else { 1240 /* 1241 * If the FW was previously successfully halted in the pre-reset 1242 * operation, we need to transition it to active again before 1243 * the FW is rebooted. 1244 * This is not needed on a slow reset because FW sections are 1245 * re-initialized. 1246 */ 1247 panthor_fw_mcu_set_active(ptdev); 1248 } 1249 1250 ret = panthor_fw_start(ptdev); 1251 if (ret) { 1252 drm_err(&ptdev->base, "FW %s reset failed", 1253 ptdev->reset.fast ? "fast" : "slow"); 1254 return ret; 1255 } 1256 1257 /* We must re-initialize the global interface even on fast-reset. */ 1258 panthor_fw_init_global_iface(ptdev); 1259 return 0; 1260 } 1261 1262 /** 1263 * panthor_fw_unplug() - Called when the device is unplugged. 1264 * @ptdev: Device. 1265 * 1266 * This function must make sure all pending operations are flushed before 1267 * will release device resources, thus preventing any interaction with 1268 * the HW. 1269 * 1270 * If there is still FW-related work running after this function returns, 1271 * they must use drm_dev_{enter,exit}() and skip any HW access when 1272 * drm_dev_enter() returns false. 1273 */ 1274 void panthor_fw_unplug(struct panthor_device *ptdev) 1275 { 1276 struct panthor_fw_section *section; 1277 1278 disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work); 1279 1280 if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) { 1281 /* Make sure the IRQ handler cannot be called after that point. */ 1282 if (ptdev->fw->irq.irq) 1283 panthor_job_irq_suspend(&ptdev->fw->irq); 1284 1285 panthor_fw_stop(ptdev); 1286 } 1287 1288 list_for_each_entry(section, &ptdev->fw->sections, node) 1289 panthor_kernel_bo_destroy(section->mem); 1290 1291 /* We intentionally don't call panthor_vm_idle() and let 1292 * panthor_mmu_unplug() release the AS we acquired with 1293 * panthor_vm_active() so we don't have to track the VM active/idle 1294 * state to keep the active_refcnt balanced. 1295 */ 1296 panthor_vm_put(ptdev->fw->vm); 1297 ptdev->fw->vm = NULL; 1298 1299 if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) 1300 panthor_hw_l2_power_off(ptdev); 1301 } 1302 1303 /** 1304 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW. 1305 * @req_ptr: Pointer to the req register. 1306 * @ack_ptr: Pointer to the ack register. 1307 * @wq: Wait queue to use for the sleeping wait. 1308 * @req_mask: Mask of requests to wait for. 1309 * @acked: Pointer to field that's updated with the acked requests. 1310 * If the function returns 0, *acked == req_mask. 1311 * @timeout_ms: Timeout expressed in milliseconds. 1312 * 1313 * Return: 0 on success, -ETIMEDOUT otherwise. 1314 */ 1315 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr, 1316 wait_queue_head_t *wq, 1317 u32 req_mask, u32 *acked, 1318 u32 timeout_ms) 1319 { 1320 u32 ack, req = READ_ONCE(*req_ptr) & req_mask; 1321 int ret; 1322 1323 /* Busy wait for a few µsecs before falling back to a sleeping wait. */ 1324 *acked = req_mask; 1325 ret = read_poll_timeout_atomic(READ_ONCE, ack, 1326 (ack & req_mask) == req, 1327 0, 10, 0, 1328 *ack_ptr); 1329 if (!ret) 1330 return 0; 1331 1332 if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req, 1333 msecs_to_jiffies(timeout_ms))) 1334 return 0; 1335 1336 /* Check one last time, in case we were not woken up for some reason. */ 1337 ack = READ_ONCE(*ack_ptr); 1338 if ((ack & req_mask) == req) 1339 return 0; 1340 1341 *acked = ~(req ^ ack) & req_mask; 1342 return -ETIMEDOUT; 1343 } 1344 1345 /** 1346 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged. 1347 * @ptdev: Device. 1348 * @req_mask: Mask of requests to wait for. 1349 * @acked: Pointer to field that's updated with the acked requests. 1350 * If the function returns 0, *acked == req_mask. 1351 * @timeout_ms: Timeout expressed in milliseconds. 1352 * 1353 * Return: 0 on success, -ETIMEDOUT otherwise. 1354 */ 1355 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, 1356 u32 req_mask, u32 *acked, 1357 u32 timeout_ms) 1358 { 1359 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1360 1361 /* GLB_HALT doesn't get acked through the FW interface. */ 1362 if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT))) 1363 return -EINVAL; 1364 1365 return panthor_fw_wait_acks(&glb_iface->input->req, 1366 &glb_iface->output->ack, 1367 &ptdev->fw->req_waitqueue, 1368 req_mask, acked, timeout_ms); 1369 } 1370 1371 /** 1372 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged. 1373 * @ptdev: Device. 1374 * @csg_slot: CSG slot ID. 1375 * @req_mask: Mask of requests to wait for. 1376 * @acked: Pointer to field that's updated with the acked requests. 1377 * If the function returns 0, *acked == req_mask. 1378 * @timeout_ms: Timeout expressed in milliseconds. 1379 * 1380 * Return: 0 on success, -ETIMEDOUT otherwise. 1381 */ 1382 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot, 1383 u32 req_mask, u32 *acked, u32 timeout_ms) 1384 { 1385 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot); 1386 int ret; 1387 1388 if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK)) 1389 return -EINVAL; 1390 1391 ret = panthor_fw_wait_acks(&csg_iface->input->req, 1392 &csg_iface->output->ack, 1393 &ptdev->fw->req_waitqueue, 1394 req_mask, acked, timeout_ms); 1395 1396 /* 1397 * Check that all bits in the state field were updated, if any mismatch 1398 * then clear all bits in the state field. This allows code to do 1399 * (acked & CSG_STATE_MASK) and get the right value. 1400 */ 1401 1402 if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK) 1403 *acked &= ~CSG_STATE_MASK; 1404 1405 return ret; 1406 } 1407 1408 void panthor_fw_ring_doorbell(struct panthor_device *ptdev, u32 doorbell_id) 1409 { 1410 gpu_write(ptdev->iomem, CSF_DOORBELL(doorbell_id), 1); 1411 } 1412 1413 /** 1414 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells. 1415 * @ptdev: Device. 1416 * @csg_mask: Bitmask encoding the command stream group doorbells to ring. 1417 * 1418 * This function is toggling bits in the doorbell_req and ringing the 1419 * global doorbell. It doesn't require a user doorbell to be attached to 1420 * the group. 1421 */ 1422 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask) 1423 { 1424 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1425 1426 panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask); 1427 panthor_fw_ring_doorbell(ptdev, CSF_GLB_DOORBELL_ID); 1428 } 1429 1430 static void panthor_fw_ping_work(struct work_struct *work) 1431 { 1432 struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work); 1433 struct panthor_device *ptdev = fw->irq.ptdev; 1434 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); 1435 u32 acked; 1436 int ret; 1437 1438 if (panthor_device_reset_is_pending(ptdev)) 1439 return; 1440 1441 panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING); 1442 panthor_fw_ring_doorbell(ptdev, CSF_GLB_DOORBELL_ID); 1443 1444 ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100); 1445 if (ret) { 1446 panthor_device_schedule_reset(ptdev); 1447 drm_err(&ptdev->base, "FW ping timeout, scheduling a reset"); 1448 } else { 1449 mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work, 1450 msecs_to_jiffies(PING_INTERVAL_MS)); 1451 } 1452 } 1453 1454 /** 1455 * panthor_fw_init() - Initialize FW related data. 1456 * @ptdev: Device. 1457 * 1458 * Return: 0 on success, a negative error code otherwise. 1459 */ 1460 int panthor_fw_init(struct panthor_device *ptdev) 1461 { 1462 struct panthor_fw *fw; 1463 int ret, irq; 1464 1465 fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL); 1466 if (!fw) 1467 return -ENOMEM; 1468 1469 fw->iomem = ptdev->iomem + MCU_CONTROL_BASE; 1470 ptdev->fw = fw; 1471 init_waitqueue_head(&fw->req_waitqueue); 1472 INIT_LIST_HEAD(&fw->sections); 1473 INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work); 1474 1475 irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job"); 1476 if (irq <= 0) 1477 return -ENODEV; 1478 1479 ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0, 1480 ptdev->iomem + JOB_INT_BASE); 1481 if (ret) { 1482 drm_err(&ptdev->base, "failed to request job irq"); 1483 return ret; 1484 } 1485 1486 ret = panthor_hw_l2_power_on(ptdev); 1487 if (ret) 1488 return ret; 1489 1490 fw->vm = panthor_vm_create(ptdev, true, 1491 0, SZ_4G, 1492 CSF_MCU_SHARED_REGION_START, 1493 CSF_MCU_SHARED_REGION_SIZE); 1494 if (IS_ERR(fw->vm)) { 1495 ret = PTR_ERR(fw->vm); 1496 fw->vm = NULL; 1497 goto err_unplug_fw; 1498 } 1499 1500 ret = panthor_fw_load(ptdev); 1501 if (ret) 1502 goto err_unplug_fw; 1503 1504 ret = panthor_vm_active(fw->vm); 1505 if (ret) 1506 goto err_unplug_fw; 1507 1508 ret = panthor_fw_start(ptdev); 1509 if (ret) 1510 goto err_unplug_fw; 1511 1512 ret = panthor_fw_init_ifaces(ptdev); 1513 if (ret) 1514 goto err_unplug_fw; 1515 1516 panthor_fw_init_global_iface(ptdev); 1517 return 0; 1518 1519 err_unplug_fw: 1520 panthor_fw_unplug(ptdev); 1521 return ret; 1522 } 1523 1524 MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin"); 1525 MODULE_FIRMWARE("arm/mali/arch10.10/mali_csffw.bin"); 1526 MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin"); 1527 MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin"); 1528 MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin"); 1529 MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin"); 1530 MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin"); 1531