// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#ifdef CONFIG_ARM_ARCH_TIMER
#include <asm/arch_timer.h>
#endif

#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
#include <linux/iopoll.h>
#include <linux/iosys-map.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_hw.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"

#define CSF_FW_NAME "mali_csffw.bin"

#define PING_INTERVAL_MS			12000
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
#define IDLE_HYSTERESIS_US			800
#define PWROFF_HYSTERESIS_US			10000
#define MCU_HALT_TIMEOUT_US			(1ULL * USEC_PER_SEC)

/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/** @major: Major FW version. */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/** @size: FW binary size. */
	u32 size;
};

/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
	 * the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};

#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)		((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)		(((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE		BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL		BIT(31)
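/*
 * Illustrative decoding of an entry header (example value, not taken from a
 * real binary): for ehdr == 0x80001002, CSF_FW_BINARY_ENTRY_TYPE() yields 2
 * (FUTF_TEST), CSF_FW_BINARY_ENTRY_SIZE() yields 0x10 bytes including the
 * 4-byte header (so 0xc bytes of payload), and CSF_FW_BINARY_ENTRY_OPTIONAL
 * is set, meaning an unhandled entry of this type can be safely skipped.
 */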
#define CSF_FW_BINARY_IFACE_ENTRY_RD		BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_WR		BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_EX		BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE		(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED		(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT	(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT	(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK		GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_PROT		BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_SHARED	BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_ZERO		BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS \
	(CSF_FW_BINARY_IFACE_ENTRY_RD | \
	 CSF_FW_BINARY_IFACE_ENTRY_WR | \
	 CSF_FW_BINARY_IFACE_ENTRY_EX | \
	 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK | \
	 CSF_FW_BINARY_IFACE_ENTRY_PROT | \
	 CSF_FW_BINARY_IFACE_ENTRY_SHARED | \
	 CSF_FW_BINARY_IFACE_ENTRY_ZERO)

/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags. */
	u32 flags;

	/** @va: MCU virtual range to map this binary section to. */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/** @data: Data to initialize the FW section with. */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};

struct panthor_fw_build_info_hdr {
	/** @meta_start: Offset of the build info data in the FW binary */
	u32 meta_start;
	/** @meta_size: Size of the build info data in the FW binary */
	u32 meta_size;
};

/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: FW binary size. */
	size_t size;

	/** @offset: Iterator offset. */
	size_t offset;
};

/**
 * struct panthor_fw_section - FW section
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/** @mem: Section memory. */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};

#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3

#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)
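/*
 * For example, CSF_IFACE_VERSION(4, 1, 0) packs to 0x04010000: major in
 * bits 31:24, minor in bits 23:16, patch in bits 15:0. Because the most
 * significant field is the major version, plain integer comparisons like
 * version >= CSF_IFACE_VERSION(4, 1, 0) order versions correctly.
 */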
#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
#define CSF_UNPRESERVED_REG_COUNT		4

/**
 * struct panthor_fw_iface - FW interfaces
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};

/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/** @shared_section: The section containing the FW interfaces. */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/** @booted: True if the FW is booted. */
	bool booted;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};

struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
{
	return ptdev->fw->vm;
}

/**
 * panthor_fw_get_glb_iface() - Get the global interface
 * @ptdev: Device.
 *
 * Return: The global interface.
 */
struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev)
{
	return &ptdev->fw->iface.global;
}

/**
 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 *
 * Return: The command stream group slot interface.
 */
struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
		return NULL;

	return &ptdev->fw->iface.groups[csg_slot];
}

/**
 * panthor_fw_get_cs_iface() - Get a command stream slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 * @cs_slot: Index of the command stream slot.
 *
 * Return: The command stream slot interface.
 */
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
		return NULL;

	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
}

static bool panthor_fw_has_glb_state(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0);
}

static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0);
}

u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
				    struct panthor_fw_csg_iface *csg_iface)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		return csg_iface->input->endpoint_req2;
	else
		return csg_iface->input->endpoint_req;
}

void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
				     struct panthor_fw_csg_iface *csg_iface, u64 value)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		csg_iface->input->endpoint_req2 = value;
	else
		csg_iface->input->endpoint_req = lower_32_bits(value);
}

void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
					struct panthor_fw_csg_iface *csg_iface, u64 value,
					u64 mask)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask);
	else
		panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value),
				       lower_32_bits(mask));
}

/**
 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
 * @ptdev: Device.
 * @timeout_us: Timeout expressed in micro-seconds.
 *
 * The FW has two timer sources: the GPU counter or the arch-timer. We need
 * to express timeouts as a number of cycles and specify which timer source
 * should be used.
 *
 * Return: A value suitable for timeout fields in the global interface.
 */
static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
{
	bool use_cycle_counter = false;
	u32 timer_rate = 0;
	u64 mod_cycles;

#ifdef CONFIG_ARM_ARCH_TIMER
	timer_rate = arch_timer_get_cntfrq();
#endif

	if (!timer_rate) {
		use_cycle_counter = true;
		timer_rate = clk_get_rate(ptdev->clks.core);
	}

	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
		/* We couldn't get a valid clock rate, let's just pick the
		 * maximum value so the FW still handles the core
		 * power on/off requests.
		 */
		return GLB_TIMER_VAL(~0) |
		       GLB_TIMER_SOURCE_GPU_COUNTER;
	}

	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
				      1000000ull << 10);
	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
		mod_cycles = GLB_TIMER_VAL(~0);

	return GLB_TIMER_VAL(mod_cycles) |
	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
}
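/*
 * Worked example (illustrative numbers, not a spec value): with a 19.2 MHz
 * arch-timer and timeout_us == 10000 (PWROFF_HYSTERESIS_US), the raw count
 * is 10000 * 19200000 / 1000000 = 192000 cycles. The timeout field stores
 * the count pre-scaled by 2^10, so the value programmed is
 * DIV_ROUND_UP(192000, 1024) = 188.
 */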
static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
				       struct panthor_fw_binary_iter *iter,
				       void *out, size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware too small\n");
		return -EINVAL;
	}

	memcpy(out, iter->data + iter->offset, size);
	iter->offset = new_offset;
	return 0;
}

static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
					   struct panthor_fw_binary_iter *iter,
					   struct panthor_fw_binary_iter *sub_iter,
					   size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware entry too long\n");
		return -EINVAL;
	}

	sub_iter->offset = 0;
	sub_iter->data = iter->data + iter->offset;
	sub_iter->size = size;
	iter->offset = new_offset;
	return 0;
}

static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
					struct panthor_fw_section *section)
{
	bool was_mapped = !!section->mem->kmap;
	int ret;

	if (!section->data.size &&
	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO))
		return;

	ret = panthor_kernel_bo_vmap(section->mem);
	if (drm_WARN_ON(&ptdev->base, ret))
		return;

	memcpy(section->mem->kmap, section->data.buf, section->data.size);
	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) {
		memset(section->mem->kmap + section->data.size, 0,
		       panthor_kernel_bo_size(section->mem) - section->data.size);
	}

	if (!was_mapped)
		panthor_kernel_bo_vunmap(section->mem);
}

/**
 * panthor_fw_alloc_queue_iface_mem() - Allocate ring-buffer interfaces.
 * @ptdev: Device.
 * @input: Pointer holding the input interface on success.
 * Should be ignored on failure.
 * @output: Pointer holding the output interface on success.
 * Should be ignored on failure.
 * @input_fw_va: Pointer holding the input interface FW VA on success.
 * Should be ignored on failure.
 * @output_fw_va: Pointer holding the output interface FW VA on success.
 * Should be ignored on failure.
 *
 * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
 * interface is at offset 0, and the output interface at offset 4096.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
				 struct panthor_fw_ringbuf_input_iface **input,
				 const struct panthor_fw_ringbuf_output_iface **output,
				 u32 *input_fw_va, u32 *output_fw_va)
{
	struct panthor_kernel_bo *mem;
	int ret;

	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
				       DRM_PANTHOR_BO_NO_MMAP,
				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
				       PANTHOR_VM_KERNEL_AUTO_VA,
				       "Queue FW interface");
	if (IS_ERR(mem))
		return mem;

	ret = panthor_kernel_bo_vmap(mem);
	if (ret) {
		panthor_kernel_bo_destroy(mem);
		return ERR_PTR(ret);
	}

	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
	*input = mem->kmap;
	*output = mem->kmap + SZ_4K;
	*input_fw_va = panthor_kernel_bo_gpuva(mem);
	*output_fw_va = *input_fw_va + SZ_4K;

	return mem;
}
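/*
 * Resulting layout of the 8k BO allocated above (offsets relative to the BO
 * start, mirrored at the FW VAs returned to the caller):
 *
 *   0x0000: struct panthor_fw_ringbuf_input_iface  (written by the host)
 *   0x1000: struct panthor_fw_ringbuf_output_iface (written by the FW,
 *           hence the const pointer handed back to the caller)
 *
 * Keeping the two interfaces in separate 4k pages avoids the host and FW
 * producers sharing a page.
 */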
/**
 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
 * @ptdev: Device.
 * @size: Size of the suspend buffer.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
{
	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
					DRM_PANTHOR_BO_NO_MMAP,
					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					PANTHOR_VM_KERNEL_AUTO_VA,
					"FW suspend buffer");
}

static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
					 const struct firmware *fw,
					 struct panthor_fw_binary_iter *iter,
					 u32 ehdr)
{
	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
	struct panthor_fw_binary_section_entry_hdr hdr;
	struct panthor_fw_section *section;
	u32 section_size;
	u32 name_len;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.data.end < hdr.data.start) {
		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
			hdr.data.end, hdr.data.start);
		return -EINVAL;
	}

	if (hdr.va.end < hdr.va.start) {
		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
			hdr.va.end, hdr.va.start);
		return -EINVAL;
	}

	if (hdr.data.end > fw->size) {
		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
			hdr.data.end, fw->size);
		return -EINVAL;
	}

	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
			hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) {
		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
			hdr.flags);
		return -EINVAL;
	}

	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) {
		drm_warn(&ptdev->base,
			 "Firmware protected mode entry not supported, ignoring");
		return 0;
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) {
		drm_err(&ptdev->base,
			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
		return -EINVAL;
	}

	name_len = iter->size - iter->offset;

	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
	if (!section)
		return -ENOMEM;

	list_add_tail(&section->node, &ptdev->fw->sections);
	section->flags = hdr.flags;
	section->data.size = hdr.data.end - hdr.data.start;

	if (section->data.size > 0) {
		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		memcpy(data, fw->data + hdr.data.start, section->data.size);
		section->data.buf = data;
	}

	if (name_len > 0) {
		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);

		if (!name)
			return -ENOMEM;

		memcpy(name, iter->data + iter->offset, name_len);
		name[name_len] = '\0';
		section->name = name;
	}

	section_size = hdr.va.end - hdr.va.start;
	if (section_size) {
		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK;
		struct panthor_gem_object *bo;
		u32 vm_map_flags = 0;
		struct sg_table *sgt;
		u64 va = hdr.va.start;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;

		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to
		 * non-cacheable for now. We might want to introduce a new
		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
		 * memory and is currently not used by our driver) for
		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
		 * of IO-coherent systems.
		 */
		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED)
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;

		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
							section_size,
							DRM_PANTHOR_BO_NO_MMAP,
							vm_map_flags, va, "FW section");
		if (IS_ERR(section->mem))
			return PTR_ERR(section->mem);

		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
			return -EINVAL;

		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) {
			ret = panthor_kernel_bo_vmap(section->mem);
			if (ret)
				return ret;
		}

		panthor_fw_init_section_mem(ptdev, section);

		bo = to_panthor_bo(section->mem->obj);
		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;
}

static int panthor_fw_read_build_info(struct panthor_device *ptdev,
				      const struct firmware *fw,
				      struct panthor_fw_binary_iter *iter,
				      u32 ehdr)
{
	struct panthor_fw_build_info_hdr hdr;
	static const char git_sha_header[] = "git_sha: ";
	const int header_len = sizeof(git_sha_header) - 1;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.meta_start > fw->size ||
	    hdr.meta_start + hdr.meta_size > fw->size) {
		drm_err(&ptdev->base, "Firmware build info corrupt\n");
		/* We don't need the build info, so continue */
		return 0;
	}

	if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) {
		/* Not the expected header, this isn't metadata we understand */
		return 0;
	}

	/* Check that the git SHA is NULL terminated as expected */
	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
		/* Don't treat as fatal */
		return 0;
	}

	drm_info(&ptdev->base, "Firmware git sha: %s\n",
		 fw->data + hdr.meta_start + header_len);

	return 0;
}
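/*
 * The only build-info blob accepted above is the ASCII prefix "git_sha: "
 * followed by a NUL-terminated revision string, e.g. (made-up SHA):
 *
 *   "git_sha: 1f2e3d4c...\0"
 *
 * Anything that doesn't start with the prefix, or that isn't NUL-terminated,
 * is silently skipped, since the build info is purely informative.
 */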
static void
panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
{
	struct panthor_fw_section *section;

	list_for_each_entry(section, &ptdev->fw->sections, node) {
		struct sg_table *sgt;

		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
			continue;

		panthor_fw_init_section_mem(ptdev, section);
		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}
}

static int panthor_fw_load_entry(struct panthor_device *ptdev,
				 const struct firmware *fw,
				 struct panthor_fw_binary_iter *iter)
{
	struct panthor_fw_binary_iter eiter;
	u32 ehdr;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
	if (ret)
		return ret;

	if ((iter->offset % sizeof(u32)) ||
	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
		return -EINVAL;
	}

	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
		return -EINVAL;

	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);

	/* FIXME: handle those entry types? */
	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
		return 0;
	default:
		break;
	}

	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
		return 0;

	drm_err(&ptdev->base,
		"Unsupported non-optional entry type %u in firmware\n",
		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
	return -EINVAL;
}

static int panthor_fw_load(struct panthor_device *ptdev)
{
	const struct firmware *fw = NULL;
	struct panthor_fw_binary_iter iter = {};
	struct panthor_fw_binary_hdr hdr;
	char fw_path[128];
	int ret;

	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
		 CSF_FW_NAME);

	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
	if (ret) {
		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
			CSF_FW_NAME);
		return ret;
	}

	iter.data = fw->data;
	iter.size = fw->size;
	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
	if (ret)
		goto out;

	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Invalid firmware magic\n");
		goto out;
	}

	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
		goto out;
	}

	if (hdr.size > iter.size) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Firmware image is truncated\n");
		goto out;
	}

	iter.size = hdr.size;

	while (iter.offset < hdr.size) {
		ret = panthor_fw_load_entry(ptdev, fw, &iter);
		if (ret)
			goto out;
	}

	if (!ptdev->fw->shared_section) {
		drm_err(&ptdev->base, "Shared interface region not found\n");
		ret = -EINVAL;
		goto out;
	}

out:
	release_firmware(fw);
	return ret;
}

/**
 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
 * @ptdev: Device.
 * @mcu_va: MCU address.
 *
 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
 */
static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
{
	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
	u64 shared_mem_end = shared_mem_start +
			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);

	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
		return NULL;

	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
}
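/*
 * Example (with the constants used in this file): the shared section sits at
 * MCU VA CSF_MCU_SHARED_REGION_START (0x04000000), so an interface VA of
 * 0x04001000 resolves to kmap + 0x1000, while any VA outside
 * [start, start + size) yields NULL, letting the callers below reject bogus
 * control interface input/output pointers.
 */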
static int panthor_init_cs_iface(struct panthor_device *ptdev,
				 unsigned int csg_idx, unsigned int cs_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
			   (csg_idx * glb_iface->control->group_stride) +
			   CSF_STREAM_CONTROL_OFFSET +
			   (cs_idx * csg_iface->control->stream_stride);
	struct panthor_fw_cs_iface *first_cs_iface =
		panthor_fw_get_cs_iface(ptdev, 0, 0);

	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&cs_iface->lock);
	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);

	if (!cs_iface->input || !cs_iface->output) {
		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
		return -EINVAL;
	}

	if (cs_iface != first_cs_iface) {
		if (cs_iface->control->features != first_cs_iface->control->features) {
			drm_err(&ptdev->base, "Expecting identical CS slots");
			return -EINVAL;
		}
	} else {
		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);

		ptdev->csif_info.cs_reg_count = reg_count;
		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
	}

	return 0;
}

static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
			const struct panthor_fw_csg_control_iface *b)
{
	if (a->features != b->features)
		return false;
	if (a->suspend_size != b->suspend_size)
		return false;
	if (a->protm_suspend_size != b->protm_suspend_size)
		return false;
	if (a->stream_num != b->stream_num)
		return false;
	return true;
}

static int panthor_init_csg_iface(struct panthor_device *ptdev,
				  unsigned int csg_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
	unsigned int i;

	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&csg_iface->lock);
	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);

	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
		return -EINVAL;

	if (!csg_iface->input || !csg_iface->output) {
		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
		return -EINVAL;
	}

	if (csg_idx > 0) {
		struct panthor_fw_csg_iface *first_csg_iface =
			panthor_fw_get_csg_iface(ptdev, 0);

		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
			drm_err(&ptdev->base, "Expecting identical CSG slots");
			return -EINVAL;
		}
	}

	for (i = 0; i < csg_iface->control->stream_num; i++) {
		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);

		if (ret)
			return ret;
	}

	return 0;
}

static u32 panthor_get_instr_features(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
		return 0;

	return glb_iface->control->instr_features;
}

static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
	unsigned int i;

	if (!ptdev->fw->shared_section->mem->kmap)
		return -EINVAL;

	spin_lock_init(&glb_iface->lock);
	glb_iface->control = ptdev->fw->shared_section->mem->kmap;

	if (!glb_iface->control->version) {
		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
		return -EINVAL;
	}

	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
	if (!glb_iface->input || !glb_iface->output) {
		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
		return -EINVAL;
	}

	if (glb_iface->control->group_num > MAX_CSGS ||
	    glb_iface->control->group_num < MIN_CSGS) {
		drm_err(&ptdev->base, "Invalid number of control groups");
		return -EINVAL;
	}

	for (i = 0; i < glb_iface->control->group_num; i++) {
		int ret = panthor_init_csg_iface(ptdev, i);

		if (ret)
			return ret;
	}

	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
		 glb_iface->control->features,
		 panthor_get_instr_features(ptdev));
	return 0;
}

static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	if (panthor_fw_has_glb_state(ptdev))
		glb_iface->input->ack_irq_mask |= GLB_STATE_MASK;

	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}

static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
	gpu_write(ptdev, JOB_INT_CLEAR, status);

	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
		ptdev->fw->booted = true;

	wake_up_all(&ptdev->fw->req_waitqueue);

	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
	if (!ptdev->fw->booted)
		return;

	panthor_sched_report_fw_events(ptdev, status);
}
PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);

static int panthor_fw_start(struct panthor_device *ptdev)
{
	bool timedout = false;

	ptdev->fw->booted = false;
	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);

	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
				ptdev->fw->booted,
				msecs_to_jiffies(1000))) {
		if (!ptdev->fw->booted &&
		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
			timedout = true;
	}

	if (timedout) {
		static const char * const status_str[] = {
			[MCU_STATUS_DISABLED] = "disabled",
			[MCU_STATUS_ENABLED] = "enabled",
			[MCU_STATUS_HALT] = "halt",
			[MCU_STATUS_FATAL] = "fatal",
		};
		u32 status = gpu_read(ptdev, MCU_STATUS);

		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
		return -ETIMEDOUT;
	}

	return 0;
}
static void panthor_fw_stop(struct panthor_device *ptdev)
{
	u32 status;

	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
	if (gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
				  status == MCU_STATUS_DISABLED, 10, 100000))
		drm_err(&ptdev->base, "Failed to stop MCU");
}

static bool panthor_fw_mcu_halted(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	bool halted;

	halted = gpu_read(ptdev, MCU_STATUS) == MCU_STATUS_HALT;

	if (panthor_fw_has_glb_state(ptdev))
		halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT);

	return halted;
}

static void panthor_fw_halt_mcu(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (panthor_fw_has_glb_state(ptdev))
		panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK);
	else
		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}

static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev)
{
	bool halted = false;

	if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10,
				     MCU_HALT_TIMEOUT_US, 0, ptdev)) {
		drm_warn(&ptdev->base, "Timed out waiting for MCU to halt");
		return false;
	}

	return true;
}

static void panthor_fw_mcu_set_active(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (panthor_fw_has_glb_state(ptdev))
		panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK);
	else
		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
}

/**
 * panthor_fw_pre_reset() - Call before a reset.
 * @ptdev: Device.
 * @on_hang: true if the reset was triggered on a GPU hang.
 *
 * If the reset is not triggered on a hang, we try to gracefully halt the
 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
 */
void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
{
	/* Make sure we won't be woken up by a ping. */
	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	ptdev->reset.fast = false;

	if (!on_hang) {
		panthor_fw_halt_mcu(ptdev);
		if (!panthor_fw_wait_mcu_halted(ptdev))
			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
		else
			ptdev->reset.fast = true;
	}

	panthor_job_irq_suspend(&ptdev->fw->irq);
	panthor_fw_stop(ptdev);
}

/**
 * panthor_fw_post_reset() - Call after a reset.
 * @ptdev: Device.
 *
 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
 * make sure we can recover from a memory corruption.
 */
int panthor_fw_post_reset(struct panthor_device *ptdev)
{
	int ret;

	/* Make the MCU VM active. */
	ret = panthor_vm_active(ptdev->fw->vm);
	if (ret)
		return ret;

	if (!ptdev->reset.fast) {
		/* On a slow reset, reload all sections, including RO ones.
		 * We're not supposed to end up here anyway, let's just assume
		 * the overhead of reloading everything is acceptable.
		 */
		panthor_reload_fw_sections(ptdev, true);
	} else {
		/*
		 * If the FW was previously successfully halted in the pre-reset
		 * operation, we need to transition it to active again before
		 * the FW is rebooted.
		 * This is not needed on a slow reset because FW sections are
		 * re-initialized.
		 */
		panthor_fw_mcu_set_active(ptdev);
	}

	ret = panthor_fw_start(ptdev);
	if (ret) {
		drm_err(&ptdev->base, "FW %s reset failed",
			ptdev->reset.fast ? "fast" : "slow");
		return ret;
	}

	/* We must re-initialize the global interface even on fast-reset. */
	panthor_fw_init_global_iface(ptdev);
	return 0;
}

/**
 * panthor_fw_unplug() - Called when the device is unplugged.
 * @ptdev: Device.
 *
 * This function must make sure all pending operations are flushed before it
 * releases device resources, thus preventing any interaction with the HW.
 *
 * If there is still FW-related work running after this function returns, it
 * must use drm_dev_{enter,exit}() and skip any HW access when
 * drm_dev_enter() returns false.
 */
void panthor_fw_unplug(struct panthor_device *ptdev)
{
	struct panthor_fw_section *section;

	disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
		/* Make sure the IRQ handler cannot be called after that point. */
		if (ptdev->fw->irq.irq)
			panthor_job_irq_suspend(&ptdev->fw->irq);

		panthor_fw_halt_mcu(ptdev);
		if (!panthor_fw_wait_mcu_halted(ptdev))
			drm_warn(&ptdev->base, "Failed to halt MCU on unplug");

		panthor_fw_stop(ptdev);
	}

	list_for_each_entry(section, &ptdev->fw->sections, node)
		panthor_kernel_bo_destroy(section->mem);

	/* We intentionally don't call panthor_vm_idle() and let
	 * panthor_mmu_unplug() release the AS we acquired with
	 * panthor_vm_active() so we don't have to track the VM active/idle
	 * state to keep the active_refcnt balanced.
	 */
	panthor_vm_put(ptdev->fw->vm);
	ptdev->fw->vm = NULL;

	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
		panthor_hw_l2_power_off(ptdev);
}

/**
 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
 * @req_ptr: Pointer to the req register.
 * @ack_ptr: Pointer to the ack register.
 * @wq: Wait queue to use for the sleeping wait.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
				wait_queue_head_t *wq,
				u32 req_mask, u32 *acked,
				u32 timeout_ms)
{
	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
	int ret;

	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
	*acked = req_mask;
	ret = read_poll_timeout_atomic(READ_ONCE, ack,
				       (ack & req_mask) == req,
				       0, 10, 0,
				       *ack_ptr);
	if (!ret)
		return 0;

	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
			       msecs_to_jiffies(timeout_ms)))
		return 0;

	/* Check one last time, in case we were not woken up for some reason. */
	ack = READ_ONCE(*ack_ptr);
	if ((ack & req_mask) == req)
		return 0;

	*acked = ~(req ^ ack) & req_mask;
	return -ETIMEDOUT;
}
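/*
 * Illustration of the toggle-based protocol handled above (example bits, not
 * taken from a real trace): a request is posted by flipping its bit in
 * *req_ptr so that it no longer matches *ack_ptr, and it is acknowledged
 * once the FW flips the same bit in *ack_ptr. With req_mask == 0x5,
 * req == 0x5 and a final ack == 0x1, bit 0 matches but bit 2 does not, so
 * the function returns -ETIMEDOUT with *acked == ~(0x5 ^ 0x1) & 0x5 == 0x1.
 */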
/**
 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
 * @ptdev: Device.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
			     u32 req_mask, u32 *acked,
			     u32 timeout_ms)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* GLB_HALT doesn't get acked through the FW interface. */
	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
		return -EINVAL;

	return panthor_fw_wait_acks(&glb_iface->input->req,
				    &glb_iface->output->ack,
				    &ptdev->fw->req_waitqueue,
				    req_mask, acked, timeout_ms);
}

/**
 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
 * @ptdev: Device.
 * @csg_slot: CSG slot ID.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
			     u32 req_mask, u32 *acked, u32 timeout_ms)
{
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
	int ret;

	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
		return -EINVAL;

	ret = panthor_fw_wait_acks(&csg_iface->input->req,
				   &csg_iface->output->ack,
				   &ptdev->fw->req_waitqueue,
				   req_mask, acked, timeout_ms);

	/*
	 * Check that all bits in the state field were updated, if any mismatch
	 * then clear all bits in the state field. This allows code to do
	 * (acked & CSG_STATE_MASK) and get the right value.
	 */
	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
		*acked &= ~CSG_STATE_MASK;

	return ret;
}

/**
 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
 * @ptdev: Device.
 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
 *
 * This function is toggling bits in the doorbell_req and ringing the
 * global doorbell. It doesn't require a user doorbell to be attached to
 * the group.
 */
void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}

static void panthor_fw_ping_work(struct work_struct *work)
{
	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
	struct panthor_device *ptdev = fw->irq.ptdev;
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	u32 acked;
	int ret;

	if (panthor_device_reset_is_pending(ptdev))
		return;

	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
	if (ret) {
		panthor_device_schedule_reset(ptdev);
		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
	} else {
		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
				 msecs_to_jiffies(PING_INTERVAL_MS));
	}
}

/**
 * panthor_fw_init() - Initialize FW related data.
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_fw_init(struct panthor_device *ptdev)
{
	struct panthor_fw *fw;
	int ret, irq;

	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
	if (!fw)
		return -ENOMEM;

	ptdev->fw = fw;
	init_waitqueue_head(&fw->req_waitqueue);
	INIT_LIST_HEAD(&fw->sections);
	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);

	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
	if (irq <= 0)
		return -ENODEV;

	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
	if (ret) {
		drm_err(&ptdev->base, "failed to request job irq");
		return ret;
	}

	ret = panthor_hw_l2_power_on(ptdev);
	if (ret)
		return ret;

	fw->vm = panthor_vm_create(ptdev, true,
				   0, SZ_4G,
				   CSF_MCU_SHARED_REGION_START,
				   CSF_MCU_SHARED_REGION_SIZE);
	if (IS_ERR(fw->vm)) {
		ret = PTR_ERR(fw->vm);
		fw->vm = NULL;
		goto err_unplug_fw;
	}

	ret = panthor_fw_load(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_vm_active(fw->vm);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_start(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_init_ifaces(ptdev);
	if (ret)
		goto err_unplug_fw;

	panthor_fw_init_global_iface(ptdev);
	return 0;

err_unplug_fw:
	panthor_fw_unplug(ptdev);
	return ret;
}

MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch10.10/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin");