1 /* SPDX-License-Identifier: GPL-2.0 or MIT */ 2 /* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ 3 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ 4 /* Copyright 2023 Collabora ltd. */ 5 6 #ifndef __PANTHOR_DEVICE_H__ 7 #define __PANTHOR_DEVICE_H__ 8 9 #include <linux/atomic.h> 10 #include <linux/io-pgtable.h> 11 #include <linux/regulator/consumer.h> 12 #include <linux/pm_runtime.h> 13 #include <linux/sched.h> 14 #include <linux/spinlock.h> 15 16 #include <drm/drm_device.h> 17 #include <drm/drm_gem.h> 18 #include <drm/drm_mm.h> 19 #include <drm/gpu_scheduler.h> 20 #include <drm/panthor_drm.h> 21 22 struct panthor_csf; 23 struct panthor_csf_ctx; 24 struct panthor_device; 25 struct panthor_gpu; 26 struct panthor_group_pool; 27 struct panthor_heap_pool; 28 struct panthor_hw; 29 struct panthor_job; 30 struct panthor_mmu; 31 struct panthor_fw; 32 struct panthor_perfcnt; 33 struct panthor_pwr; 34 struct panthor_vm; 35 struct panthor_vm_pool; 36 37 /** 38 * struct panthor_soc_data - Panthor SoC Data 39 */ 40 struct panthor_soc_data { 41 /** @asn_hash_enable: True if GPU_L2_CONFIG_ASN_HASH_ENABLE must be set. */ 42 bool asn_hash_enable; 43 44 /** @asn_hash: ASN_HASH values when asn_hash_enable is true. */ 45 u32 asn_hash[3]; 46 }; 47 48 /** 49 * enum panthor_device_pm_state - PM state 50 */ 51 enum panthor_device_pm_state { 52 /** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */ 53 PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0, 54 55 /** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */ 56 PANTHOR_DEVICE_PM_STATE_RESUMING, 57 58 /** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */ 59 PANTHOR_DEVICE_PM_STATE_ACTIVE, 60 61 /** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */ 62 PANTHOR_DEVICE_PM_STATE_SUSPENDING, 63 }; 64 65 enum panthor_irq_state { 66 /** @PANTHOR_IRQ_STATE_ACTIVE: IRQ is active and ready to process events. */ 67 PANTHOR_IRQ_STATE_ACTIVE = 0, 68 /** @PANTHOR_IRQ_STATE_PROCESSING: IRQ is currently processing events. */ 69 PANTHOR_IRQ_STATE_PROCESSING, 70 /** @PANTHOR_IRQ_STATE_SUSPENDED: IRQ is suspended. */ 71 PANTHOR_IRQ_STATE_SUSPENDED, 72 /** @PANTHOR_IRQ_STATE_SUSPENDING: IRQ is being suspended. */ 73 PANTHOR_IRQ_STATE_SUSPENDING, 74 }; 75 76 /** 77 * struct panthor_irq - IRQ data 78 * 79 * Used to automate IRQ handling for the 3 different IRQs we have in this driver. 80 */ 81 struct panthor_irq { 82 /** @ptdev: Panthor device */ 83 struct panthor_device *ptdev; 84 85 /** @iomem: CPU mapping of IRQ base address */ 86 void __iomem *iomem; 87 88 /** @irq: IRQ number. */ 89 int irq; 90 91 /** @mask: Values to write to xxx_INT_MASK if active. */ 92 u32 mask; 93 94 /** 95 * @mask_lock: protects modifications to _INT_MASK and @mask. 96 * 97 * In paths where _INT_MASK is updated based on a state 98 * transition/check, it's crucial for the state update/check to be 99 * inside the locked section, otherwise it introduces a race window 100 * leading to potential _INT_MASK inconsistencies. 101 */ 102 spinlock_t mask_lock; 103 104 /** @state: one of &enum panthor_irq_state reflecting the current state. */ 105 atomic_t state; 106 }; 107 108 /** 109 * enum panthor_device_profiling_mode - Profiling state 110 */ 111 enum panthor_device_profiling_flags { 112 /** @PANTHOR_DEVICE_PROFILING_DISABLED: Profiling is disabled. */ 113 PANTHOR_DEVICE_PROFILING_DISABLED = 0, 114 115 /** @PANTHOR_DEVICE_PROFILING_CYCLES: Sampling job cycles. */ 116 PANTHOR_DEVICE_PROFILING_CYCLES = BIT(0), 117 118 /** @PANTHOR_DEVICE_PROFILING_TIMESTAMP: Sampling job timestamp. */ 119 PANTHOR_DEVICE_PROFILING_TIMESTAMP = BIT(1), 120 121 /** @PANTHOR_DEVICE_PROFILING_ALL: Sampling everything. */ 122 PANTHOR_DEVICE_PROFILING_ALL = 123 PANTHOR_DEVICE_PROFILING_CYCLES | 124 PANTHOR_DEVICE_PROFILING_TIMESTAMP, 125 }; 126 127 /** 128 * struct panthor_device - Panthor device 129 */ 130 struct panthor_device { 131 /** @base: Base drm_device. */ 132 struct drm_device base; 133 134 /** @soc_data: Optional SoC data. */ 135 const struct panthor_soc_data *soc_data; 136 137 /** @phys_addr: Physical address of the iomem region. */ 138 phys_addr_t phys_addr; 139 140 /** @iomem: CPU mapping of the IOMEM region. */ 141 void __iomem *iomem; 142 143 /** @clks: GPU clocks. */ 144 struct { 145 /** @core: Core clock. */ 146 struct clk *core; 147 148 /** @stacks: Stacks clock. This clock is optional. */ 149 struct clk *stacks; 150 151 /** @coregroup: Core group clock. This clock is optional. */ 152 struct clk *coregroup; 153 } clks; 154 155 /** @coherent: True if the CPU/GPU are memory coherent. */ 156 bool coherent; 157 158 /** @gpu_info: GPU information. */ 159 struct drm_panthor_gpu_info gpu_info; 160 161 /** @csif_info: Command stream interface information. */ 162 struct drm_panthor_csif_info csif_info; 163 164 /** @hw: GPU-specific data. */ 165 struct panthor_hw *hw; 166 167 /** @pwr: Power control management data. */ 168 struct panthor_pwr *pwr; 169 170 /** @gpu: GPU management data. */ 171 struct panthor_gpu *gpu; 172 173 /** @fw: FW management data. */ 174 struct panthor_fw *fw; 175 176 /** @mmu: MMU management data. */ 177 struct panthor_mmu *mmu; 178 179 /** @scheduler: Scheduler management data. */ 180 struct panthor_scheduler *scheduler; 181 182 /** @devfreq: Device frequency scaling management data. */ 183 struct panthor_devfreq *devfreq; 184 185 /** @reclaim: Reclaim related stuff */ 186 struct { 187 /** @reclaim.shrinker: Shrinker instance */ 188 struct shrinker *shrinker; 189 190 /** 191 * @reclaim.unused: BOs with unused pages 192 * 193 * Basically all buffers that got mmapped, vmapped or GPU mapped and 194 * then unmapped. There should be no contention on these buffers, 195 * making them ideal to reclaim. 196 */ 197 struct drm_gem_lru unused; 198 199 /** 200 * @reclaim.mmapped: mmap()-ed buffers 201 * 202 * Those are relatively easy to reclaim since we don't need user 203 * agreement, we can simply teardown the mapping and let it fault on 204 * the next access. 205 */ 206 struct drm_gem_lru mmapped; 207 208 /** 209 * @reclaim.gpu_mapped_shared: shared BO LRU list 210 * 211 * That's the most tricky BO type to reclaim, because it involves 212 * tearing down all mappings in all VMs where this BO is mapped, 213 * which increases the risk of contention and thus decreases the 214 * likeliness of success. 215 */ 216 struct drm_gem_lru gpu_mapped_shared; 217 218 /** 219 * @reclaim.vms: VM LRU list 220 * 221 * VMs that have reclaimable BOs only mapped to a single VM are placed 222 * in this LRU. Reclaiming such BOs implies waiting for VM idleness 223 * (no in-flight GPU jobs targeting this VM), meaning we can't reclaim 224 * those if we're in a context where we can't block/sleep. 225 */ 226 struct list_head vms; 227 228 /** 229 * @reclaim.gpu_mapped_count: Global counter of pages that are GPU mapped 230 * 231 * Allows us to get the number of reclaimable pages without walking 232 * the vms and gpu_mapped_shared LRUs. 233 */ 234 long gpu_mapped_count; 235 236 /** 237 * @reclaim.retry_count: Number of times we ran the shrinker without being 238 * able to reclaim stuff 239 * 240 * Used to stop scanning GEMs when too many attempts were made 241 * without progress. 242 */ 243 atomic_t retry_count; 244 245 #ifdef CONFIG_DEBUG_FS 246 /** 247 * @reclaim.nr_pages_reclaimed_on_last_scan: Number of pages reclaimed on the last 248 * shrinker scan 249 */ 250 unsigned long nr_pages_reclaimed_on_last_scan; 251 #endif 252 } reclaim; 253 254 /** @unplug: Device unplug related fields. */ 255 struct { 256 /** @lock: Lock used to serialize unplug operations. */ 257 struct mutex lock; 258 259 /** 260 * @done: Completion object signaled when the unplug 261 * operation is done. 262 */ 263 struct completion done; 264 } unplug; 265 266 /** @reset: Reset related fields. */ 267 struct { 268 /** @wq: Ordered worqueud used to schedule reset operations. */ 269 struct workqueue_struct *wq; 270 271 /** @work: Reset work. */ 272 struct work_struct work; 273 274 /** @pending: Set to true if a reset is pending. */ 275 atomic_t pending; 276 277 /** 278 * @fast: True if the post_reset logic can proceed with a fast reset. 279 * 280 * A fast reset is just a reset where the driver doesn't reload the FW sections. 281 * 282 * Any time the firmware is properly suspended, a fast reset can take place. 283 * On the other hand, if the halt operation failed, the driver will reload 284 * all FW sections to make sure we start from a fresh state. 285 */ 286 bool fast; 287 } reset; 288 289 /** @pm: Power management related data. */ 290 struct { 291 /** @state: Power state. */ 292 atomic_t state; 293 294 /** 295 * @mmio_lock: Lock protecting MMIO userspace CPU mappings. 296 * 297 * This is needed to ensure we map the dummy IO pages when 298 * the device is being suspended, and the real IO pages when 299 * the device is being resumed. We can't just do with the 300 * state atomicity to deal with this race. 301 */ 302 struct mutex mmio_lock; 303 304 /** 305 * @dummy_latest_flush: Dummy LATEST_FLUSH page. 306 * 307 * Used to replace the real LATEST_FLUSH page when the GPU 308 * is suspended. 309 */ 310 struct page *dummy_latest_flush; 311 312 /** @recovery_needed: True when a resume attempt failed. */ 313 atomic_t recovery_needed; 314 } pm; 315 316 /** @profile_mask: User-set profiling flags for job accounting. */ 317 u32 profile_mask; 318 319 /** @fast_rate: Maximum device clock frequency. Set by DVFS */ 320 unsigned long fast_rate; 321 322 #ifdef CONFIG_DEBUG_FS 323 /** @gems: Device-wide list of GEM objects owned by at least one file. */ 324 struct { 325 /** @gems.lock: Protects the device-wide list of GEM objects. */ 326 struct mutex lock; 327 328 /** @node: Used to keep track of all the device's DRM objects */ 329 struct list_head node; 330 } gems; 331 #endif 332 }; 333 334 struct panthor_gpu_usage { 335 u64 time; 336 u64 cycles; 337 }; 338 339 /** 340 * struct panthor_file - Panthor file 341 */ 342 struct panthor_file { 343 /** @ptdev: Device attached to this file. */ 344 struct panthor_device *ptdev; 345 346 /** @user_mmio: User MMIO related fields. */ 347 struct { 348 /** 349 * @offset: Offset used for user MMIO mappings. 350 * 351 * This offset should not be used to check the type of mapping 352 * except in panthor_mmap(). After that point, MMIO mapping 353 * offsets have been adjusted to match 354 * DRM_PANTHOR_USER_MMIO_OFFSET and that macro should be used 355 * instead. 356 * Make sure this rule is followed at all times, because 357 * userspace is in control of the offset, and can change the 358 * value behind our back. Otherwise it can lead to erroneous 359 * branching happening in kernel space. 360 */ 361 u64 offset; 362 } user_mmio; 363 364 /** @vms: VM pool attached to this file. */ 365 struct panthor_vm_pool *vms; 366 367 /** @groups: Scheduling group pool attached to this file. */ 368 struct panthor_group_pool *groups; 369 370 /** @stats: cycle and timestamp measures for job execution. */ 371 struct panthor_gpu_usage stats; 372 }; 373 374 int panthor_device_init(struct panthor_device *ptdev); 375 void panthor_device_unplug(struct panthor_device *ptdev); 376 377 /** 378 * panthor_device_schedule_reset() - Schedules a reset operation 379 */ 380 static inline void panthor_device_schedule_reset(struct panthor_device *ptdev) 381 { 382 if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) && 383 atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE) 384 queue_work(ptdev->reset.wq, &ptdev->reset.work); 385 } 386 387 /** 388 * panthor_device_reset_is_pending() - Checks if a reset is pending. 389 * 390 * Return: true if a reset is pending, false otherwise. 391 */ 392 static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev) 393 { 394 return atomic_read(&ptdev->reset.pending) != 0; 395 } 396 397 int panthor_device_mmap_io(struct panthor_device *ptdev, 398 struct vm_area_struct *vma); 399 400 int panthor_device_resume(struct device *dev); 401 int panthor_device_suspend(struct device *dev); 402 403 static inline int panthor_device_resume_and_get(struct panthor_device *ptdev) 404 { 405 int ret = pm_runtime_resume_and_get(ptdev->base.dev); 406 407 /* If the resume failed, we need to clear the runtime_error, which 408 * can done by forcing the RPM state to suspended. If multiple 409 * threads called panthor_device_resume_and_get(), we only want 410 * one of them to update the state, hence the cmpxchg. Note that a 411 * thread might enter panthor_device_resume_and_get() and call 412 * pm_runtime_resume_and_get() after another thread had attempted 413 * to resume and failed. This means we will end up with an error 414 * without even attempting a resume ourselves. The only risk here 415 * is to report an error when the second resume attempt might have 416 * succeeded. Given resume errors are not expected, this is probably 417 * something we can live with. 418 */ 419 if (ret && atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1) 420 pm_runtime_set_suspended(ptdev->base.dev); 421 422 return ret; 423 } 424 425 enum drm_panthor_exception_type { 426 DRM_PANTHOR_EXCEPTION_OK = 0x00, 427 DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, 428 DRM_PANTHOR_EXCEPTION_KABOOM = 0x05, 429 DRM_PANTHOR_EXCEPTION_EUREKA = 0x06, 430 DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08, 431 DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, 432 DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, 433 DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, 434 DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41, 435 DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, 436 DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, 437 DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, 438 DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a, 439 DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b, 440 DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50, 441 DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51, 442 DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, 443 DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58, 444 DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59, 445 DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, 446 DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b, 447 DRM_PANTHOR_EXCEPTION_OOM = 0x60, 448 DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68, 449 DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69, 450 DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80, 451 DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, 452 DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, 453 DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, 454 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, 455 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, 456 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, 457 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, 458 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, 459 DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8, 460 DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9, 461 DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca, 462 DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb, 463 DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9, 464 DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda, 465 DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb, 466 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0, 467 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, 468 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, 469 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, 470 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, 471 DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, 472 DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, 473 DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, 474 DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, 475 }; 476 477 /** 478 * panthor_exception_is_fault() - Checks if an exception is a fault. 479 * 480 * Return: true if the exception is a fault, false otherwise. 481 */ 482 static inline bool 483 panthor_exception_is_fault(u32 exception_code) 484 { 485 return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT; 486 } 487 488 const char *panthor_exception_name(struct panthor_device *ptdev, 489 u32 exception_code); 490 491 #define INT_RAWSTAT 0x0 492 #define INT_CLEAR 0x4 493 #define INT_MASK 0x8 494 #define INT_STAT 0xc 495 496 /** 497 * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt 498 * registration function. 499 * 500 * The boiler-plate to gracefully deal with shared interrupts is 501 * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER() 502 * just after the actual handler. The handler prototype is: 503 * 504 * void (*handler)(struct panthor_device *, u32 status); 505 */ 506 #define PANTHOR_IRQ_HANDLER(__name, __handler) \ 507 static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \ 508 { \ 509 struct panthor_irq *pirq = data; \ 510 enum panthor_irq_state old_state; \ 511 \ 512 if (!gpu_read(pirq->iomem, INT_STAT)) \ 513 return IRQ_NONE; \ 514 \ 515 guard(spinlock_irqsave)(&pirq->mask_lock); \ 516 old_state = atomic_cmpxchg(&pirq->state, \ 517 PANTHOR_IRQ_STATE_ACTIVE, \ 518 PANTHOR_IRQ_STATE_PROCESSING); \ 519 if (old_state != PANTHOR_IRQ_STATE_ACTIVE) \ 520 return IRQ_NONE; \ 521 \ 522 gpu_write(pirq->iomem, INT_MASK, 0); \ 523 return IRQ_WAKE_THREAD; \ 524 } \ 525 \ 526 static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \ 527 { \ 528 struct panthor_irq *pirq = data; \ 529 struct panthor_device *ptdev = pirq->ptdev; \ 530 irqreturn_t ret = IRQ_NONE; \ 531 \ 532 while (true) { \ 533 /* It's safe to access pirq->mask without the lock held here. If a new \ 534 * event gets added to the mask and the corresponding IRQ is pending, \ 535 * we'll process it right away instead of adding an extra raw -> threaded \ 536 * round trip. If an event is removed and the status bit is set, it will \ 537 * be ignored, just like it would have been if the mask had been adjusted \ 538 * right before the HW event kicks in. TLDR; it's all expected races we're \ 539 * covered for. \ 540 */ \ 541 u32 status = gpu_read(pirq->iomem, INT_RAWSTAT) & pirq->mask; \ 542 \ 543 if (!status) \ 544 break; \ 545 \ 546 __handler(ptdev, status); \ 547 ret = IRQ_HANDLED; \ 548 } \ 549 \ 550 scoped_guard(spinlock_irqsave, &pirq->mask_lock) { \ 551 enum panthor_irq_state old_state; \ 552 \ 553 old_state = atomic_cmpxchg(&pirq->state, \ 554 PANTHOR_IRQ_STATE_PROCESSING, \ 555 PANTHOR_IRQ_STATE_ACTIVE); \ 556 if (old_state == PANTHOR_IRQ_STATE_PROCESSING) \ 557 gpu_write(pirq->iomem, INT_MASK, pirq->mask); \ 558 } \ 559 \ 560 return ret; \ 561 } \ 562 \ 563 static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ 564 { \ 565 scoped_guard(spinlock_irqsave, &pirq->mask_lock) { \ 566 atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDING); \ 567 gpu_write(pirq->iomem, INT_MASK, 0); \ 568 } \ 569 synchronize_irq(pirq->irq); \ 570 atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDED); \ 571 } \ 572 \ 573 static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq) \ 574 { \ 575 guard(spinlock_irqsave)(&pirq->mask_lock); \ 576 \ 577 atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE); \ 578 gpu_write(pirq->iomem, INT_CLEAR, pirq->mask); \ 579 gpu_write(pirq->iomem, INT_MASK, pirq->mask); \ 580 } \ 581 \ 582 static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ 583 struct panthor_irq *pirq, \ 584 int irq, u32 mask, void __iomem *iomem) \ 585 { \ 586 pirq->ptdev = ptdev; \ 587 pirq->irq = irq; \ 588 pirq->mask = mask; \ 589 pirq->iomem = iomem; \ 590 spin_lock_init(&pirq->mask_lock); \ 591 panthor_ ## __name ## _irq_resume(pirq); \ 592 \ 593 return devm_request_threaded_irq(ptdev->base.dev, irq, \ 594 panthor_ ## __name ## _irq_raw_handler, \ 595 panthor_ ## __name ## _irq_threaded_handler, \ 596 IRQF_SHARED, KBUILD_MODNAME "-" # __name, \ 597 pirq); \ 598 } \ 599 \ 600 static inline void panthor_ ## __name ## _irq_enable_events(struct panthor_irq *pirq, u32 mask) \ 601 { \ 602 guard(spinlock_irqsave)(&pirq->mask_lock); \ 603 pirq->mask |= mask; \ 604 \ 605 /* The only situation where we need to write the new mask is if the IRQ is active. \ 606 * If it's being processed, the mask will be restored for us in _irq_threaded_handler() \ 607 * on the PROCESSING -> ACTIVE transition. \ 608 * If the IRQ is suspended/suspending, the mask is restored at resume time. \ 609 */ \ 610 if (atomic_read(&pirq->state) == PANTHOR_IRQ_STATE_ACTIVE) \ 611 gpu_write(pirq->iomem, INT_MASK, pirq->mask); \ 612 } \ 613 \ 614 static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq *pirq, u32 mask)\ 615 { \ 616 guard(spinlock_irqsave)(&pirq->mask_lock); \ 617 pirq->mask &= ~mask; \ 618 \ 619 /* The only situation where we need to write the new mask is if the IRQ is active. \ 620 * If it's being processed, the mask will be restored for us in _irq_threaded_handler() \ 621 * on the PROCESSING -> ACTIVE transition. \ 622 * If the IRQ is suspended/suspending, the mask is restored at resume time. \ 623 */ \ 624 if (atomic_read(&pirq->state) == PANTHOR_IRQ_STATE_ACTIVE) \ 625 gpu_write(pirq->iomem, INT_MASK, pirq->mask); \ 626 } 627 628 extern struct workqueue_struct *panthor_cleanup_wq; 629 630 static inline void gpu_write(void __iomem *iomem, u32 reg, u32 data) 631 { 632 writel(data, iomem + reg); 633 } 634 635 static inline u32 gpu_read(void __iomem *iomem, u32 reg) 636 { 637 return readl(iomem + reg); 638 } 639 640 static inline u32 gpu_read_relaxed(void __iomem *iomem, u32 reg) 641 { 642 return readl_relaxed(iomem + reg); 643 } 644 645 static inline void gpu_write64(void __iomem *iomem, u32 reg, u64 data) 646 { 647 gpu_write(iomem, reg, lower_32_bits(data)); 648 gpu_write(iomem, reg + 4, upper_32_bits(data)); 649 } 650 651 static inline u64 gpu_read64(void __iomem *iomem, u32 reg) 652 { 653 return (gpu_read(iomem, reg) | ((u64)gpu_read(iomem, reg + 4) << 32)); 654 } 655 656 static inline u64 gpu_read64_relaxed(void __iomem *iomem, u32 reg) 657 { 658 return (gpu_read_relaxed(iomem, reg) | 659 ((u64)gpu_read_relaxed(iomem, reg + 4) << 32)); 660 } 661 662 static inline u64 gpu_read64_counter(void __iomem *iomem, u32 reg) 663 { 664 u32 lo, hi1, hi2; 665 do { 666 hi1 = gpu_read(iomem, reg + 4); 667 lo = gpu_read(iomem, reg); 668 hi2 = gpu_read(iomem, reg + 4); 669 } while (hi1 != hi2); 670 return lo | ((u64)hi2 << 32); 671 } 672 673 #define gpu_read_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us) \ 674 read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \ 675 iomem, reg) 676 677 #define gpu_read_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \ 678 timeout_us) \ 679 read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \ 680 false, iomem, reg) 681 682 #define gpu_read64_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us) \ 683 read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \ 684 iomem, reg) 685 686 #define gpu_read64_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \ 687 timeout_us) \ 688 read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \ 689 false, iomem, reg) 690 691 #define gpu_read_relaxed_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \ 692 timeout_us) \ 693 read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us, \ 694 timeout_us, false, iomem, reg) 695 696 #define gpu_read64_relaxed_poll_timeout(iomem, reg, val, cond, delay_us, \ 697 timeout_us) \ 698 read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \ 699 false, iomem, reg) 700 701 #endif 702