1 /* SPDX-License-Identifier: GPL-2.0 or MIT */ 2 /* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ 3 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ 4 /* Copyright 2023 Collabora ltd. */ 5 6 #ifndef __PANTHOR_DEVICE_H__ 7 #define __PANTHOR_DEVICE_H__ 8 9 #include <linux/atomic.h> 10 #include <linux/io-pgtable.h> 11 #include <linux/regulator/consumer.h> 12 #include <linux/pm_runtime.h> 13 #include <linux/sched.h> 14 #include <linux/spinlock.h> 15 16 #include <drm/drm_device.h> 17 #include <drm/drm_gem.h> 18 #include <drm/drm_mm.h> 19 #include <drm/gpu_scheduler.h> 20 #include <drm/panthor_drm.h> 21 22 struct panthor_csf; 23 struct panthor_csf_ctx; 24 struct panthor_device; 25 struct panthor_gpu; 26 struct panthor_group_pool; 27 struct panthor_heap_pool; 28 struct panthor_hw; 29 struct panthor_job; 30 struct panthor_mmu; 31 struct panthor_fw; 32 struct panthor_perfcnt; 33 struct panthor_pwr; 34 struct panthor_vm; 35 struct panthor_vm_pool; 36 37 /** 38 * struct panthor_soc_data - Panthor SoC Data 39 */ 40 struct panthor_soc_data { 41 /** @asn_hash_enable: True if GPU_L2_CONFIG_ASN_HASH_ENABLE must be set. */ 42 bool asn_hash_enable; 43 44 /** @asn_hash: ASN_HASH values when asn_hash_enable is true. */ 45 u32 asn_hash[3]; 46 }; 47 48 /** 49 * enum panthor_device_pm_state - PM state 50 */ 51 enum panthor_device_pm_state { 52 /** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */ 53 PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0, 54 55 /** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */ 56 PANTHOR_DEVICE_PM_STATE_RESUMING, 57 58 /** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */ 59 PANTHOR_DEVICE_PM_STATE_ACTIVE, 60 61 /** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */ 62 PANTHOR_DEVICE_PM_STATE_SUSPENDING, 63 }; 64 65 enum panthor_irq_state { 66 /** @PANTHOR_IRQ_STATE_ACTIVE: IRQ is active and ready to process events. */ 67 PANTHOR_IRQ_STATE_ACTIVE = 0, 68 /** @PANTHOR_IRQ_STATE_PROCESSING: IRQ is currently processing events. */ 69 PANTHOR_IRQ_STATE_PROCESSING, 70 /** @PANTHOR_IRQ_STATE_SUSPENDED: IRQ is suspended. */ 71 PANTHOR_IRQ_STATE_SUSPENDED, 72 /** @PANTHOR_IRQ_STATE_SUSPENDING: IRQ is being suspended. */ 73 PANTHOR_IRQ_STATE_SUSPENDING, 74 }; 75 76 /** 77 * struct panthor_irq - IRQ data 78 * 79 * Used to automate IRQ handling for the 3 different IRQs we have in this driver. 80 */ 81 struct panthor_irq { 82 /** @ptdev: Panthor device */ 83 struct panthor_device *ptdev; 84 85 /** @iomem: CPU mapping of IRQ base address */ 86 void __iomem *iomem; 87 88 /** @irq: IRQ number. */ 89 int irq; 90 91 /** @mask: Values to write to xxx_INT_MASK if active. */ 92 u32 mask; 93 94 /** 95 * @mask_lock: protects modifications to _INT_MASK and @mask. 96 * 97 * In paths where _INT_MASK is updated based on a state 98 * transition/check, it's crucial for the state update/check to be 99 * inside the locked section, otherwise it introduces a race window 100 * leading to potential _INT_MASK inconsistencies. 101 */ 102 spinlock_t mask_lock; 103 104 /** @state: one of &enum panthor_irq_state reflecting the current state. */ 105 atomic_t state; 106 }; 107 108 /** 109 * enum panthor_device_profiling_mode - Profiling state 110 */ 111 enum panthor_device_profiling_flags { 112 /** @PANTHOR_DEVICE_PROFILING_DISABLED: Profiling is disabled. */ 113 PANTHOR_DEVICE_PROFILING_DISABLED = 0, 114 115 /** @PANTHOR_DEVICE_PROFILING_CYCLES: Sampling job cycles. */ 116 PANTHOR_DEVICE_PROFILING_CYCLES = BIT(0), 117 118 /** @PANTHOR_DEVICE_PROFILING_TIMESTAMP: Sampling job timestamp. */ 119 PANTHOR_DEVICE_PROFILING_TIMESTAMP = BIT(1), 120 121 /** @PANTHOR_DEVICE_PROFILING_ALL: Sampling everything. */ 122 PANTHOR_DEVICE_PROFILING_ALL = 123 PANTHOR_DEVICE_PROFILING_CYCLES | 124 PANTHOR_DEVICE_PROFILING_TIMESTAMP, 125 }; 126 127 /** 128 * struct panthor_device - Panthor device 129 */ 130 struct panthor_device { 131 /** @base: Base drm_device. */ 132 struct drm_device base; 133 134 /** @soc_data: Optional SoC data. */ 135 const struct panthor_soc_data *soc_data; 136 137 /** @phys_addr: Physical address of the iomem region. */ 138 phys_addr_t phys_addr; 139 140 /** @iomem: CPU mapping of the IOMEM region. */ 141 void __iomem *iomem; 142 143 /** @clks: GPU clocks. */ 144 struct { 145 /** @core: Core clock. */ 146 struct clk *core; 147 148 /** @stacks: Stacks clock. This clock is optional. */ 149 struct clk *stacks; 150 151 /** @coregroup: Core group clock. This clock is optional. */ 152 struct clk *coregroup; 153 } clks; 154 155 /** @coherent: True if the CPU/GPU are memory coherent. */ 156 bool coherent; 157 158 /** @gpu_info: GPU information. */ 159 struct drm_panthor_gpu_info gpu_info; 160 161 /** @csif_info: Command stream interface information. */ 162 struct drm_panthor_csif_info csif_info; 163 164 /** @hw: GPU-specific data. */ 165 struct panthor_hw *hw; 166 167 /** @pwr: Power control management data. */ 168 struct panthor_pwr *pwr; 169 170 /** @gpu: GPU management data. */ 171 struct panthor_gpu *gpu; 172 173 /** @fw: FW management data. */ 174 struct panthor_fw *fw; 175 176 /** @mmu: MMU management data. */ 177 struct panthor_mmu *mmu; 178 179 /** @scheduler: Scheduler management data. */ 180 struct panthor_scheduler *scheduler; 181 182 /** @devfreq: Device frequency scaling management data. */ 183 struct panthor_devfreq *devfreq; 184 185 /** @reclaim: Reclaim related stuff */ 186 struct { 187 /** @reclaim.shrinker: Shrinker instance */ 188 struct shrinker *shrinker; 189 190 /** @reclaim.lock: Lock protecting all LRUs */ 191 struct mutex lock; 192 193 /** 194 * @reclaim.unused: BOs with unused pages 195 * 196 * Basically all buffers that got mmapped, vmapped or GPU mapped and 197 * then unmapped. There should be no contention on these buffers, 198 * making them ideal to reclaim. 199 */ 200 struct drm_gem_lru unused; 201 202 /** 203 * @reclaim.mmapped: mmap()-ed buffers 204 * 205 * Those are relatively easy to reclaim since we don't need user 206 * agreement, we can simply teardown the mapping and let it fault on 207 * the next access. 208 */ 209 struct drm_gem_lru mmapped; 210 211 /** 212 * @reclaim.gpu_mapped_shared: shared BO LRU list 213 * 214 * That's the most tricky BO type to reclaim, because it involves 215 * tearing down all mappings in all VMs where this BO is mapped, 216 * which increases the risk of contention and thus decreases the 217 * likeliness of success. 218 */ 219 struct drm_gem_lru gpu_mapped_shared; 220 221 /** 222 * @reclaim.vms: VM LRU list 223 * 224 * VMs that have reclaimable BOs only mapped to a single VM are placed 225 * in this LRU. Reclaiming such BOs implies waiting for VM idleness 226 * (no in-flight GPU jobs targeting this VM), meaning we can't reclaim 227 * those if we're in a context where we can't block/sleep. 228 */ 229 struct list_head vms; 230 231 /** 232 * @reclaim.gpu_mapped_count: Global counter of pages that are GPU mapped 233 * 234 * Allows us to get the number of reclaimable pages without walking 235 * the vms and gpu_mapped_shared LRUs. 236 */ 237 long gpu_mapped_count; 238 239 /** 240 * @reclaim.retry_count: Number of times we ran the shrinker without being 241 * able to reclaim stuff 242 * 243 * Used to stop scanning GEMs when too many attempts were made 244 * without progress. 245 */ 246 atomic_t retry_count; 247 248 #ifdef CONFIG_DEBUG_FS 249 /** 250 * @reclaim.nr_pages_reclaimed_on_last_scan: Number of pages reclaimed on the last 251 * shrinker scan 252 */ 253 unsigned long nr_pages_reclaimed_on_last_scan; 254 #endif 255 } reclaim; 256 257 /** @unplug: Device unplug related fields. */ 258 struct { 259 /** @lock: Lock used to serialize unplug operations. */ 260 struct mutex lock; 261 262 /** 263 * @done: Completion object signaled when the unplug 264 * operation is done. 265 */ 266 struct completion done; 267 } unplug; 268 269 /** @reset: Reset related fields. */ 270 struct { 271 /** @wq: Ordered worqueud used to schedule reset operations. */ 272 struct workqueue_struct *wq; 273 274 /** @work: Reset work. */ 275 struct work_struct work; 276 277 /** @pending: Set to true if a reset is pending. */ 278 atomic_t pending; 279 280 /** 281 * @fast: True if the post_reset logic can proceed with a fast reset. 282 * 283 * A fast reset is just a reset where the driver doesn't reload the FW sections. 284 * 285 * Any time the firmware is properly suspended, a fast reset can take place. 286 * On the other hand, if the halt operation failed, the driver will reload 287 * all FW sections to make sure we start from a fresh state. 288 */ 289 bool fast; 290 } reset; 291 292 /** @pm: Power management related data. */ 293 struct { 294 /** @state: Power state. */ 295 atomic_t state; 296 297 /** 298 * @mmio_lock: Lock protecting MMIO userspace CPU mappings. 299 * 300 * This is needed to ensure we map the dummy IO pages when 301 * the device is being suspended, and the real IO pages when 302 * the device is being resumed. We can't just do with the 303 * state atomicity to deal with this race. 304 */ 305 struct mutex mmio_lock; 306 307 /** 308 * @dummy_latest_flush: Dummy LATEST_FLUSH page. 309 * 310 * Used to replace the real LATEST_FLUSH page when the GPU 311 * is suspended. 312 */ 313 struct page *dummy_latest_flush; 314 315 /** @recovery_needed: True when a resume attempt failed. */ 316 atomic_t recovery_needed; 317 } pm; 318 319 /** @profile_mask: User-set profiling flags for job accounting. */ 320 u32 profile_mask; 321 322 /** @fast_rate: Maximum device clock frequency. Set by DVFS */ 323 unsigned long fast_rate; 324 325 #ifdef CONFIG_DEBUG_FS 326 /** @gems: Device-wide list of GEM objects owned by at least one file. */ 327 struct { 328 /** @gems.lock: Protects the device-wide list of GEM objects. */ 329 struct mutex lock; 330 331 /** @node: Used to keep track of all the device's DRM objects */ 332 struct list_head node; 333 } gems; 334 #endif 335 }; 336 337 struct panthor_gpu_usage { 338 u64 time; 339 u64 cycles; 340 }; 341 342 /** 343 * struct panthor_file - Panthor file 344 */ 345 struct panthor_file { 346 /** @ptdev: Device attached to this file. */ 347 struct panthor_device *ptdev; 348 349 /** @user_mmio: User MMIO related fields. */ 350 struct { 351 /** 352 * @offset: Offset used for user MMIO mappings. 353 * 354 * This offset should not be used to check the type of mapping 355 * except in panthor_mmap(). After that point, MMIO mapping 356 * offsets have been adjusted to match 357 * DRM_PANTHOR_USER_MMIO_OFFSET and that macro should be used 358 * instead. 359 * Make sure this rule is followed at all times, because 360 * userspace is in control of the offset, and can change the 361 * value behind our back. Otherwise it can lead to erroneous 362 * branching happening in kernel space. 363 */ 364 u64 offset; 365 } user_mmio; 366 367 /** @vms: VM pool attached to this file. */ 368 struct panthor_vm_pool *vms; 369 370 /** @groups: Scheduling group pool attached to this file. */ 371 struct panthor_group_pool *groups; 372 373 /** @stats: cycle and timestamp measures for job execution. */ 374 struct panthor_gpu_usage stats; 375 }; 376 377 int panthor_device_init(struct panthor_device *ptdev); 378 void panthor_device_unplug(struct panthor_device *ptdev); 379 380 /** 381 * panthor_device_schedule_reset() - Schedules a reset operation 382 */ 383 static inline void panthor_device_schedule_reset(struct panthor_device *ptdev) 384 { 385 if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) && 386 atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE) 387 queue_work(ptdev->reset.wq, &ptdev->reset.work); 388 } 389 390 /** 391 * panthor_device_reset_is_pending() - Checks if a reset is pending. 392 * 393 * Return: true if a reset is pending, false otherwise. 394 */ 395 static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev) 396 { 397 return atomic_read(&ptdev->reset.pending) != 0; 398 } 399 400 int panthor_device_mmap_io(struct panthor_device *ptdev, 401 struct vm_area_struct *vma); 402 403 int panthor_device_resume(struct device *dev); 404 int panthor_device_suspend(struct device *dev); 405 406 static inline int panthor_device_resume_and_get(struct panthor_device *ptdev) 407 { 408 int ret = pm_runtime_resume_and_get(ptdev->base.dev); 409 410 /* If the resume failed, we need to clear the runtime_error, which 411 * can done by forcing the RPM state to suspended. If multiple 412 * threads called panthor_device_resume_and_get(), we only want 413 * one of them to update the state, hence the cmpxchg. Note that a 414 * thread might enter panthor_device_resume_and_get() and call 415 * pm_runtime_resume_and_get() after another thread had attempted 416 * to resume and failed. This means we will end up with an error 417 * without even attempting a resume ourselves. The only risk here 418 * is to report an error when the second resume attempt might have 419 * succeeded. Given resume errors are not expected, this is probably 420 * something we can live with. 421 */ 422 if (ret && atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1) 423 pm_runtime_set_suspended(ptdev->base.dev); 424 425 return ret; 426 } 427 428 enum drm_panthor_exception_type { 429 DRM_PANTHOR_EXCEPTION_OK = 0x00, 430 DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, 431 DRM_PANTHOR_EXCEPTION_KABOOM = 0x05, 432 DRM_PANTHOR_EXCEPTION_EUREKA = 0x06, 433 DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08, 434 DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, 435 DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, 436 DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, 437 DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41, 438 DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, 439 DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, 440 DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, 441 DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a, 442 DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b, 443 DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50, 444 DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51, 445 DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, 446 DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58, 447 DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59, 448 DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, 449 DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b, 450 DRM_PANTHOR_EXCEPTION_OOM = 0x60, 451 DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68, 452 DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69, 453 DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80, 454 DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, 455 DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, 456 DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, 457 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, 458 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, 459 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, 460 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, 461 DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, 462 DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8, 463 DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9, 464 DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca, 465 DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb, 466 DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9, 467 DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda, 468 DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb, 469 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0, 470 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, 471 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, 472 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, 473 DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, 474 DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, 475 DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, 476 DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, 477 DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, 478 }; 479 480 /** 481 * panthor_exception_is_fault() - Checks if an exception is a fault. 482 * 483 * Return: true if the exception is a fault, false otherwise. 484 */ 485 static inline bool 486 panthor_exception_is_fault(u32 exception_code) 487 { 488 return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT; 489 } 490 491 const char *panthor_exception_name(struct panthor_device *ptdev, 492 u32 exception_code); 493 494 #define INT_RAWSTAT 0x0 495 #define INT_CLEAR 0x4 496 #define INT_MASK 0x8 497 #define INT_STAT 0xc 498 499 /** 500 * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt 501 * registration function. 502 * 503 * The boiler-plate to gracefully deal with shared interrupts is 504 * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER() 505 * just after the actual handler. The handler prototype is: 506 * 507 * void (*handler)(struct panthor_device *, u32 status); 508 */ 509 #define PANTHOR_IRQ_HANDLER(__name, __handler) \ 510 static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \ 511 { \ 512 struct panthor_irq *pirq = data; \ 513 enum panthor_irq_state old_state; \ 514 \ 515 if (!gpu_read(pirq->iomem, INT_STAT)) \ 516 return IRQ_NONE; \ 517 \ 518 guard(spinlock_irqsave)(&pirq->mask_lock); \ 519 old_state = atomic_cmpxchg(&pirq->state, \ 520 PANTHOR_IRQ_STATE_ACTIVE, \ 521 PANTHOR_IRQ_STATE_PROCESSING); \ 522 if (old_state != PANTHOR_IRQ_STATE_ACTIVE) \ 523 return IRQ_NONE; \ 524 \ 525 gpu_write(pirq->iomem, INT_MASK, 0); \ 526 return IRQ_WAKE_THREAD; \ 527 } \ 528 \ 529 static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \ 530 { \ 531 struct panthor_irq *pirq = data; \ 532 struct panthor_device *ptdev = pirq->ptdev; \ 533 irqreturn_t ret = IRQ_NONE; \ 534 \ 535 while (true) { \ 536 /* It's safe to access pirq->mask without the lock held here. If a new \ 537 * event gets added to the mask and the corresponding IRQ is pending, \ 538 * we'll process it right away instead of adding an extra raw -> threaded \ 539 * round trip. If an event is removed and the status bit is set, it will \ 540 * be ignored, just like it would have been if the mask had been adjusted \ 541 * right before the HW event kicks in. TLDR; it's all expected races we're \ 542 * covered for. \ 543 */ \ 544 u32 status = gpu_read(pirq->iomem, INT_RAWSTAT) & pirq->mask; \ 545 \ 546 if (!status) \ 547 break; \ 548 \ 549 __handler(ptdev, status); \ 550 ret = IRQ_HANDLED; \ 551 } \ 552 \ 553 scoped_guard(spinlock_irqsave, &pirq->mask_lock) { \ 554 enum panthor_irq_state old_state; \ 555 \ 556 old_state = atomic_cmpxchg(&pirq->state, \ 557 PANTHOR_IRQ_STATE_PROCESSING, \ 558 PANTHOR_IRQ_STATE_ACTIVE); \ 559 if (old_state == PANTHOR_IRQ_STATE_PROCESSING) \ 560 gpu_write(pirq->iomem, INT_MASK, pirq->mask); \ 561 } \ 562 \ 563 return ret; \ 564 } \ 565 \ 566 static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ 567 { \ 568 scoped_guard(spinlock_irqsave, &pirq->mask_lock) { \ 569 atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDING); \ 570 gpu_write(pirq->iomem, INT_MASK, 0); \ 571 } \ 572 synchronize_irq(pirq->irq); \ 573 atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDED); \ 574 } \ 575 \ 576 static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq) \ 577 { \ 578 guard(spinlock_irqsave)(&pirq->mask_lock); \ 579 \ 580 atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE); \ 581 gpu_write(pirq->iomem, INT_CLEAR, pirq->mask); \ 582 gpu_write(pirq->iomem, INT_MASK, pirq->mask); \ 583 } \ 584 \ 585 static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ 586 struct panthor_irq *pirq, \ 587 int irq, u32 mask, void __iomem *iomem) \ 588 { \ 589 pirq->ptdev = ptdev; \ 590 pirq->irq = irq; \ 591 pirq->mask = mask; \ 592 pirq->iomem = iomem; \ 593 spin_lock_init(&pirq->mask_lock); \ 594 panthor_ ## __name ## _irq_resume(pirq); \ 595 \ 596 return devm_request_threaded_irq(ptdev->base.dev, irq, \ 597 panthor_ ## __name ## _irq_raw_handler, \ 598 panthor_ ## __name ## _irq_threaded_handler, \ 599 IRQF_SHARED, KBUILD_MODNAME "-" # __name, \ 600 pirq); \ 601 } \ 602 \ 603 static inline void panthor_ ## __name ## _irq_enable_events(struct panthor_irq *pirq, u32 mask) \ 604 { \ 605 guard(spinlock_irqsave)(&pirq->mask_lock); \ 606 pirq->mask |= mask; \ 607 \ 608 /* The only situation where we need to write the new mask is if the IRQ is active. \ 609 * If it's being processed, the mask will be restored for us in _irq_threaded_handler() \ 610 * on the PROCESSING -> ACTIVE transition. \ 611 * If the IRQ is suspended/suspending, the mask is restored at resume time. \ 612 */ \ 613 if (atomic_read(&pirq->state) == PANTHOR_IRQ_STATE_ACTIVE) \ 614 gpu_write(pirq->iomem, INT_MASK, pirq->mask); \ 615 } \ 616 \ 617 static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq *pirq, u32 mask)\ 618 { \ 619 guard(spinlock_irqsave)(&pirq->mask_lock); \ 620 pirq->mask &= ~mask; \ 621 \ 622 /* The only situation where we need to write the new mask is if the IRQ is active. \ 623 * If it's being processed, the mask will be restored for us in _irq_threaded_handler() \ 624 * on the PROCESSING -> ACTIVE transition. \ 625 * If the IRQ is suspended/suspending, the mask is restored at resume time. \ 626 */ \ 627 if (atomic_read(&pirq->state) == PANTHOR_IRQ_STATE_ACTIVE) \ 628 gpu_write(pirq->iomem, INT_MASK, pirq->mask); \ 629 } 630 631 extern struct workqueue_struct *panthor_cleanup_wq; 632 633 static inline void gpu_write(void __iomem *iomem, u32 reg, u32 data) 634 { 635 writel(data, iomem + reg); 636 } 637 638 static inline u32 gpu_read(void __iomem *iomem, u32 reg) 639 { 640 return readl(iomem + reg); 641 } 642 643 static inline u32 gpu_read_relaxed(void __iomem *iomem, u32 reg) 644 { 645 return readl_relaxed(iomem + reg); 646 } 647 648 static inline void gpu_write64(void __iomem *iomem, u32 reg, u64 data) 649 { 650 gpu_write(iomem, reg, lower_32_bits(data)); 651 gpu_write(iomem, reg + 4, upper_32_bits(data)); 652 } 653 654 static inline u64 gpu_read64(void __iomem *iomem, u32 reg) 655 { 656 return (gpu_read(iomem, reg) | ((u64)gpu_read(iomem, reg + 4) << 32)); 657 } 658 659 static inline u64 gpu_read64_relaxed(void __iomem *iomem, u32 reg) 660 { 661 return (gpu_read_relaxed(iomem, reg) | 662 ((u64)gpu_read_relaxed(iomem, reg + 4) << 32)); 663 } 664 665 static inline u64 gpu_read64_counter(void __iomem *iomem, u32 reg) 666 { 667 u32 lo, hi1, hi2; 668 do { 669 hi1 = gpu_read(iomem, reg + 4); 670 lo = gpu_read(iomem, reg); 671 hi2 = gpu_read(iomem, reg + 4); 672 } while (hi1 != hi2); 673 return lo | ((u64)hi2 << 32); 674 } 675 676 #define gpu_read_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us) \ 677 read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \ 678 iomem, reg) 679 680 #define gpu_read_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \ 681 timeout_us) \ 682 read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \ 683 false, iomem, reg) 684 685 #define gpu_read64_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us) \ 686 read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \ 687 iomem, reg) 688 689 #define gpu_read64_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \ 690 timeout_us) \ 691 read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \ 692 false, iomem, reg) 693 694 #define gpu_read_relaxed_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \ 695 timeout_us) \ 696 read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us, \ 697 timeout_us, false, iomem, reg) 698 699 #define gpu_read64_relaxed_poll_timeout(iomem, reg, val, cond, delay_us, \ 700 timeout_us) \ 701 read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \ 702 false, iomem, reg) 703 704 #endif 705