/* SPDX-License-Identifier: GPL-2.0 or MIT */
/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2023 Collabora ltd. */

#ifndef __PANTHOR_DEVICE_H__
#define __PANTHOR_DEVICE_H__

#include <linux/atomic.h>
#include <linux/io-pgtable.h>
#include <linux/regulator/consumer.h>
#include <linux/pm_runtime.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

#include <drm/drm_device.h>
#include <drm/drm_mm.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>

struct panthor_csf;
struct panthor_csf_ctx;
struct panthor_device;
struct panthor_gpu;
struct panthor_group_pool;
struct panthor_heap_pool;
struct panthor_hw;
struct panthor_job;
struct panthor_mmu;
struct panthor_fw;
struct panthor_perfcnt;
struct panthor_pwr;
struct panthor_vm;
struct panthor_vm_pool;

/**
 * struct panthor_soc_data - Panthor SoC Data
 */
struct panthor_soc_data {
	/** @asn_hash_enable: True if GPU_L2_CONFIG_ASN_HASH_ENABLE must be set. */
	bool asn_hash_enable;

	/** @asn_hash: ASN_HASH values when asn_hash_enable is true. */
	u32 asn_hash[3];
};

/**
 * enum panthor_device_pm_state - PM state
 */
enum panthor_device_pm_state {
	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */
	PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0,

	/** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */
	PANTHOR_DEVICE_PM_STATE_RESUMING,

	/** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */
	PANTHOR_DEVICE_PM_STATE_ACTIVE,

	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */
	PANTHOR_DEVICE_PM_STATE_SUSPENDING,
};

/**
 * struct panthor_irq - IRQ data
 *
 * Used to automate IRQ handling for the 3 different IRQs we have in this driver.
 */
struct panthor_irq {
	/** @ptdev: Panthor device */
	struct panthor_device *ptdev;

	/** @irq: IRQ number. */
	int irq;

	/** @mask: Current mask being applied to xxx_INT_MASK. */
	u32 mask;

	/** @suspended: Set to true when the IRQ is suspended. */
	atomic_t suspended;
};

/**
 * enum panthor_device_profiling_flags - Profiling state
 */
enum panthor_device_profiling_flags {
	/** @PANTHOR_DEVICE_PROFILING_DISABLED: Profiling is disabled. */
	PANTHOR_DEVICE_PROFILING_DISABLED = 0,

	/** @PANTHOR_DEVICE_PROFILING_CYCLES: Sampling job cycles. */
	PANTHOR_DEVICE_PROFILING_CYCLES = BIT(0),

	/** @PANTHOR_DEVICE_PROFILING_TIMESTAMP: Sampling job timestamp. */
	PANTHOR_DEVICE_PROFILING_TIMESTAMP = BIT(1),

	/** @PANTHOR_DEVICE_PROFILING_ALL: Sampling everything. */
	PANTHOR_DEVICE_PROFILING_ALL =
		PANTHOR_DEVICE_PROFILING_CYCLES |
		PANTHOR_DEVICE_PROFILING_TIMESTAMP,
};

/**
 * struct panthor_device - Panthor device
 */
struct panthor_device {
	/** @base: Base drm_device. */
	struct drm_device base;

	/** @soc_data: Optional SoC data. */
	const struct panthor_soc_data *soc_data;

	/** @phys_addr: Physical address of the iomem region. */
	phys_addr_t phys_addr;

	/** @iomem: CPU mapping of the IOMEM region. */
	void __iomem *iomem;

	/** @clks: GPU clocks. */
	struct {
		/** @core: Core clock. */
		struct clk *core;

		/** @stacks: Stacks clock. This clock is optional. */
		struct clk *stacks;

		/** @coregroup: Core group clock. This clock is optional. */
		struct clk *coregroup;
	} clks;

	/** @coherent: True if the CPU/GPU are memory coherent. */
	bool coherent;

	/** @gpu_info: GPU information. */
	struct drm_panthor_gpu_info gpu_info;

	/** @csif_info: Command stream interface information. */
	struct drm_panthor_csif_info csif_info;

	/** @hw: GPU-specific data. */
	struct panthor_hw *hw;

	/** @pwr: Power control management data. */
	struct panthor_pwr *pwr;

	/** @gpu: GPU management data. */
	struct panthor_gpu *gpu;

	/** @fw: FW management data. */
	struct panthor_fw *fw;

	/** @mmu: MMU management data. */
	struct panthor_mmu *mmu;

	/** @scheduler: Scheduler management data. */
	struct panthor_scheduler *scheduler;

	/** @devfreq: Device frequency scaling management data. */
	struct panthor_devfreq *devfreq;

	/** @unplug: Device unplug related fields. */
	struct {
		/** @lock: Lock used to serialize unplug operations. */
		struct mutex lock;

		/**
		 * @done: Completion object signaled when the unplug
		 * operation is done.
		 */
		struct completion done;
	} unplug;

	/** @reset: Reset related fields. */
	struct {
		/** @wq: Ordered workqueue used to schedule reset operations. */
		struct workqueue_struct *wq;

		/** @work: Reset work. */
		struct work_struct work;

		/** @pending: Set to true if a reset is pending. */
		atomic_t pending;

		/**
		 * @fast: True if the post_reset logic can proceed with a fast reset.
		 *
		 * A fast reset is just a reset where the driver doesn't reload the FW sections.
		 *
		 * Any time the firmware is properly suspended, a fast reset can take place.
		 * On the other hand, if the halt operation failed, the driver will reload
		 * all FW sections to make sure we start from a fresh state.
		 */
		bool fast;
	} reset;

	/** @pm: Power management related data. */
	struct {
		/** @state: Power state. */
		atomic_t state;

		/**
		 * @mmio_lock: Lock protecting MMIO userspace CPU mappings.
		 *
		 * This is needed to ensure we map the dummy IO pages when
		 * the device is being suspended, and the real IO pages when
		 * the device is being resumed. We can't just rely on the
		 * state atomicity to deal with this race.
		 */
		struct mutex mmio_lock;

		/**
		 * @dummy_latest_flush: Dummy LATEST_FLUSH page.
		 *
		 * Used to replace the real LATEST_FLUSH page when the GPU
		 * is suspended.
		 */
		struct page *dummy_latest_flush;

		/** @recovery_needed: True when a resume attempt failed. */
		atomic_t recovery_needed;
	} pm;

	/** @profile_mask: User-set profiling flags for job accounting. */
	u32 profile_mask;

	/** @fast_rate: Maximum device clock frequency. Set by DVFS. */
	unsigned long fast_rate;

#ifdef CONFIG_DEBUG_FS
	/** @gems: Device-wide list of GEM objects owned by at least one file. */
	struct {
		/** @gems.lock: Protects the device-wide list of GEM objects. */
		struct mutex lock;

		/** @gems.node: Used to keep track of all the device's DRM objects */
		struct list_head node;
	} gems;
#endif
};

/**
 * struct panthor_gpu_usage - Per-file GPU usage stats
 *
 * Accumulates the cycle and timestamp measures collected for job
 * execution (see &panthor_file.stats).
 */
struct panthor_gpu_usage {
	/** @time: Accumulated job timestamp measures. */
	u64 time;

	/** @cycles: Accumulated job cycle measures. */
	u64 cycles;
};

/**
 * struct panthor_file - Panthor file
 */
struct panthor_file {
	/** @ptdev: Device attached to this file. */
	struct panthor_device *ptdev;

	/** @user_mmio: User MMIO related fields. */
	struct {
		/**
		 * @offset: Offset used for user MMIO mappings.
		 *
		 * This offset should not be used to check the type of mapping
		 * except in panthor_mmap(). After that point, MMIO mapping
		 * offsets have been adjusted to match
		 * DRM_PANTHOR_USER_MMIO_OFFSET and that macro should be used
		 * instead.
		 * Make sure this rule is followed at all times, because
		 * userspace is in control of the offset, and can change the
		 * value behind our back. Otherwise it can lead to erroneous
		 * branching happening in kernel space.
		 */
		u64 offset;
	} user_mmio;

	/** @vms: VM pool attached to this file. */
	struct panthor_vm_pool *vms;

	/** @groups: Scheduling group pool attached to this file. */
	struct panthor_group_pool *groups;

	/** @stats: cycle and timestamp measures for job execution. */
	struct panthor_gpu_usage stats;
};

int panthor_device_init(struct panthor_device *ptdev);
void panthor_device_unplug(struct panthor_device *ptdev);

/**
 * panthor_device_schedule_reset() - Schedules a reset operation
 */
static inline void panthor_device_schedule_reset(struct panthor_device *ptdev)
{
	if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) &&
	    atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE)
		queue_work(ptdev->reset.wq, &ptdev->reset.work);
}

/**
 * panthor_device_reset_is_pending() - Checks if a reset is pending.
 *
 * Return: true if a reset is pending, false otherwise.
 */
static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev)
{
	return atomic_read(&ptdev->reset.pending) != 0;
}

int panthor_device_mmap_io(struct panthor_device *ptdev,
			   struct vm_area_struct *vma);

int panthor_device_resume(struct device *dev);
int panthor_device_suspend(struct device *dev);

static inline int panthor_device_resume_and_get(struct panthor_device *ptdev)
{
	int ret = pm_runtime_resume_and_get(ptdev->base.dev);

	/* If the resume failed, we need to clear the runtime_error, which
	 * can be done by forcing the RPM state to suspended. If multiple
	 * threads called panthor_device_resume_and_get(), we only want
	 * one of them to update the state, hence the cmpxchg. Note that a
	 * thread might enter panthor_device_resume_and_get() and call
	 * pm_runtime_resume_and_get() after another thread had attempted
	 * to resume and failed. This means we will end up with an error
	 * without even attempting a resume ourselves. The only risk here
	 * is to report an error when the second resume attempt might have
	 * succeeded. Given resume errors are not expected, this is probably
	 * something we can live with.
	 */
	if (ret && atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1)
		pm_runtime_set_suspended(ptdev->base.dev);

	return ret;
}

enum drm_panthor_exception_type {
	DRM_PANTHOR_EXCEPTION_OK = 0x00,
	DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
	DRM_PANTHOR_EXCEPTION_KABOOM = 0x05,
	DRM_PANTHOR_EXCEPTION_EUREKA = 0x06,
	DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08,
	DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
	DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
	DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
	DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41,
	DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
	DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
	DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
	DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a,
	DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b,
	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50,
	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51,
	DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
	DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58,
	DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59,
	DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
	DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b,
	DRM_PANTHOR_EXCEPTION_OOM = 0x60,
	DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68,
	DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69,
	DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80,
	DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
	DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
	DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
};

/**
 * panthor_exception_is_fault() - Checks if an exception is a fault.
 *
 * Return: true if the exception is a fault, false otherwise.
 */
static inline bool
panthor_exception_is_fault(u32 exception_code)
{
	return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT;
}

const char *panthor_exception_name(struct panthor_device *ptdev,
				   u32 exception_code);

/**
 * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
 * registration function.
 *
 * The boiler-plate to gracefully deal with shared interrupts is
 * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
 * just after the actual handler. The handler prototype is:
 *
 * void (*handler)(struct panthor_device *, u32 status);
 */
#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler) \
static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \
{ \
	struct panthor_irq *pirq = data; \
	struct panthor_device *ptdev = pirq->ptdev; \
\
	if (atomic_read(&pirq->suspended)) \
		return IRQ_NONE; \
	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT)) \
		return IRQ_NONE; \
\
	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0); \
	return IRQ_WAKE_THREAD; \
} \
\
static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \
{ \
	struct panthor_irq *pirq = data; \
	struct panthor_device *ptdev = pirq->ptdev; \
	irqreturn_t ret = IRQ_NONE; \
\
	while (true) { \
		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask; \
\
		if (!status) \
			break; \
\
		__handler(ptdev, status); \
		ret = IRQ_HANDLED; \
	} \
\
	if (!atomic_read(&pirq->suspended)) \
		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask); \
\
	return ret; \
} \
\
static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \
{ \
	pirq->mask = 0; \
	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \
	synchronize_irq(pirq->irq); \
	atomic_set(&pirq->suspended, true); \
} \
\
static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \
{ \
	atomic_set(&pirq->suspended, false); \
	pirq->mask = mask; \
	gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \
	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \
} \
\
static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \
					      struct panthor_irq *pirq, \
					      int irq, u32 mask) \
{ \
	pirq->ptdev = ptdev; \
	pirq->irq = irq; \
	panthor_ ## __name ## _irq_resume(pirq, mask); \
\
	return devm_request_threaded_irq(ptdev->base.dev, irq, \
					 panthor_ ## __name ## _irq_raw_handler, \
					 panthor_ ## __name ## _irq_threaded_handler, \
					 IRQF_SHARED, KBUILD_MODNAME "-" # __name, \
					 pirq); \
}

extern struct workqueue_struct *panthor_cleanup_wq;

static inline void gpu_write(struct panthor_device *ptdev, u32 reg, u32 data)
{
	writel(data, ptdev->iomem + reg);
}

static inline u32 gpu_read(struct panthor_device *ptdev, u32 reg)
{
	return readl(ptdev->iomem + reg);
}

static inline u32 gpu_read_relaxed(struct panthor_device *ptdev, u32 reg)
{
	return readl_relaxed(ptdev->iomem + reg);
}

static inline void gpu_write64(struct panthor_device *ptdev, u32 reg, u64 data)
{
	gpu_write(ptdev, reg, lower_32_bits(data));
	gpu_write(ptdev, reg + 4, upper_32_bits(data));
}

static inline u64 gpu_read64(struct panthor_device *ptdev, u32 reg)
{
	return (gpu_read(ptdev, reg) | ((u64)gpu_read(ptdev, reg + 4) << 32));
}

static inline u64 gpu_read64_relaxed(struct panthor_device *ptdev, u32 reg)
{
	return (gpu_read_relaxed(ptdev, reg) |
		((u64)gpu_read_relaxed(ptdev, reg + 4) << 32));
}

static inline u64 gpu_read64_counter(struct panthor_device *ptdev, u32 reg)
{
	u32 lo, hi1, hi2;
	do {
		hi1 = gpu_read(ptdev, reg + 4);
		lo = gpu_read(ptdev, reg);
		hi2 = gpu_read(ptdev, reg + 4);
	} while (hi1 != hi2);
	return lo | ((u64)hi2 << 32);
}

#define gpu_read_poll_timeout(dev, reg, val, cond, delay_us, timeout_us) \
	read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \
			  dev, reg)

#define gpu_read_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
				     timeout_us) \
	read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \
				 false, dev, reg)

#define gpu_read64_poll_timeout(dev, reg, val, cond, delay_us, timeout_us) \
	read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \
			  dev, reg)

#define gpu_read64_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
				       timeout_us) \
	read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \
				 false, dev, reg)

#define gpu_read_relaxed_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
					     timeout_us) \
	read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us, \
				 timeout_us, false, dev, reg)

#define gpu_read64_relaxed_poll_timeout(dev, reg, val, cond, delay_us, \
					timeout_us) \
	read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \
			  false, dev, reg)

#endif