/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015 Broadcom
 */
#ifndef _VC4_DRV_H_
#define _VC4_DRV_H_

#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>

#include <drm/drm_atomic.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_device.h>
#include <drm/drm_encoder.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_mm.h>
#include <drm/drm_modeset_lock.h>

#include <kunit/test-bug.h>

#include "uapi/drm/vc4_drm.h"

struct drm_device;
struct drm_gem_object;

extern const struct drm_driver vc4_drm_driver;
extern const struct drm_driver vc5_drm_driver;

/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
 * this.
 */
enum vc4_kernel_bo_type {
	/* Any kernel allocation (gem_create_object hook) before it
	 * gets another type set.
	 */
	VC4_BO_TYPE_KERNEL,
	VC4_BO_TYPE_V3D,
	VC4_BO_TYPE_V3D_SHADER,
	VC4_BO_TYPE_DUMB,
	VC4_BO_TYPE_BIN,
	VC4_BO_TYPE_RCL,
	VC4_BO_TYPE_BCL,
	VC4_BO_TYPE_KERNEL_CACHE,
	VC4_BO_TYPE_COUNT
};

/* Performance monitor object. The perfmon lifetime is controlled by userspace
 * using perfmon related ioctls. A perfmon can be attached to a submit_cl
 * request, and when this is the case, HW perf counters will be activated just
 * before the submit_cl is submitted to the GPU and disabled when the job is
 * done. This way, only events related to a specific job will be counted.
 */
struct vc4_perfmon {
	struct vc4_dev *dev;

	/* Tracks the number of users of the perfmon; when this counter reaches
	 * zero the perfmon is destroyed.
	 */
	refcount_t refcnt;

	/* Number of counters activated in this perfmon instance
	 * (should be less than DRM_VC4_MAX_PERF_COUNTERS).
	 */
	u8 ncounters;

	/* Events counted by the HW perf counters. */
	u8 events[DRM_VC4_MAX_PERF_COUNTERS];

	/* Storage for counter values. Counters are incremented by the HW
	 * perf counter values every time the perfmon is attached to a GPU job.
	 * This way, perfmon users don't have to retrieve the results after
	 * each job if they want to track events covering several submissions.
	 * Note that counter values can't be reset, but you can fake a reset by
	 * destroying the perfmon and creating a new one.
	 */
	u64 counters[] __counted_by(ncounters);
};
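
/*
 * Illustrative sketch only (not part of the API): the usual lifetime pattern
 * for a perfmon, using the vc4_perfmon_get()/vc4_perfmon_put() helpers
 * declared later in this header. A submitted job takes its own reference so
 * the object survives until the job completes, even if userspace destroys
 * its handle in the meantime:
 *
 *	vc4_perfmon_get(exec->perfmon);
 *	... job runs, HW counters accumulate into perfmon->counters ...
 *	vc4_perfmon_put(exec->perfmon);
 */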

enum vc4_gen {
	VC4_GEN_4,
	VC4_GEN_5,
	VC4_GEN_6_C,
	VC4_GEN_6_D,
};

struct vc4_dev {
	struct drm_device base;
	struct device *dev;

	enum vc4_gen gen;

	unsigned int irq;

	struct vc4_hvs *hvs;
	struct vc4_v3d *v3d;

	struct vc4_hang_state *hang_state;

	/* The kernel-space BO cache. Tracks buffers that have been
	 * unreferenced by all other users (refcounts of 0!) but not
	 * yet freed, so we can do cheap allocations.
	 */
	struct vc4_bo_cache {
		/* Array of list heads for entries in the BO cache,
		 * based on number of pages, so we can do O(1) lookups
		 * in the cache when allocating.
		 */
		struct list_head *size_list;
		uint32_t size_list_size;

		/* List of all BOs in the cache, ordered by age, so we
		 * can do O(1) lookups when trying to free old
		 * buffers.
		 */
		struct list_head time_list;
		struct work_struct time_work;
		struct timer_list time_timer;
	} bo_cache;

	u32 num_labels;
	struct vc4_label {
		const char *name;
		u32 num_allocated;
		u32 size_allocated;
	} *bo_labels;

	/* Protects bo_cache and bo_labels. */
	struct mutex bo_lock;

	/* Purgeable BO pool. All BOs in this pool can have their memory
	 * reclaimed if the driver is unable to allocate new BOs. We also
	 * keep stats related to the purge mechanism here.
	 */
	struct {
		struct list_head list;
		unsigned int num;
		size_t size;
		unsigned int purged_num;
		size_t purged_size;
		struct mutex lock;
	} purgeable;

	uint64_t dma_fence_context;

	/* Sequence number for the last job queued in bin_job_list.
	 * Starts at 0 (no jobs emitted).
	 */
	uint64_t emit_seqno;

	/* Sequence number for the last completed job on the GPU.
	 * Starts at 0 (no jobs completed).
	 */
	uint64_t finished_seqno;

	/* List of all struct vc4_exec_info for jobs to be executed in
	 * the binner. The first job in the list is the one currently
	 * programmed into ct0ca for execution.
	 */
	struct list_head bin_job_list;

	/* List of all struct vc4_exec_info for jobs that have
	 * completed binning and are ready for rendering. The first
	 * job in the list is the one currently programmed into ct1ca
	 * for execution.
	 */
	struct list_head render_job_list;

	/* List of the finished vc4_exec_infos waiting to be freed by
	 * job_done_work.
	 */
	struct list_head job_done_list;
	/* Spinlock used to synchronize the job_list and seqno
	 * accesses between the IRQ handler and GEM ioctls.
	 */
	spinlock_t job_lock;
	wait_queue_head_t job_wait_queue;
	struct work_struct job_done_work;

	/* Used to track the active perfmon if any. Access to this field is
	 * protected by job_lock.
	 */
	struct vc4_perfmon *active_perfmon;

	/* The memory used for storing binner tile alloc, tile state,
	 * and overflow memory allocations. This is freed when V3D
	 * powers down.
	 */
	struct vc4_bo *bin_bo;

	/* Size of blocks allocated within bin_bo. */
	uint32_t bin_alloc_size;

	/* Bitmask of the bin_alloc_size chunks in bin_bo that are
	 * used.
	 */
	uint32_t bin_alloc_used;

	/* Bitmask of the current bin_alloc used for overflow memory. */
	uint32_t bin_alloc_overflow;

	/* Incremented when an underrun error happens after an atomic commit.
	 * This is particularly useful to detect when a specific modeset is too
	 * demanding in terms of memory or HVS bandwidth, which is hard to guess
	 * at atomic check time.
	 */
	atomic_t underrun;

	struct work_struct overflow_mem_work;

	int power_refcount;

	/* Set to true when the load tracker is active. */
	bool load_tracker_enabled;

	/* Mutex controlling the power refcount. */
	struct mutex power_lock;

	struct {
		struct timer_list timer;
		struct work_struct reset_work;
	} hangcheck;

	struct drm_modeset_lock ctm_state_lock;
	struct drm_private_obj ctm_manager;
	struct drm_private_obj hvs_channels;
	struct drm_private_obj load_tracker;

	/* Mutex for binner bo allocation. */
	struct mutex bin_bo_lock;
	/* Reference count for our binner bo. */
	struct kref bin_bo_kref;
};

#define to_vc4_dev(_dev)					\
	container_of_const(_dev, struct vc4_dev, base)
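
/*
 * Illustrative only: driver code typically recovers the vc4_dev from the
 * struct drm_device embedded as @base, e.g. in an ioctl or helper:
 *
 *	struct vc4_dev *vc4 = to_vc4_dev(dev);
 *
 * container_of_const() preserves constness, so this also works on a
 * const struct drm_device pointer.
 */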

struct vc4_bo {
	struct drm_gem_dma_object base;

	bool t_format;

	/* List entry for the BO's position in either
	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
	 */
	struct list_head unref_head;

	/* Time in jiffies when the BO was put in vc4->bo_cache. */
	unsigned long free_time;

	/* List entry for the BO's position in vc4_dev->bo_cache.size_list */
	struct list_head size_head;

	/* Struct for shader validation state, if created by
	 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
	 */
	struct vc4_validated_shader_info *validated_shader;

	/* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
	 * for user-allocated labels.
	 */
	int label;

	/* Count the number of active users. This is needed to determine
	 * whether we can move the BO to the purgeable list or not (when the BO
	 * is used by the GPU or the display engine we can't purge it).
	 */
	refcount_t usecnt;

	/* Store purgeable/purged state here */
	u32 madv;
	struct mutex madv_lock;
};

#define to_vc4_bo(_bo)							\
	container_of_const(to_drm_gem_dma_obj(_bo), struct vc4_bo, base)

struct vc4_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* vc4 seqno for signaled() test */
	uint64_t seqno;
};

#define to_vc4_fence(_fence)					\
	container_of_const(_fence, struct vc4_fence, base)

struct vc4_v3d {
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	void __iomem *regs;
	struct clk *clk;
	struct debugfs_regset32 regset;
};

#define VC4_NUM_UPM_HANDLES 32
struct vc4_upm_refcounts {
	refcount_t refcount;

	/* Allocation size */
	size_t size;
	/* Our allocation in UPM for prefetching. */
	struct drm_mm_node upm;

	/* Pointer back to the HVS structure */
	struct vc4_hvs *hvs;
};

#define HVS_NUM_CHANNELS 3

struct vc4_hvs {
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	void __iomem *regs;
	u32 __iomem *dlist;
	unsigned int dlist_mem_size;

	struct clk *core_clk;
	struct clk *disp_clk;

	unsigned long max_core_rate;

	/* Memory manager for CRTCs to allocate space in the display
	 * list. Units are dwords.
	 */
	struct drm_mm dlist_mm;

	/* Memory manager for the LBM memory used by HVS scaling. */
	struct drm_mm lbm_mm;

	/* Memory manager for the UPM memory used for prefetching. */
	struct drm_mm upm_mm;
	struct ida upm_handles;
	struct vc4_upm_refcounts upm_refcounts[VC4_NUM_UPM_HANDLES + 1];

	spinlock_t mm_lock;

	struct drm_mm_node mitchell_netravali_filter;

	struct debugfs_regset32 regset;

	/*
	 * Even if HDMI0 on the RPi4 can output modes requiring a pixel
	 * rate higher than 297MHz, it needs some adjustments in the
	 * config.txt file to be able to do so and thus won't always be
	 * available.
	 */
	bool vc5_hdmi_enable_hdmi_20;

	/*
	 * 4096x2160@60 requires a core overclock to work, so register
	 * whether that is sufficient.
	 */
	bool vc5_hdmi_enable_4096by2160;
};
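
/*
 * Illustrative only: dlist space for a CRTC is carved out of @dlist_mm with
 * the generic DRM range manager, under @mm_lock, roughly:
 *
 *	spin_lock_irqsave(&hvs->mm_lock, flags);
 *	ret = drm_mm_insert_node(&hvs->dlist_mm, &node, dlist_count);
 *	spin_unlock_irqrestore(&hvs->mm_lock, flags);
 *
 * where dlist_count is in dwords, matching the units of @dlist_mm.
 */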
357 */ 358 bool vc5_hdmi_enable_4096by2160; 359 }; 360 361 #define HVS_NUM_CHANNELS 3 362 #define HVS_UBM_WORD_SIZE 256 363 364 struct vc4_hvs_state { 365 struct drm_private_state base; 366 unsigned long core_clock_rate; 367 368 struct { 369 unsigned in_use: 1; 370 unsigned long fifo_load; 371 struct drm_crtc_commit *pending_commit; 372 } fifo_state[HVS_NUM_CHANNELS]; 373 }; 374 375 #define to_vc4_hvs_state(_state) \ 376 container_of_const(_state, struct vc4_hvs_state, base) 377 378 struct vc4_hvs_state *vc4_hvs_get_global_state(struct drm_atomic_state *state); 379 struct vc4_hvs_state *vc4_hvs_get_old_global_state(const struct drm_atomic_state *state); 380 struct vc4_hvs_state *vc4_hvs_get_new_global_state(const struct drm_atomic_state *state); 381 382 struct vc4_plane { 383 struct drm_plane base; 384 }; 385 386 #define to_vc4_plane(_plane) \ 387 container_of_const(_plane, struct vc4_plane, base) 388 389 enum vc4_scaling_mode { 390 VC4_SCALING_NONE, 391 VC4_SCALING_TPZ, 392 VC4_SCALING_PPF, 393 }; 394 395 struct vc4_plane_state { 396 struct drm_plane_state base; 397 /* System memory copy of the display list for this element, computed 398 * at atomic_check time. 399 */ 400 u32 *dlist; 401 u32 dlist_size; /* Number of dwords allocated for the display list */ 402 u32 dlist_count; /* Number of used dwords in the display list. */ 403 404 /* Offset in the dlist to various words, for pageflip or 405 * cursor updates. 406 */ 407 u32 pos0_offset; 408 u32 pos2_offset; 409 u32 ptr0_offset[DRM_FORMAT_MAX_PLANES]; 410 u32 lbm_offset; 411 412 /* Offset where the plane's dlist was last stored in the 413 * hardware at vc4_crtc_atomic_flush() time. 414 */ 415 u32 __iomem *hw_dlist; 416 417 /* Clipped coordinates of the plane on the display. */ 418 int crtc_x, crtc_y, crtc_w, crtc_h; 419 /* Clipped area being scanned from in the FB in u16.16 format */ 420 u32 src_x, src_y; 421 422 u32 src_w[2], src_h[2]; 423 424 /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */ 425 enum vc4_scaling_mode x_scaling[2], y_scaling[2]; 426 bool is_unity; 427 bool is_yuv; 428 429 /* Our allocation in LBM for temporary storage during scaling. */ 430 struct drm_mm_node lbm; 431 432 /* The Unified Pre-Fetcher Handle */ 433 unsigned int upm_handle[DRM_FORMAT_MAX_PLANES]; 434 435 /* Number of lines to pre-fetch */ 436 unsigned int upm_buffer_lines; 437 438 /* Set when the plane has per-pixel alpha content or does not cover 439 * the entire screen. This is a hint to the CRTC that it might need 440 * to enable background color fill. 441 */ 442 bool needs_bg_fill; 443 444 /* Mark the dlist as initialized. Useful to avoid initializing it twice 445 * when async update is not possible. 446 */ 447 bool dlist_initialized; 448 449 /* Load of this plane on the HVS block. The load is expressed in HVS 450 * cycles/sec. 451 */ 452 u64 hvs_load; 453 454 /* Memory bandwidth needed for this plane. This is expressed in 455 * bytes/sec. 
456 */ 457 u64 membus_load; 458 }; 459 460 #define to_vc4_plane_state(_state) \ 461 container_of_const(_state, struct vc4_plane_state, base) 462 463 enum vc4_encoder_type { 464 VC4_ENCODER_TYPE_NONE, 465 VC4_ENCODER_TYPE_HDMI0, 466 VC4_ENCODER_TYPE_HDMI1, 467 VC4_ENCODER_TYPE_VEC, 468 VC4_ENCODER_TYPE_DSI0, 469 VC4_ENCODER_TYPE_DSI1, 470 VC4_ENCODER_TYPE_SMI, 471 VC4_ENCODER_TYPE_DPI, 472 VC4_ENCODER_TYPE_TXP0, 473 VC4_ENCODER_TYPE_TXP1, 474 }; 475 476 struct vc4_encoder { 477 struct drm_encoder base; 478 enum vc4_encoder_type type; 479 u32 clock_select; 480 481 void (*pre_crtc_configure)(struct drm_encoder *encoder, struct drm_atomic_state *state); 482 void (*pre_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state); 483 void (*post_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state); 484 485 void (*post_crtc_disable)(struct drm_encoder *encoder, struct drm_atomic_state *state); 486 void (*post_crtc_powerdown)(struct drm_encoder *encoder, struct drm_atomic_state *state); 487 }; 488 489 #define to_vc4_encoder(_encoder) \ 490 container_of_const(_encoder, struct vc4_encoder, base) 491 492 static inline 493 struct drm_encoder *vc4_find_encoder_by_type(struct drm_device *drm, 494 enum vc4_encoder_type type) 495 { 496 struct drm_encoder *encoder; 497 498 drm_for_each_encoder(encoder, drm) { 499 struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); 500 501 if (vc4_encoder->type == type) 502 return encoder; 503 } 504 505 return NULL; 506 } 507 508 struct vc4_crtc_data { 509 const char *name; 510 511 const char *debugfs_name; 512 513 /* Bitmask of channels (FIFOs) of the HVS that the output can source from */ 514 unsigned int hvs_available_channels; 515 516 /* Which output of the HVS this pixelvalve sources from. */ 517 int hvs_output; 518 }; 519 520 struct vc4_txp_data { 521 struct vc4_crtc_data base; 522 enum vc4_encoder_type encoder_type; 523 unsigned int high_addr_ptr_reg; 524 unsigned int has_byte_enable:1; 525 unsigned int size_minus_one:1; 526 unsigned int supports_40bit_addresses:1; 527 }; 528 529 extern const struct vc4_txp_data bcm2835_txp_data; 530 531 struct vc4_pv_data { 532 struct vc4_crtc_data base; 533 534 /* Depth of the PixelValve FIFO in bytes */ 535 unsigned int fifo_depth; 536 537 /* Number of pixels output per clock period */ 538 u8 pixels_per_clock; 539 540 enum vc4_encoder_type encoder_types[4]; 541 }; 542 543 extern const struct vc4_pv_data bcm2835_pv0_data; 544 extern const struct vc4_pv_data bcm2835_pv1_data; 545 extern const struct vc4_pv_data bcm2835_pv2_data; 546 extern const struct vc4_pv_data bcm2711_pv0_data; 547 extern const struct vc4_pv_data bcm2711_pv1_data; 548 extern const struct vc4_pv_data bcm2711_pv2_data; 549 extern const struct vc4_pv_data bcm2711_pv3_data; 550 extern const struct vc4_pv_data bcm2711_pv4_data; 551 extern const struct vc4_pv_data bcm2712_pv0_data; 552 extern const struct vc4_pv_data bcm2712_pv1_data; 553 554 struct vc4_crtc { 555 struct drm_crtc base; 556 struct platform_device *pdev; 557 const struct vc4_crtc_data *data; 558 void __iomem *regs; 559 560 /* Timestamp at start of vblank irq - unaffected by lock delays. */ 561 ktime_t t_vblank; 562 563 u8 lut_r[256]; 564 u8 lut_g[256]; 565 u8 lut_b[256]; 566 567 struct drm_pending_vblank_event *event; 568 569 struct debugfs_regset32 regset; 570 571 /** 572 * @feeds_txp: True if the CRTC feeds our writeback controller. 
573 */ 574 bool feeds_txp; 575 576 /** 577 * @irq_lock: Spinlock protecting the resources shared between 578 * the atomic code and our vblank handler. 579 */ 580 spinlock_t irq_lock; 581 582 /** 583 * @current_dlist: Start offset of the display list currently 584 * set in the HVS for that CRTC. Protected by @irq_lock, and 585 * copied in vc4_hvs_update_dlist() for the CRTC interrupt 586 * handler to have access to that value. 587 */ 588 unsigned int current_dlist; 589 590 /** 591 * @current_hvs_channel: HVS channel currently assigned to the 592 * CRTC. Protected by @irq_lock, and copied in 593 * vc4_hvs_atomic_begin() for the CRTC interrupt handler to have 594 * access to that value. 595 */ 596 unsigned int current_hvs_channel; 597 }; 598 599 #define to_vc4_crtc(_crtc) \ 600 container_of_const(_crtc, struct vc4_crtc, base) 601 602 static inline const struct vc4_crtc_data * 603 vc4_crtc_to_vc4_crtc_data(const struct vc4_crtc *crtc) 604 { 605 return crtc->data; 606 } 607 608 static inline const struct vc4_pv_data * 609 vc4_crtc_to_vc4_pv_data(const struct vc4_crtc *crtc) 610 { 611 const struct vc4_crtc_data *data = vc4_crtc_to_vc4_crtc_data(crtc); 612 613 return container_of_const(data, struct vc4_pv_data, base); 614 } 615 616 struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc, 617 struct drm_crtc_state *state); 618 619 struct vc4_crtc_state { 620 struct drm_crtc_state base; 621 /* Dlist area for this CRTC configuration. */ 622 struct drm_mm_node mm; 623 bool txp_armed; 624 unsigned int assigned_channel; 625 626 struct drm_connector_tv_margins margins; 627 628 unsigned long hvs_load; 629 630 /* Transitional state below, only valid during atomic commits */ 631 bool update_muxing; 632 }; 633 634 #define VC4_HVS_CHANNEL_DISABLED ((unsigned int)-1) 635 636 #define to_vc4_crtc_state(_state) \ 637 container_of_const(_state, struct vc4_crtc_state, base) 638 639 #define V3D_READ(offset) \ 640 ({ \ 641 kunit_fail_current_test("Accessing a register in a unit test!\n"); \ 642 readl(vc4->v3d->regs + (offset)); \ 643 }) 644 645 #define V3D_WRITE(offset, val) \ 646 do { \ 647 kunit_fail_current_test("Accessing a register in a unit test!\n"); \ 648 writel(val, vc4->v3d->regs + (offset)); \ 649 } while (0) 650 651 #define HVS_READ(offset) \ 652 ({ \ 653 kunit_fail_current_test("Accessing a register in a unit test!\n"); \ 654 readl(hvs->regs + (offset)); \ 655 }) 656 657 #define HVS_WRITE(offset, val) \ 658 do { \ 659 kunit_fail_current_test("Accessing a register in a unit test!\n"); \ 660 writel(val, hvs->regs + (offset)); \ 661 } while (0) 662 663 #define HVS_READ6(offset) \ 664 HVS_READ(hvs->vc4->gen == VC4_GEN_6_C ? SCALER6_ ## offset : SCALER6D_ ## offset) 665 666 #define HVS_WRITE6(offset, val) \ 667 HVS_WRITE(hvs->vc4->gen == VC4_GEN_6_C ? SCALER6_ ## offset : SCALER6D_ ## offset, val) 668 669 #define VC4_REG32(reg) { .name = #reg, .offset = reg } 670 671 struct vc4_exec_info { 672 struct vc4_dev *dev; 673 674 /* Sequence number for this bin/render job. */ 675 uint64_t seqno; 676 677 struct dma_fence *fence; 678 679 /* Last current addresses the hardware was processing when the 680 * hangcheck timer checked on us. 681 */ 682 uint32_t last_ct0ca, last_ct1ca; 683 684 /* Kernel-space copy of the ioctl arguments */ 685 struct drm_vc4_submit_cl *args; 686 687 /* This is the array of BOs that were looked up at the start of exec. 688 * Command validation will use indices into this array. 
689 */ 690 struct drm_gem_object **bo; 691 uint32_t bo_count; 692 693 /* List of BOs that are being written by the RCL. Other than 694 * the binner temporary storage, this is all the BOs written 695 * by the job. 696 */ 697 struct drm_gem_dma_object *rcl_write_bo[4]; 698 uint32_t rcl_write_bo_count; 699 700 /* Pointers for our position in vc4->job_list */ 701 struct list_head head; 702 703 /* List of other BOs used in the job that need to be released 704 * once the job is complete. 705 */ 706 struct list_head unref_list; 707 708 /* Current unvalidated indices into @bo loaded by the non-hardware 709 * VC4_PACKET_GEM_HANDLES. 710 */ 711 uint32_t bo_index[2]; 712 713 /* This is the BO where we store the validated command lists, shader 714 * records, and uniforms. 715 */ 716 struct drm_gem_dma_object *exec_bo; 717 718 /** 719 * This tracks the per-shader-record state (packet 64) that 720 * determines the length of the shader record and the offset 721 * it's expected to be found at. It gets read in from the 722 * command lists. 723 */ 724 struct vc4_shader_state { 725 uint32_t addr; 726 /* Maximum vertex index referenced by any primitive using this 727 * shader state. 728 */ 729 uint32_t max_index; 730 } *shader_state; 731 732 /** How many shader states the user declared they were using. */ 733 uint32_t shader_state_size; 734 /** How many shader state records the validator has seen. */ 735 uint32_t shader_state_count; 736 737 bool found_tile_binning_mode_config_packet; 738 bool found_start_tile_binning_packet; 739 bool found_increment_semaphore_packet; 740 bool found_flush; 741 uint8_t bin_tiles_x, bin_tiles_y; 742 /* Physical address of the start of the tile alloc array 743 * (where each tile's binned CL will start) 744 */ 745 uint32_t tile_alloc_offset; 746 /* Bitmask of which binner slots are freed when this job completes. */ 747 uint32_t bin_slots; 748 749 /** 750 * Computed addresses pointing into exec_bo where we start the 751 * bin thread (ct0) and render thread (ct1). 752 */ 753 uint32_t ct0ca, ct0ea; 754 uint32_t ct1ca, ct1ea; 755 756 /* Pointer to the unvalidated bin CL (if present). */ 757 void *bin_u; 758 759 /* Pointers to the shader recs. These paddr gets incremented as CL 760 * packets are relocated in validate_gl_shader_state, and the vaddrs 761 * (u and v) get incremented and size decremented as the shader recs 762 * themselves are validated. 763 */ 764 void *shader_rec_u; 765 void *shader_rec_v; 766 uint32_t shader_rec_p; 767 uint32_t shader_rec_size; 768 769 /* Pointers to the uniform data. These pointers are incremented, and 770 * size decremented, as each batch of uniforms is uploaded. 771 */ 772 void *uniforms_u; 773 void *uniforms_v; 774 uint32_t uniforms_p; 775 uint32_t uniforms_size; 776 777 /* Pointer to a performance monitor object if the user requested it, 778 * NULL otherwise. 779 */ 780 struct vc4_perfmon *perfmon; 781 782 /* Whether the exec has taken a reference to the binner BO, which should 783 * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet. 784 */ 785 bool bin_bo_used; 786 }; 787 788 /* Per-open file private data. Any driver-specific resource that has to be 789 * released when the DRM file is closed should be placed here. 

/* Per-open file private data. Any driver-specific resource that has to be
 * released when the DRM file is closed should be placed here.
 */
struct vc4_file {
	struct vc4_dev *dev;

	struct {
		struct idr idr;
		struct mutex lock;
	} perfmon;

	bool bin_bo_used;
};

static inline struct vc4_exec_info *
vc4_first_bin_job(struct vc4_dev *vc4)
{
	return list_first_entry_or_null(&vc4->bin_job_list,
					struct vc4_exec_info, head);
}

static inline struct vc4_exec_info *
vc4_first_render_job(struct vc4_dev *vc4)
{
	return list_first_entry_or_null(&vc4->render_job_list,
					struct vc4_exec_info, head);
}

static inline struct vc4_exec_info *
vc4_last_render_job(struct vc4_dev *vc4)
{
	if (list_empty(&vc4->render_job_list))
		return NULL;
	return list_last_entry(&vc4->render_job_list,
			       struct vc4_exec_info, head);
}

/**
 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
 * setup parameters.
 *
 * This will be used at draw time to relocate the reference to the texture
 * contents in p0, and validate that the offset combined with
 * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
 * Note that the hardware treats unprovided config parameters as 0, so not all
 * of them need to be set up for every texture sample, and we'll store ~0 as
 * the offset to mark the unused ones.
 *
 * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
 * Setup") for definitions of the texture parameters.
 */
struct vc4_texture_sample_info {
	bool is_direct;
	uint32_t p_offset[4];
};

/**
 * struct vc4_validated_shader_info - information about validated shaders that
 * needs to be used from command list validation.
 *
 * For a given shader, each time a shader state record references it, we need
 * to verify that the shader doesn't read more uniforms than the shader state
 * record's uniform BO pointer can provide, and we need to apply relocations
 * and validate the shader state record's uniforms that define the texture
 * samples.
 */
struct vc4_validated_shader_info {
	uint32_t uniforms_size;
	uint32_t uniforms_src_size;
	uint32_t num_texture_samples;
	struct vc4_texture_sample_info *texture_samples;

	uint32_t num_uniform_addr_offsets;
	uint32_t *uniform_addr_offsets;

	bool is_threaded;
};
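
/*
 * Illustrative only: a consumer of texture_samples walks the p_offset[]
 * entries of each vc4_texture_sample_info and skips the parameters that were
 * never provided, which are marked with ~0 as described above:
 *
 *	for (i = 0; i < 4; i++) {
 *		if (sample->p_offset[i] == ~0)
 *			continue;
 *		... relocate/validate parameter word i ...
 *	}
 */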

/**
 * __wait_for - magic wait macro
 *
 * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
 * important that we check the condition again after having timed out, since the
 * timeout could be due to preemption or similar and we've never had a chance to
 * check the condition before the timeout.
 */
#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
	long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
	int ret__; \
	might_sleep(); \
	for (;;) { \
		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
		OP; \
		/* Guarantee COND check prior to timeout */ \
		barrier(); \
		if (COND) { \
			ret__ = 0; \
			break; \
		} \
		if (expired__) { \
			ret__ = -ETIMEDOUT; \
			break; \
		} \
		usleep_range(wait__, wait__ * 2); \
		if (wait__ < (Wmax)) \
			wait__ <<= 1; \
	} \
	ret__; \
})

#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \
						   (Wmax))
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
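
/*
 * Illustrative only: wait_for() polls a condition with exponential backoff
 * and evaluates to 0 or -ETIMEDOUT, so a typical caller looks like this
 * (register and bit names are placeholders):
 *
 *	if (wait_for(HVS_READ(SOME_STATUS_REG) & SOME_READY_BIT, 1000))
 *		drm_err(drm, "timed out waiting for ready\n");
 *
 * where the second argument is the timeout in milliseconds.
 */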

/* vc4_bo.c */
struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
			     bool from_cache, enum vc4_kernel_bo_type type);
int vc4_bo_dumb_create(struct drm_file *file_priv,
		       struct drm_device *dev,
		       struct drm_mode_create_dumb *args);
int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file_priv);
int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv);
int vc4_bo_cache_init(struct drm_device *dev);
int vc4_bo_inc_usecnt(struct vc4_bo *bo);
void vc4_bo_dec_usecnt(struct vc4_bo *bo);
void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
int vc4_bo_debugfs_init(struct drm_minor *minor);

/* vc4_crtc.c */
extern struct platform_driver vc4_crtc_driver;
int vc4_crtc_disable_at_boot(struct drm_crtc *crtc);
int __vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev,
		    struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data,
		    struct drm_plane *primary_plane,
		    const struct drm_crtc_funcs *crtc_funcs,
		    const struct drm_crtc_helper_funcs *crtc_helper_funcs,
		    bool feeds_txp);
int vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev,
		  struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data,
		  const struct drm_crtc_funcs *crtc_funcs,
		  const struct drm_crtc_helper_funcs *crtc_helper_funcs,
		  bool feeds_txp);
int vc4_page_flip(struct drm_crtc *crtc,
		  struct drm_framebuffer *fb,
		  struct drm_pending_vblank_event *event,
		  uint32_t flags,
		  struct drm_modeset_acquire_ctx *ctx);
int vc4_crtc_atomic_check(struct drm_crtc *crtc,
			  struct drm_atomic_state *state);
struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc);
void vc4_crtc_destroy_state(struct drm_crtc *crtc,
			    struct drm_crtc_state *state);
void vc4_crtc_reset(struct drm_crtc *crtc);
void vc4_crtc_handle_vblank(struct vc4_crtc *crtc);
void vc4_crtc_send_vblank(struct drm_crtc *crtc);
int vc4_crtc_late_register(struct drm_crtc *crtc);
void vc4_crtc_get_margins(struct drm_crtc_state *state,
			  unsigned int *left, unsigned int *right,
			  unsigned int *top, unsigned int *bottom);

/* vc4_debugfs.c */
void vc4_debugfs_init(struct drm_minor *minor);
#ifdef CONFIG_DEBUG_FS
void vc4_debugfs_add_regset32(struct drm_device *drm,
			      const char *filename,
			      struct debugfs_regset32 *regset);
#else

static inline void vc4_debugfs_add_regset32(struct drm_device *drm,
					     const char *filename,
					     struct debugfs_regset32 *regset)
{}
#endif

/* vc4_drv.c */
void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args);

/* vc4_dpi.c */
extern struct platform_driver vc4_dpi_driver;

/* vc4_dsi.c */
extern struct platform_driver vc4_dsi_driver;

/* vc4_fence.c */
extern const struct dma_fence_ops vc4_fence_ops;

/* vc4_gem.c */
int vc4_gem_init(struct drm_device *dev);
int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
void vc4_submit_next_bin_job(struct drm_device *dev);
void vc4_submit_next_render_job(struct drm_device *dev);
void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
		       uint64_t timeout_ns, bool interruptible);
void vc4_job_handle_completed(struct vc4_dev *vc4);
int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv);

/* vc4_hdmi.c */
extern struct platform_driver vc4_hdmi_driver;

/* vc4_vec.c */
extern struct platform_driver vc4_vec_driver;

/* vc4_txp.c */
extern struct platform_driver vc4_txp_driver;

/* vc4_irq.c */
void vc4_irq_enable(struct drm_device *dev);
void vc4_irq_disable(struct drm_device *dev);
int vc4_irq_install(struct drm_device *dev, int irq);
void vc4_irq_uninstall(struct drm_device *dev);
void vc4_irq_reset(struct drm_device *dev);

/* vc4_hvs.c */
extern struct platform_driver vc4_hvs_driver;
struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
				void __iomem *regs,
				struct platform_device *pdev);
void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int output);
int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output);
u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo);
int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_dump_state(struct vc4_hvs *hvs);
void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel);
void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel);
int vc4_hvs_debugfs_init(struct drm_minor *minor);

/* vc4_kms.c */
int vc4_kms_load(struct drm_device *dev);

/* vc4_plane.c */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs);
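
/*
 * Illustrative only: callers are expected to treat the returned plane as an
 * ERR_PTR-style result, roughly:
 *
 *	plane = vc4_plane_init(drm, DRM_PLANE_TYPE_PRIMARY, possible_crtcs);
 *	if (IS_ERR(plane))
 *		return PTR_ERR(plane);
 */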
int vc4_plane_create_additional_planes(struct drm_device *dev);
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(const struct drm_plane_state *state);
void vc4_plane_async_set_fb(struct drm_plane *plane,
			    struct drm_framebuffer *fb);

/* vc4_v3d.c */
extern struct platform_driver vc4_v3d_driver;
extern const struct of_device_id vc4_v3d_dt_match[];
int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used);
void vc4_v3d_bin_bo_put(struct vc4_dev *vc4);
int vc4_v3d_pm_get(struct vc4_dev *vc4);
void vc4_v3d_pm_put(struct vc4_dev *vc4);
int vc4_v3d_debugfs_init(struct drm_minor *minor);

/* vc4_validate.c */
int
vc4_validate_bin_cl(struct drm_device *dev,
		    void *validated,
		    void *unvalidated,
		    struct vc4_exec_info *exec);

int
vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);

struct drm_gem_dma_object *vc4_use_bo(struct vc4_exec_info *exec,
				      uint32_t hindex);

int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);

bool vc4_check_tex_size(struct vc4_exec_info *exec,
			struct drm_gem_dma_object *fbo,
			uint32_t offset, uint8_t tiling_format,
			uint32_t width, uint32_t height, uint8_t cpp);

/* vc4_validate_shader.c */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_dma_object *shader_obj);

/* vc4_perfmon.c */
void vc4_perfmon_get(struct vc4_perfmon *perfmon);
void vc4_perfmon_put(struct vc4_perfmon *perfmon);
void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
		      bool capture);
struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
void vc4_perfmon_open_file(struct vc4_file *vc4file);
void vc4_perfmon_close_file(struct vc4_file *vc4file);
int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);
int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file_priv);

#endif /* _VC4_DRV_H_ */