/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015 Broadcom
 */
#ifndef _VC4_DRV_H_
#define _VC4_DRV_H_

#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>

#include <drm/drm_atomic.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_device.h>
#include <drm/drm_encoder.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_mm.h>
#include <drm/drm_modeset_lock.h>

#include <kunit/test-bug.h>

#include "uapi/drm/vc4_drm.h"

struct drm_device;
struct drm_gem_object;

extern const struct drm_driver vc4_drm_driver;
extern const struct drm_driver vc5_drm_driver;

/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
 * this.
 */
enum vc4_kernel_bo_type {
	/* Any kernel allocation (gem_create_object hook) before it
	 * gets another type set.
	 */
	VC4_BO_TYPE_KERNEL,
	VC4_BO_TYPE_V3D,
	VC4_BO_TYPE_V3D_SHADER,
	VC4_BO_TYPE_DUMB,
	VC4_BO_TYPE_BIN,
	VC4_BO_TYPE_RCL,
	VC4_BO_TYPE_BCL,
	VC4_BO_TYPE_KERNEL_CACHE,
	VC4_BO_TYPE_COUNT
};

/* Performance monitor object. The perfmon lifetime is controlled by userspace
 * using perfmon related ioctls. A perfmon can be attached to a submit_cl
 * request, and when this is the case, HW perf counters will be activated just
 * before the submit_cl is submitted to the GPU and disabled when the job is
 * done. This way, only events related to a specific job will be counted.
 */
struct vc4_perfmon {
	struct vc4_dev *dev;

	/* Tracks the number of users of the perfmon; when this counter
	 * reaches zero the perfmon is destroyed.
	 */
	refcount_t refcnt;

	/* Number of counters activated in this perfmon instance
	 * (should be less than DRM_VC4_MAX_PERF_COUNTERS).
	 */
	u8 ncounters;

	/* Events counted by the HW perf counters. */
	u8 events[DRM_VC4_MAX_PERF_COUNTERS];

	/* Storage for counter values. Counters are incremented by the HW
	 * perf counter values every time the perfmon is attached to a GPU job.
	 * This way, perfmon users don't have to retrieve the results after
	 * each job if they want to track events covering several submissions.
	 * Note that counter values can't be reset, but you can fake a reset by
	 * destroying the perfmon and creating a new one.
	 */
	u64 counters[] __counted_by(ncounters);
};

enum vc4_gen {
	VC4_GEN_4,
	VC4_GEN_5,
};

struct vc4_dev {
	struct drm_device base;
	struct device *dev;

	enum vc4_gen gen;

	unsigned int irq;

	struct vc4_hvs *hvs;
	struct vc4_v3d *v3d;

	struct vc4_hang_state *hang_state;

	/* The kernel-space BO cache. Tracks buffers that have been
	 * unreferenced by all other users (refcounts of 0!) but not
	 * yet freed, so we can do cheap allocations.
	 */
	struct vc4_bo_cache {
		/* Array of list heads for entries in the BO cache,
		 * based on number of pages, so we can do O(1) lookups
		 * in the cache when allocating.
		 */
		struct list_head *size_list;
		uint32_t size_list_size;

		/* List of all BOs in the cache, ordered by age, so we
		 * can do O(1) lookups when trying to free old
		 * buffers.
		 */
		struct list_head time_list;
		struct work_struct time_work;
		struct timer_list time_timer;
	} bo_cache;

	u32 num_labels;
	struct vc4_label {
		const char *name;
		u32 num_allocated;
		u32 size_allocated;
	} *bo_labels;

	/* Protects bo_cache and bo_labels. */
	struct mutex bo_lock;

	/* Purgeable BO pool. All BOs in this pool can have their memory
	 * reclaimed if the driver is unable to allocate new BOs. We also
	 * keep stats related to the purge mechanism here.
	 */
	struct {
		struct list_head list;
		unsigned int num;
		size_t size;
		unsigned int purged_num;
		size_t purged_size;
		struct mutex lock;
	} purgeable;

	uint64_t dma_fence_context;

	/* Sequence number for the last job queued in bin_job_list.
	 * Starts at 0 (no jobs emitted).
	 */
	uint64_t emit_seqno;

	/* Sequence number for the last completed job on the GPU.
	 * Starts at 0 (no jobs completed).
	 */
	uint64_t finished_seqno;

	/* List of all struct vc4_exec_info for jobs to be executed in
	 * the binner. The first job in the list is the one currently
	 * programmed into ct0ca for execution.
	 */
	struct list_head bin_job_list;

	/* List of all struct vc4_exec_info for jobs that have
	 * completed binning and are ready for rendering. The first
	 * job in the list is the one currently programmed into ct1ca
	 * for execution.
	 */
	struct list_head render_job_list;

	/* List of the finished vc4_exec_infos waiting to be freed by
	 * job_done_work.
	 */
	struct list_head job_done_list;
	/* Spinlock used to synchronize the job_list and seqno
	 * accesses between the IRQ handler and GEM ioctls.
	 */
	spinlock_t job_lock;
	wait_queue_head_t job_wait_queue;
	struct work_struct job_done_work;

	/* Used to track the active perfmon if any. Access to this field is
	 * protected by job_lock.
	 */
	struct vc4_perfmon *active_perfmon;

	/* List of struct vc4_seqno_cb for callbacks to be made from a
	 * workqueue when the given seqno is passed.
	 */
	struct list_head seqno_cb_list;

	/* The memory used for storing binner tile alloc, tile state,
	 * and overflow memory allocations. This is freed when V3D
	 * powers down.
	 */
	struct vc4_bo *bin_bo;

	/* Size of blocks allocated within bin_bo. */
	uint32_t bin_alloc_size;

	/* Bitmask of the bin_alloc_size chunks in bin_bo that are
	 * used.
	 */
	uint32_t bin_alloc_used;

	/* Bitmask of the current bin_alloc used for overflow memory. */
	uint32_t bin_alloc_overflow;

	/* Incremented when an underrun error happens after an atomic commit.
	 * This is particularly useful to detect when a specific modeset is too
	 * demanding in terms of memory or HVS bandwidth, which is hard to
	 * guess at atomic check time.
	 */
	atomic_t underrun;

	struct work_struct overflow_mem_work;

	int power_refcount;

	/* Set to true when the load tracker is active. */
	bool load_tracker_enabled;

	/* Mutex controlling the power refcount. */
	struct mutex power_lock;

	struct {
		struct timer_list timer;
		struct work_struct reset_work;
	} hangcheck;

	struct drm_modeset_lock ctm_state_lock;
	struct drm_private_obj ctm_manager;
	struct drm_private_obj hvs_channels;
	struct drm_private_obj load_tracker;

	/* Mutex for binner bo allocation. */
	struct mutex bin_bo_lock;
	/* Reference count for our binner bo. */
	struct kref bin_bo_kref;
};

#define to_vc4_dev(_dev) \
	container_of_const(_dev, struct vc4_dev, base)

struct vc4_bo {
	struct drm_gem_dma_object base;

	/* seqno of the last job to render using this BO. */
	uint64_t seqno;

	/* seqno of the last job to use the RCL to write to this BO.
	 *
	 * Note that this doesn't include binner overflow memory
	 * writes.
	 */
	uint64_t write_seqno;

	bool t_format;

	/* List entry for the BO's position in either
	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
	 */
	struct list_head unref_head;

	/* Time in jiffies when the BO was put in vc4->bo_cache. */
	unsigned long free_time;

	/* List entry for the BO's position in vc4_dev->bo_cache.size_list */
	struct list_head size_head;

	/* Struct for shader validation state, if created by
	 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
	 */
	struct vc4_validated_shader_info *validated_shader;

	/* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
	 * for user-allocated labels.
	 */
	int label;

	/* Count the number of active users. This is needed to determine
	 * whether we can move the BO to the purgeable list or not (when the BO
	 * is used by the GPU or the display engine we can't purge it).
	 */
	refcount_t usecnt;

	/* Store purgeable/purged state here */
	u32 madv;
	struct mutex madv_lock;
};

#define to_vc4_bo(_bo) \
	container_of_const(to_drm_gem_dma_obj(_bo), struct vc4_bo, base)

struct vc4_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* vc4 seqno for signaled() test */
	uint64_t seqno;
};

#define to_vc4_fence(_fence) \
	container_of_const(_fence, struct vc4_fence, base)

struct vc4_seqno_cb {
	struct work_struct work;
	uint64_t seqno;
	void (*func)(struct vc4_seqno_cb *cb);
};

struct vc4_v3d {
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	void __iomem *regs;
	struct clk *clk;
	struct debugfs_regset32 regset;
};

struct vc4_hvs {
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	void __iomem *regs;
	u32 __iomem *dlist;
	unsigned int dlist_mem_size;

	struct clk *core_clk;

	unsigned long max_core_rate;

	/* Memory manager for CRTCs to allocate space in the display
	 * list. Units are dwords.
	 */
	struct drm_mm dlist_mm;
	/* Memory manager for the LBM memory used by HVS scaling. */
	struct drm_mm lbm_mm;
	spinlock_t mm_lock;

	struct drm_mm_node mitchell_netravali_filter;

	struct debugfs_regset32 regset;

	/*
	 * Even though HDMI0 on the RPi4 can output modes requiring a pixel
	 * rate higher than 297MHz, it needs some adjustments in the
	 * config.txt file to be able to do so and thus won't always be
	 * available.
	 */
	bool vc5_hdmi_enable_hdmi_20;

	/*
	 * 4096x2160@60 requires a core overclock to work, so register
	 * whether that is sufficient.
	 */
353 */ 354 bool vc5_hdmi_enable_4096by2160; 355 }; 356 357 #define HVS_NUM_CHANNELS 3 358 359 struct vc4_hvs_state { 360 struct drm_private_state base; 361 unsigned long core_clock_rate; 362 363 struct { 364 unsigned in_use: 1; 365 unsigned long fifo_load; 366 struct drm_crtc_commit *pending_commit; 367 } fifo_state[HVS_NUM_CHANNELS]; 368 }; 369 370 #define to_vc4_hvs_state(_state) \ 371 container_of_const(_state, struct vc4_hvs_state, base) 372 373 struct vc4_hvs_state *vc4_hvs_get_global_state(struct drm_atomic_state *state); 374 struct vc4_hvs_state *vc4_hvs_get_old_global_state(const struct drm_atomic_state *state); 375 struct vc4_hvs_state *vc4_hvs_get_new_global_state(const struct drm_atomic_state *state); 376 377 struct vc4_plane { 378 struct drm_plane base; 379 }; 380 381 #define to_vc4_plane(_plane) \ 382 container_of_const(_plane, struct vc4_plane, base) 383 384 enum vc4_scaling_mode { 385 VC4_SCALING_NONE, 386 VC4_SCALING_TPZ, 387 VC4_SCALING_PPF, 388 }; 389 390 struct vc4_plane_state { 391 struct drm_plane_state base; 392 /* System memory copy of the display list for this element, computed 393 * at atomic_check time. 394 */ 395 u32 *dlist; 396 u32 dlist_size; /* Number of dwords allocated for the display list */ 397 u32 dlist_count; /* Number of used dwords in the display list. */ 398 399 /* Offset in the dlist to various words, for pageflip or 400 * cursor updates. 401 */ 402 u32 pos0_offset; 403 u32 pos2_offset; 404 u32 ptr0_offset[DRM_FORMAT_MAX_PLANES]; 405 u32 lbm_offset; 406 407 /* Offset where the plane's dlist was last stored in the 408 * hardware at vc4_crtc_atomic_flush() time. 409 */ 410 u32 __iomem *hw_dlist; 411 412 /* Clipped coordinates of the plane on the display. */ 413 int crtc_x, crtc_y, crtc_w, crtc_h; 414 /* Clipped area being scanned from in the FB in u16.16 format */ 415 u32 src_x, src_y; 416 417 u32 src_w[2], src_h[2]; 418 419 /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */ 420 enum vc4_scaling_mode x_scaling[2], y_scaling[2]; 421 bool is_unity; 422 bool is_yuv; 423 424 /* Our allocation in LBM for temporary storage during scaling. */ 425 struct drm_mm_node lbm; 426 427 /* Set when the plane has per-pixel alpha content or does not cover 428 * the entire screen. This is a hint to the CRTC that it might need 429 * to enable background color fill. 430 */ 431 bool needs_bg_fill; 432 433 /* Mark the dlist as initialized. Useful to avoid initializing it twice 434 * when async update is not possible. 435 */ 436 bool dlist_initialized; 437 438 /* Load of this plane on the HVS block. The load is expressed in HVS 439 * cycles/sec. 440 */ 441 u64 hvs_load; 442 443 /* Memory bandwidth needed for this plane. This is expressed in 444 * bytes/sec. 
445 */ 446 u64 membus_load; 447 }; 448 449 #define to_vc4_plane_state(_state) \ 450 container_of_const(_state, struct vc4_plane_state, base) 451 452 enum vc4_encoder_type { 453 VC4_ENCODER_TYPE_NONE, 454 VC4_ENCODER_TYPE_HDMI0, 455 VC4_ENCODER_TYPE_HDMI1, 456 VC4_ENCODER_TYPE_VEC, 457 VC4_ENCODER_TYPE_DSI0, 458 VC4_ENCODER_TYPE_DSI1, 459 VC4_ENCODER_TYPE_SMI, 460 VC4_ENCODER_TYPE_DPI, 461 VC4_ENCODER_TYPE_TXP, 462 }; 463 464 struct vc4_encoder { 465 struct drm_encoder base; 466 enum vc4_encoder_type type; 467 u32 clock_select; 468 469 void (*pre_crtc_configure)(struct drm_encoder *encoder, struct drm_atomic_state *state); 470 void (*pre_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state); 471 void (*post_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state); 472 473 void (*post_crtc_disable)(struct drm_encoder *encoder, struct drm_atomic_state *state); 474 void (*post_crtc_powerdown)(struct drm_encoder *encoder, struct drm_atomic_state *state); 475 }; 476 477 #define to_vc4_encoder(_encoder) \ 478 container_of_const(_encoder, struct vc4_encoder, base) 479 480 static inline 481 struct drm_encoder *vc4_find_encoder_by_type(struct drm_device *drm, 482 enum vc4_encoder_type type) 483 { 484 struct drm_encoder *encoder; 485 486 drm_for_each_encoder(encoder, drm) { 487 struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); 488 489 if (vc4_encoder->type == type) 490 return encoder; 491 } 492 493 return NULL; 494 } 495 496 struct vc4_crtc_data { 497 const char *name; 498 499 const char *debugfs_name; 500 501 /* Bitmask of channels (FIFOs) of the HVS that the output can source from */ 502 unsigned int hvs_available_channels; 503 504 /* Which output of the HVS this pixelvalve sources from. */ 505 int hvs_output; 506 }; 507 508 extern const struct vc4_crtc_data vc4_txp_crtc_data; 509 510 struct vc4_pv_data { 511 struct vc4_crtc_data base; 512 513 /* Depth of the PixelValve FIFO in bytes */ 514 unsigned int fifo_depth; 515 516 /* Number of pixels output per clock period */ 517 u8 pixels_per_clock; 518 519 enum vc4_encoder_type encoder_types[4]; 520 }; 521 522 extern const struct vc4_pv_data bcm2835_pv0_data; 523 extern const struct vc4_pv_data bcm2835_pv1_data; 524 extern const struct vc4_pv_data bcm2835_pv2_data; 525 extern const struct vc4_pv_data bcm2711_pv0_data; 526 extern const struct vc4_pv_data bcm2711_pv1_data; 527 extern const struct vc4_pv_data bcm2711_pv2_data; 528 extern const struct vc4_pv_data bcm2711_pv3_data; 529 extern const struct vc4_pv_data bcm2711_pv4_data; 530 531 struct vc4_crtc { 532 struct drm_crtc base; 533 struct platform_device *pdev; 534 const struct vc4_crtc_data *data; 535 void __iomem *regs; 536 537 /* Timestamp at start of vblank irq - unaffected by lock delays. */ 538 ktime_t t_vblank; 539 540 u8 lut_r[256]; 541 u8 lut_g[256]; 542 u8 lut_b[256]; 543 544 struct drm_pending_vblank_event *event; 545 546 struct debugfs_regset32 regset; 547 548 /** 549 * @feeds_txp: True if the CRTC feeds our writeback controller. 550 */ 551 bool feeds_txp; 552 553 /** 554 * @irq_lock: Spinlock protecting the resources shared between 555 * the atomic code and our vblank handler. 556 */ 557 spinlock_t irq_lock; 558 559 /** 560 * @current_dlist: Start offset of the display list currently 561 * set in the HVS for that CRTC. Protected by @irq_lock, and 562 * copied in vc4_hvs_update_dlist() for the CRTC interrupt 563 * handler to have access to that value. 
564 */ 565 unsigned int current_dlist; 566 567 /** 568 * @current_hvs_channel: HVS channel currently assigned to the 569 * CRTC. Protected by @irq_lock, and copied in 570 * vc4_hvs_atomic_begin() for the CRTC interrupt handler to have 571 * access to that value. 572 */ 573 unsigned int current_hvs_channel; 574 }; 575 576 #define to_vc4_crtc(_crtc) \ 577 container_of_const(_crtc, struct vc4_crtc, base) 578 579 static inline const struct vc4_crtc_data * 580 vc4_crtc_to_vc4_crtc_data(const struct vc4_crtc *crtc) 581 { 582 return crtc->data; 583 } 584 585 static inline const struct vc4_pv_data * 586 vc4_crtc_to_vc4_pv_data(const struct vc4_crtc *crtc) 587 { 588 const struct vc4_crtc_data *data = vc4_crtc_to_vc4_crtc_data(crtc); 589 590 return container_of_const(data, struct vc4_pv_data, base); 591 } 592 593 struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc, 594 struct drm_crtc_state *state); 595 596 struct vc4_crtc_state { 597 struct drm_crtc_state base; 598 /* Dlist area for this CRTC configuration. */ 599 struct drm_mm_node mm; 600 bool txp_armed; 601 unsigned int assigned_channel; 602 603 struct drm_connector_tv_margins margins; 604 605 unsigned long hvs_load; 606 607 /* Transitional state below, only valid during atomic commits */ 608 bool update_muxing; 609 }; 610 611 #define VC4_HVS_CHANNEL_DISABLED ((unsigned int)-1) 612 613 #define to_vc4_crtc_state(_state) \ 614 container_of_const(_state, struct vc4_crtc_state, base) 615 616 #define V3D_READ(offset) \ 617 ({ \ 618 kunit_fail_current_test("Accessing a register in a unit test!\n"); \ 619 readl(vc4->v3d->regs + (offset)); \ 620 }) 621 622 #define V3D_WRITE(offset, val) \ 623 do { \ 624 kunit_fail_current_test("Accessing a register in a unit test!\n"); \ 625 writel(val, vc4->v3d->regs + (offset)); \ 626 } while (0) 627 628 #define HVS_READ(offset) \ 629 ({ \ 630 kunit_fail_current_test("Accessing a register in a unit test!\n"); \ 631 readl(hvs->regs + (offset)); \ 632 }) 633 634 #define HVS_WRITE(offset, val) \ 635 do { \ 636 kunit_fail_current_test("Accessing a register in a unit test!\n"); \ 637 writel(val, hvs->regs + (offset)); \ 638 } while (0) 639 640 #define VC4_REG32(reg) { .name = #reg, .offset = reg } 641 642 struct vc4_exec_info { 643 struct vc4_dev *dev; 644 645 /* Sequence number for this bin/render job. */ 646 uint64_t seqno; 647 648 /* Latest write_seqno of any BO that binning depends on. */ 649 uint64_t bin_dep_seqno; 650 651 struct dma_fence *fence; 652 653 /* Last current addresses the hardware was processing when the 654 * hangcheck timer checked on us. 655 */ 656 uint32_t last_ct0ca, last_ct1ca; 657 658 /* Kernel-space copy of the ioctl arguments */ 659 struct drm_vc4_submit_cl *args; 660 661 /* This is the array of BOs that were looked up at the start of exec. 662 * Command validation will use indices into this array. 663 */ 664 struct drm_gem_object **bo; 665 uint32_t bo_count; 666 667 /* List of BOs that are being written by the RCL. Other than 668 * the binner temporary storage, this is all the BOs written 669 * by the job. 670 */ 671 struct drm_gem_dma_object *rcl_write_bo[4]; 672 uint32_t rcl_write_bo_count; 673 674 /* Pointers for our position in vc4->job_list */ 675 struct list_head head; 676 677 /* List of other BOs used in the job that need to be released 678 * once the job is complete. 679 */ 680 struct list_head unref_list; 681 682 /* Current unvalidated indices into @bo loaded by the non-hardware 683 * VC4_PACKET_GEM_HANDLES. 
684 */ 685 uint32_t bo_index[2]; 686 687 /* This is the BO where we store the validated command lists, shader 688 * records, and uniforms. 689 */ 690 struct drm_gem_dma_object *exec_bo; 691 692 /** 693 * This tracks the per-shader-record state (packet 64) that 694 * determines the length of the shader record and the offset 695 * it's expected to be found at. It gets read in from the 696 * command lists. 697 */ 698 struct vc4_shader_state { 699 uint32_t addr; 700 /* Maximum vertex index referenced by any primitive using this 701 * shader state. 702 */ 703 uint32_t max_index; 704 } *shader_state; 705 706 /** How many shader states the user declared they were using. */ 707 uint32_t shader_state_size; 708 /** How many shader state records the validator has seen. */ 709 uint32_t shader_state_count; 710 711 bool found_tile_binning_mode_config_packet; 712 bool found_start_tile_binning_packet; 713 bool found_increment_semaphore_packet; 714 bool found_flush; 715 uint8_t bin_tiles_x, bin_tiles_y; 716 /* Physical address of the start of the tile alloc array 717 * (where each tile's binned CL will start) 718 */ 719 uint32_t tile_alloc_offset; 720 /* Bitmask of which binner slots are freed when this job completes. */ 721 uint32_t bin_slots; 722 723 /** 724 * Computed addresses pointing into exec_bo where we start the 725 * bin thread (ct0) and render thread (ct1). 726 */ 727 uint32_t ct0ca, ct0ea; 728 uint32_t ct1ca, ct1ea; 729 730 /* Pointer to the unvalidated bin CL (if present). */ 731 void *bin_u; 732 733 /* Pointers to the shader recs. These paddr gets incremented as CL 734 * packets are relocated in validate_gl_shader_state, and the vaddrs 735 * (u and v) get incremented and size decremented as the shader recs 736 * themselves are validated. 737 */ 738 void *shader_rec_u; 739 void *shader_rec_v; 740 uint32_t shader_rec_p; 741 uint32_t shader_rec_size; 742 743 /* Pointers to the uniform data. These pointers are incremented, and 744 * size decremented, as each batch of uniforms is uploaded. 745 */ 746 void *uniforms_u; 747 void *uniforms_v; 748 uint32_t uniforms_p; 749 uint32_t uniforms_size; 750 751 /* Pointer to a performance monitor object if the user requested it, 752 * NULL otherwise. 753 */ 754 struct vc4_perfmon *perfmon; 755 756 /* Whether the exec has taken a reference to the binner BO, which should 757 * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet. 758 */ 759 bool bin_bo_used; 760 }; 761 762 /* Per-open file private data. Any driver-specific resource that has to be 763 * released when the DRM file is closed should be placed here. 764 */ 765 struct vc4_file { 766 struct vc4_dev *dev; 767 768 struct { 769 struct idr idr; 770 struct mutex lock; 771 } perfmon; 772 773 bool bin_bo_used; 774 }; 775 776 static inline struct vc4_exec_info * 777 vc4_first_bin_job(struct vc4_dev *vc4) 778 { 779 return list_first_entry_or_null(&vc4->bin_job_list, 780 struct vc4_exec_info, head); 781 } 782 783 static inline struct vc4_exec_info * 784 vc4_first_render_job(struct vc4_dev *vc4) 785 { 786 return list_first_entry_or_null(&vc4->render_job_list, 787 struct vc4_exec_info, head); 788 } 789 790 static inline struct vc4_exec_info * 791 vc4_last_render_job(struct vc4_dev *vc4) 792 { 793 if (list_empty(&vc4->render_job_list)) 794 return NULL; 795 return list_last_entry(&vc4->render_job_list, 796 struct vc4_exec_info, head); 797 } 798 799 /** 800 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture 801 * setup parameters. 
802 * 803 * This will be used at draw time to relocate the reference to the texture 804 * contents in p0, and validate that the offset combined with 805 * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO. 806 * Note that the hardware treats unprovided config parameters as 0, so not all 807 * of them need to be set up for every texure sample, and we'll store ~0 as 808 * the offset to mark the unused ones. 809 * 810 * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit 811 * Setup") for definitions of the texture parameters. 812 */ 813 struct vc4_texture_sample_info { 814 bool is_direct; 815 uint32_t p_offset[4]; 816 }; 817 818 /** 819 * struct vc4_validated_shader_info - information about validated shaders that 820 * needs to be used from command list validation. 821 * 822 * For a given shader, each time a shader state record references it, we need 823 * to verify that the shader doesn't read more uniforms than the shader state 824 * record's uniform BO pointer can provide, and we need to apply relocations 825 * and validate the shader state record's uniforms that define the texture 826 * samples. 827 */ 828 struct vc4_validated_shader_info { 829 uint32_t uniforms_size; 830 uint32_t uniforms_src_size; 831 uint32_t num_texture_samples; 832 struct vc4_texture_sample_info *texture_samples; 833 834 uint32_t num_uniform_addr_offsets; 835 uint32_t *uniform_addr_offsets; 836 837 bool is_threaded; 838 }; 839 840 /** 841 * __wait_for - magic wait macro 842 * 843 * Macro to help avoid open coding check/wait/timeout patterns. Note that it's 844 * important that we check the condition again after having timed out, since the 845 * timeout could be due to preemption or similar and we've never had a chance to 846 * check the condition before the timeout. 
847 */ 848 #define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ 849 const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ 850 long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ 851 int ret__; \ 852 might_sleep(); \ 853 for (;;) { \ 854 const bool expired__ = ktime_after(ktime_get_raw(), end__); \ 855 OP; \ 856 /* Guarantee COND check prior to timeout */ \ 857 barrier(); \ 858 if (COND) { \ 859 ret__ = 0; \ 860 break; \ 861 } \ 862 if (expired__) { \ 863 ret__ = -ETIMEDOUT; \ 864 break; \ 865 } \ 866 usleep_range(wait__, wait__ * 2); \ 867 if (wait__ < (Wmax)) \ 868 wait__ <<= 1; \ 869 } \ 870 ret__; \ 871 }) 872 873 #define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ 874 (Wmax)) 875 #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) 876 877 /* vc4_bo.c */ 878 struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); 879 struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, 880 bool from_cache, enum vc4_kernel_bo_type type); 881 int vc4_bo_dumb_create(struct drm_file *file_priv, 882 struct drm_device *dev, 883 struct drm_mode_create_dumb *args); 884 int vc4_create_bo_ioctl(struct drm_device *dev, void *data, 885 struct drm_file *file_priv); 886 int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, 887 struct drm_file *file_priv); 888 int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, 889 struct drm_file *file_priv); 890 int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, 891 struct drm_file *file_priv); 892 int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, 893 struct drm_file *file_priv); 894 int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, 895 struct drm_file *file_priv); 896 int vc4_label_bo_ioctl(struct drm_device *dev, void *data, 897 struct drm_file *file_priv); 898 int vc4_bo_cache_init(struct drm_device *dev); 899 int vc4_bo_inc_usecnt(struct vc4_bo *bo); 900 void vc4_bo_dec_usecnt(struct vc4_bo *bo); 901 void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo); 902 void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo); 903 int vc4_bo_debugfs_init(struct drm_minor *minor); 904 905 /* vc4_crtc.c */ 906 extern struct platform_driver vc4_crtc_driver; 907 int vc4_crtc_disable_at_boot(struct drm_crtc *crtc); 908 int __vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev, 909 struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data, 910 struct drm_plane *primary_plane, 911 const struct drm_crtc_funcs *crtc_funcs, 912 const struct drm_crtc_helper_funcs *crtc_helper_funcs, 913 bool feeds_txp); 914 int vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev, 915 struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data, 916 const struct drm_crtc_funcs *crtc_funcs, 917 const struct drm_crtc_helper_funcs *crtc_helper_funcs, 918 bool feeds_txp); 919 int vc4_page_flip(struct drm_crtc *crtc, 920 struct drm_framebuffer *fb, 921 struct drm_pending_vblank_event *event, 922 uint32_t flags, 923 struct drm_modeset_acquire_ctx *ctx); 924 int vc4_crtc_atomic_check(struct drm_crtc *crtc, 925 struct drm_atomic_state *state); 926 struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc); 927 void vc4_crtc_destroy_state(struct drm_crtc *crtc, 928 struct drm_crtc_state *state); 929 void vc4_crtc_reset(struct drm_crtc *crtc); 930 void vc4_crtc_handle_vblank(struct vc4_crtc *crtc); 931 void vc4_crtc_send_vblank(struct drm_crtc *crtc); 932 int vc4_crtc_late_register(struct drm_crtc *crtc); 933 void 
			  unsigned int *left, unsigned int *right,
			  unsigned int *top, unsigned int *bottom);

/* vc4_debugfs.c */
void vc4_debugfs_init(struct drm_minor *minor);
#ifdef CONFIG_DEBUG_FS
void vc4_debugfs_add_regset32(struct drm_device *drm,
			      const char *filename,
			      struct debugfs_regset32 *regset);
#else

static inline void vc4_debugfs_add_regset32(struct drm_device *drm,
					    const char *filename,
					    struct debugfs_regset32 *regset)
{}
#endif

/* vc4_drv.c */
void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args);

/* vc4_dpi.c */
extern struct platform_driver vc4_dpi_driver;

/* vc4_dsi.c */
extern struct platform_driver vc4_dsi_driver;

/* vc4_fence.c */
extern const struct dma_fence_ops vc4_fence_ops;

/* vc4_gem.c */
int vc4_gem_init(struct drm_device *dev);
int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
void vc4_submit_next_bin_job(struct drm_device *dev);
void vc4_submit_next_render_job(struct drm_device *dev);
void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
		       uint64_t timeout_ns, bool interruptible);
void vc4_job_handle_completed(struct vc4_dev *vc4);
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb));
int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv);

/* vc4_hdmi.c */
extern struct platform_driver vc4_hdmi_driver;

/* vc4_vec.c */
extern struct platform_driver vc4_vec_driver;

/* vc4_txp.c */
extern struct platform_driver vc4_txp_driver;

/* vc4_irq.c */
void vc4_irq_enable(struct drm_device *dev);
void vc4_irq_disable(struct drm_device *dev);
int vc4_irq_install(struct drm_device *dev, int irq);
void vc4_irq_uninstall(struct drm_device *dev);
void vc4_irq_reset(struct drm_device *dev);

/* vc4_hvs.c */
extern struct platform_driver vc4_hvs_driver;
struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
				void __iomem *regs,
				struct platform_device *pdev);
void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int output);
int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output);
u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo);
int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_dump_state(struct vc4_hvs *hvs);
void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel);
void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel);
int vc4_hvs_debugfs_init(struct drm_minor *minor);

/* vc4_kms.c */
int vc4_kms_load(struct drm_device *dev);

/* vc4_plane.c */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs);
int vc4_plane_create_additional_planes(struct drm_device *dev);
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(const struct drm_plane_state *state);
void vc4_plane_async_set_fb(struct drm_plane *plane,
			    struct drm_framebuffer *fb);

/* vc4_v3d.c */
extern struct platform_driver vc4_v3d_driver;
extern const struct of_device_id vc4_v3d_dt_match[];
int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used);
void vc4_v3d_bin_bo_put(struct vc4_dev *vc4);
int vc4_v3d_pm_get(struct vc4_dev *vc4);
void vc4_v3d_pm_put(struct vc4_dev *vc4);
int vc4_v3d_debugfs_init(struct drm_minor *minor);

/* vc4_validate.c */
int
vc4_validate_bin_cl(struct drm_device *dev,
		    void *validated,
		    void *unvalidated,
		    struct vc4_exec_info *exec);

int
vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);

struct drm_gem_dma_object *vc4_use_bo(struct vc4_exec_info *exec,
				      uint32_t hindex);

int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);

bool vc4_check_tex_size(struct vc4_exec_info *exec,
			struct drm_gem_dma_object *fbo,
			uint32_t offset, uint8_t tiling_format,
			uint32_t width, uint32_t height, uint8_t cpp);

/* vc4_validate_shader.c */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_dma_object *shader_obj);

/* vc4_perfmon.c */
void vc4_perfmon_get(struct vc4_perfmon *perfmon);
void vc4_perfmon_put(struct vc4_perfmon *perfmon);
void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
		      bool capture);
struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
void vc4_perfmon_open_file(struct vc4_file *vc4file);
void vc4_perfmon_close_file(struct vc4_file *vc4file);
int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);
int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file_priv);

#endif /* _VC4_DRV_H_ */