/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015 Broadcom
 */
#ifndef _VC4_DRV_H_
#define _VC4_DRV_H_

#include <linux/delay.h>
#include <linux/of.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>

#include <drm/drm_atomic.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_device.h>
#include <drm/drm_encoder.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_mm.h>
#include <drm/drm_modeset_lock.h>

#include <kunit/test-bug.h>

#include "uapi/drm/vc4_drm.h"

struct drm_device;
struct drm_gem_object;

extern const struct drm_driver vc4_drm_driver;
extern const struct drm_driver vc5_drm_driver;

/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
 * this.
 */
enum vc4_kernel_bo_type {
	/* Any kernel allocation (gem_create_object hook) before it
	 * gets another type set.
	 */
	VC4_BO_TYPE_KERNEL,
	VC4_BO_TYPE_V3D,
	VC4_BO_TYPE_V3D_SHADER,
	VC4_BO_TYPE_DUMB,
	VC4_BO_TYPE_BIN,
	VC4_BO_TYPE_RCL,
	VC4_BO_TYPE_BCL,
	VC4_BO_TYPE_KERNEL_CACHE,
	VC4_BO_TYPE_COUNT
};

/* Performance monitor object. The perfmon lifetime is controlled by userspace
 * using perfmon-related ioctls. A perfmon can be attached to a submit_cl
 * request, and when this is the case, HW perf counters will be activated just
 * before the submit_cl is submitted to the GPU and disabled when the job is
 * done. This way, only events related to a specific job will be counted.
 */
struct vc4_perfmon {
	struct vc4_dev *dev;

	/* Tracks the number of users of the perfmon; when this counter reaches
	 * zero the perfmon is destroyed.
	 */
	refcount_t refcnt;

	/* Number of counters activated in this perfmon instance
	 * (should be less than DRM_VC4_MAX_PERF_COUNTERS).
	 */
	u8 ncounters;

	/* Events counted by the HW perf counters. */
	u8 events[DRM_VC4_MAX_PERF_COUNTERS];

	/* Storage for counter values. Counters are incremented by the HW
	 * perf counter values every time the perfmon is attached to a GPU job.
	 * This way, perfmon users don't have to retrieve the results after
	 * each job if they want to track events covering several submissions.
	 * Note that counter values can't be reset, but you can fake a reset by
	 * destroying the perfmon and creating a new one.
	 */
	u64 counters[];
};
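
/*
 * Illustrative sketch, not part of the driver: because the perfmon is
 * refcounted, code that attaches one to a job is expected to hold a
 * reference for the lifetime of that job and to drop it once the job is
 * done, along these lines (the "exec" job pointer is hypothetical):
 *
 *	vc4_perfmon_get(perfmon);
 *	exec->perfmon = perfmon;
 *	... submit the job and wait for it to complete ...
 *	vc4_perfmon_put(exec->perfmon);
 *
 * vc4_perfmon_get() and vc4_perfmon_put() are declared at the end of this
 * header.
 */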

struct vc4_dev {
	struct drm_device base;
	struct device *dev;

	bool is_vc5;

	unsigned int irq;

	struct vc4_hvs *hvs;
	struct vc4_v3d *v3d;

	struct vc4_hang_state *hang_state;

	/* The kernel-space BO cache. Tracks buffers that have been
	 * unreferenced by all other users (refcounts of 0!) but not
	 * yet freed, so we can do cheap allocations.
	 */
	struct vc4_bo_cache {
		/* Array of list heads for entries in the BO cache,
		 * based on number of pages, so we can do O(1) lookups
		 * in the cache when allocating.
		 */
		struct list_head *size_list;
		uint32_t size_list_size;

		/* List of all BOs in the cache, ordered by age, so we
		 * can do O(1) lookups when trying to free old
		 * buffers.
		 */
		struct list_head time_list;
		struct work_struct time_work;
		struct timer_list time_timer;
	} bo_cache;

	u32 num_labels;
	struct vc4_label {
		const char *name;
		u32 num_allocated;
		u32 size_allocated;
	} *bo_labels;

	/* Protects bo_cache and bo_labels. */
	struct mutex bo_lock;

	/* Purgeable BO pool. All BOs in this pool can have their memory
	 * reclaimed if the driver is unable to allocate new BOs. We also
	 * keep stats related to the purge mechanism here.
	 */
	struct {
		struct list_head list;
		unsigned int num;
		size_t size;
		unsigned int purged_num;
		size_t purged_size;
		struct mutex lock;
	} purgeable;

	uint64_t dma_fence_context;

	/* Sequence number for the last job queued in bin_job_list.
	 * Starts at 0 (no jobs emitted).
	 */
	uint64_t emit_seqno;

	/* Sequence number for the last completed job on the GPU.
	 * Starts at 0 (no jobs completed).
	 */
	uint64_t finished_seqno;

	/* List of all struct vc4_exec_info for jobs to be executed in
	 * the binner. The first job in the list is the one currently
	 * programmed into ct0ca for execution.
	 */
	struct list_head bin_job_list;

	/* List of all struct vc4_exec_info for jobs that have
	 * completed binning and are ready for rendering. The first
	 * job in the list is the one currently programmed into ct1ca
	 * for execution.
	 */
	struct list_head render_job_list;

	/* List of the finished vc4_exec_infos waiting to be freed by
	 * job_done_work.
	 */
	struct list_head job_done_list;
	/* Spinlock used to synchronize the job_list and seqno
	 * accesses between the IRQ handler and GEM ioctls.
	 */
	spinlock_t job_lock;
	wait_queue_head_t job_wait_queue;
	struct work_struct job_done_work;

	/* Used to track the active perfmon, if any. Access to this field is
	 * protected by job_lock.
	 */
	struct vc4_perfmon *active_perfmon;

	/* List of struct vc4_seqno_cb for callbacks to be made from a
	 * workqueue when the given seqno is passed.
	 */
	struct list_head seqno_cb_list;

	/* The memory used for storing binner tile alloc, tile state,
	 * and overflow memory allocations. This is freed when V3D
	 * powers down.
	 */
	struct vc4_bo *bin_bo;

	/* Size of blocks allocated within bin_bo. */
	uint32_t bin_alloc_size;

	/* Bitmask of the bin_alloc_size chunks in bin_bo that are
	 * used.
	 */
	uint32_t bin_alloc_used;

	/* Bitmask of the current bin_alloc used for overflow memory. */
	uint32_t bin_alloc_overflow;

	/* Incremented when an underrun error happens after an atomic commit.
	 * This is particularly useful to detect when a specific modeset is too
	 * demanding in terms of memory or HVS bandwidth, which is hard to guess
	 * at atomic check time.
	 */
	atomic_t underrun;

	struct work_struct overflow_mem_work;

	int power_refcount;

	/* Set to true when the load tracker is active. */
	bool load_tracker_enabled;

	/* Mutex controlling the power refcount. */
	struct mutex power_lock;

	struct {
		struct timer_list timer;
		struct work_struct reset_work;
	} hangcheck;

	struct drm_modeset_lock ctm_state_lock;
	struct drm_private_obj ctm_manager;
	struct drm_private_obj hvs_channels;
	struct drm_private_obj load_tracker;

	/* Mutex for binner bo allocation. */
	struct mutex bin_bo_lock;
	/* Reference count for our binner bo. */
	struct kref bin_bo_kref;
};

#define to_vc4_dev(_dev) \
	container_of_const(_dev, struct vc4_dev, base)
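
/*
 * Illustrative sketch, not part of the driver: since emit_seqno tracks the
 * last job queued and finished_seqno the last job completed, "all submitted
 * jobs have finished" can be tested by comparing the two under job_lock,
 * roughly like this:
 *
 *	struct vc4_dev *vc4 = to_vc4_dev(dev);
 *	unsigned long flags;
 *	bool idle;
 *
 *	spin_lock_irqsave(&vc4->job_lock, flags);
 *	idle = vc4->finished_seqno == vc4->emit_seqno;
 *	spin_unlock_irqrestore(&vc4->job_lock, flags);
 *
 * Waiting on a specific seqno should go through vc4_wait_for_seqno(),
 * declared later in this header.
 */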

struct vc4_bo {
	struct drm_gem_dma_object base;

	/* seqno of the last job to render using this BO. */
	uint64_t seqno;

	/* seqno of the last job to use the RCL to write to this BO.
	 *
	 * Note that this doesn't include binner overflow memory
	 * writes.
	 */
	uint64_t write_seqno;

	bool t_format;

	/* List entry for the BO's position in either
	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
	 */
	struct list_head unref_head;

	/* Time in jiffies when the BO was put in vc4->bo_cache. */
	unsigned long free_time;

	/* List entry for the BO's position in vc4_dev->bo_cache.size_list */
	struct list_head size_head;

	/* Struct for shader validation state, if created by
	 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
	 */
	struct vc4_validated_shader_info *validated_shader;

	/* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
	 * for user-allocated labels.
	 */
	int label;

	/* Count the number of active users. This is needed to determine
	 * whether we can move the BO to the purgeable list or not (when the BO
	 * is used by the GPU or the display engine we can't purge it).
	 */
	refcount_t usecnt;

	/* Store purgeable/purged state here. */
	u32 madv;
	struct mutex madv_lock;
};

#define to_vc4_bo(_bo) \
	container_of_const(to_drm_gem_dma_obj(_bo), struct vc4_bo, base)

struct vc4_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* vc4 seqno for signaled() test */
	uint64_t seqno;
};

#define to_vc4_fence(_fence) \
	container_of_const(_fence, struct vc4_fence, base)

struct vc4_seqno_cb {
	struct work_struct work;
	uint64_t seqno;
	void (*func)(struct vc4_seqno_cb *cb);
};

struct vc4_v3d {
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	void __iomem *regs;
	struct clk *clk;
	struct debugfs_regset32 regset;
};

struct vc4_hvs {
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	void __iomem *regs;
	u32 __iomem *dlist;

	struct clk *core_clk;

	unsigned long max_core_rate;

	/* Memory manager for CRTCs to allocate space in the display
	 * list. Units are dwords.
	 */
	struct drm_mm dlist_mm;
	/* Memory manager for the LBM memory used by HVS scaling. */
	struct drm_mm lbm_mm;
	spinlock_t mm_lock;

	struct drm_mm_node mitchell_netravali_filter;

	struct debugfs_regset32 regset;

	/*
	 * Even though HDMI0 on the RPi4 can output modes requiring a pixel
	 * rate higher than 297MHz, it needs some adjustments in the
	 * config.txt file to be able to do so and thus won't always be
	 * available.
	 */
	bool vc5_hdmi_enable_hdmi_20;

	/*
	 * 4096x2160@60 requires a core overclock to work, so register
	 * whether that is sufficient.
	 */
	bool vc5_hdmi_enable_4096by2160;
};
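
/*
 * Illustrative sketch, not part of the driver: dlist_mm hands out display
 * list space in dword units and is protected by mm_lock, so a CRTC dlist
 * allocation looks roughly like this ("node" is a struct drm_mm_node owned
 * by the caller, "dlist_count" the number of dwords needed):
 *
 *	unsigned long flags;
 *	int ret;
 *
 *	spin_lock_irqsave(&hvs->mm_lock, flags);
 *	ret = drm_mm_insert_node(&hvs->dlist_mm, &node, dlist_count);
 *	spin_unlock_irqrestore(&hvs->mm_lock, flags);
 */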

#define HVS_NUM_CHANNELS 3

struct vc4_hvs_state {
	struct drm_private_state base;
	unsigned long core_clock_rate;

	struct {
		unsigned in_use: 1;
		unsigned long fifo_load;
		struct drm_crtc_commit *pending_commit;
	} fifo_state[HVS_NUM_CHANNELS];
};

#define to_vc4_hvs_state(_state) \
	container_of_const(_state, struct vc4_hvs_state, base)

struct vc4_hvs_state *vc4_hvs_get_global_state(struct drm_atomic_state *state);
struct vc4_hvs_state *vc4_hvs_get_old_global_state(const struct drm_atomic_state *state);
struct vc4_hvs_state *vc4_hvs_get_new_global_state(const struct drm_atomic_state *state);

struct vc4_plane {
	struct drm_plane base;
};

#define to_vc4_plane(_plane) \
	container_of_const(_plane, struct vc4_plane, base)

enum vc4_scaling_mode {
	VC4_SCALING_NONE,
	VC4_SCALING_TPZ,
	VC4_SCALING_PPF,
};

struct vc4_plane_state {
	struct drm_plane_state base;
	/* System memory copy of the display list for this element, computed
	 * at atomic_check time.
	 */
	u32 *dlist;
	u32 dlist_size; /* Number of dwords allocated for the display list */
	u32 dlist_count; /* Number of used dwords in the display list. */

	/* Offset in the dlist to various words, for pageflip or
	 * cursor updates.
	 */
	u32 pos0_offset;
	u32 pos2_offset;
	u32 ptr0_offset;
	u32 lbm_offset;

	/* Offset where the plane's dlist was last stored in the
	 * hardware at vc4_crtc_atomic_flush() time.
	 */
	u32 __iomem *hw_dlist;

	/* Clipped coordinates of the plane on the display. */
	int crtc_x, crtc_y, crtc_w, crtc_h;
	/* Clipped area being scanned from in the FB. */
	u32 src_x, src_y;

	u32 src_w[2], src_h[2];

	/* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
	enum vc4_scaling_mode x_scaling[2], y_scaling[2];
	bool is_unity;
	bool is_yuv;

	/* Offset to start scanning out from the start of the plane's
	 * BO.
	 */
	u32 offsets[3];

	/* Our allocation in LBM for temporary storage during scaling. */
	struct drm_mm_node lbm;

	/* Set when the plane has per-pixel alpha content or does not cover
	 * the entire screen. This is a hint to the CRTC that it might need
	 * to enable background color fill.
	 */
	bool needs_bg_fill;

	/* Mark the dlist as initialized. Useful to avoid initializing it twice
	 * when async update is not possible.
	 */
	bool dlist_initialized;

	/* Load of this plane on the HVS block. The load is expressed in HVS
	 * cycles/sec.
	 */
	u64 hvs_load;

	/* Memory bandwidth needed for this plane. This is expressed in
	 * bytes/sec.
	 */
	u64 membus_load;
};

#define to_vc4_plane_state(_state) \
	container_of_const(_state, struct vc4_plane_state, base)
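
/*
 * Illustrative sketch, not part of the driver: the plane's dlist is built
 * one dword at a time at atomic_check time, with dlist_count tracking how
 * much of the dlist_size allocation is in use. A hypothetical append helper
 * would look roughly like this:
 *
 *	static void example_dlist_write(struct vc4_plane_state *state, u32 val)
 *	{
 *		if (state->dlist_count == state->dlist_size)
 *			return;	// a real helper would grow the allocation
 *		state->dlist[state->dlist_count++] = val;
 *	}
 *
 * The finished list is later copied to the hardware dlist memory by
 * vc4_plane_write_dlist(), declared at the end of this header.
 */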

enum vc4_encoder_type {
	VC4_ENCODER_TYPE_NONE,
	VC4_ENCODER_TYPE_HDMI0,
	VC4_ENCODER_TYPE_HDMI1,
	VC4_ENCODER_TYPE_VEC,
	VC4_ENCODER_TYPE_DSI0,
	VC4_ENCODER_TYPE_DSI1,
	VC4_ENCODER_TYPE_SMI,
	VC4_ENCODER_TYPE_DPI,
	VC4_ENCODER_TYPE_TXP,
};

struct vc4_encoder {
	struct drm_encoder base;
	enum vc4_encoder_type type;
	u32 clock_select;

	void (*pre_crtc_configure)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*pre_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*post_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state);

	void (*post_crtc_disable)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*post_crtc_powerdown)(struct drm_encoder *encoder, struct drm_atomic_state *state);
};

#define to_vc4_encoder(_encoder) \
	container_of_const(_encoder, struct vc4_encoder, base)

static inline
struct drm_encoder *vc4_find_encoder_by_type(struct drm_device *drm,
					     enum vc4_encoder_type type)
{
	struct drm_encoder *encoder;

	drm_for_each_encoder(encoder, drm) {
		struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder);

		if (vc4_encoder->type == type)
			return encoder;
	}

	return NULL;
}

struct vc4_crtc_data {
	const char *name;

	const char *debugfs_name;

	/* Bitmask of channels (FIFOs) of the HVS that the output can source from. */
	unsigned int hvs_available_channels;

	/* Which output of the HVS this pixelvalve sources from. */
	int hvs_output;
};

extern const struct vc4_crtc_data vc4_txp_crtc_data;

struct vc4_pv_data {
	struct vc4_crtc_data base;

	/* Depth of the PixelValve FIFO in bytes */
	unsigned int fifo_depth;

	/* Number of pixels output per clock period */
	u8 pixels_per_clock;

	enum vc4_encoder_type encoder_types[4];
};

extern const struct vc4_pv_data bcm2835_pv0_data;
extern const struct vc4_pv_data bcm2835_pv1_data;
extern const struct vc4_pv_data bcm2835_pv2_data;
extern const struct vc4_pv_data bcm2711_pv0_data;
extern const struct vc4_pv_data bcm2711_pv1_data;
extern const struct vc4_pv_data bcm2711_pv2_data;
extern const struct vc4_pv_data bcm2711_pv3_data;
extern const struct vc4_pv_data bcm2711_pv4_data;

struct vc4_crtc {
	struct drm_crtc base;
	struct platform_device *pdev;
	const struct vc4_crtc_data *data;
	void __iomem *regs;

	/* Timestamp at start of vblank irq - unaffected by lock delays. */
	ktime_t t_vblank;

	u8 lut_r[256];
	u8 lut_g[256];
	u8 lut_b[256];

	struct drm_pending_vblank_event *event;

	struct debugfs_regset32 regset;

	/**
	 * @feeds_txp: True if the CRTC feeds our writeback controller.
	 */
	bool feeds_txp;

	/**
	 * @irq_lock: Spinlock protecting the resources shared between
	 * the atomic code and our vblank handler.
	 */
	spinlock_t irq_lock;

	/**
	 * @current_dlist: Start offset of the display list currently
	 * set in the HVS for that CRTC. Protected by @irq_lock, and
	 * copied in vc4_hvs_update_dlist() for the CRTC interrupt
	 * handler to have access to that value.
	 */
	unsigned int current_dlist;

	/**
	 * @current_hvs_channel: HVS channel currently assigned to the
	 * CRTC. Protected by @irq_lock, and copied in
	 * vc4_hvs_atomic_begin() for the CRTC interrupt handler to have
	 * access to that value.
	 */
	unsigned int current_hvs_channel;
};

#define to_vc4_crtc(_crtc) \
	container_of_const(_crtc, struct vc4_crtc, base)
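
/*
 * Illustrative sketch, not part of the driver: current_dlist and
 * current_hvs_channel are shared with the vblank interrupt handler, so both
 * the atomic code and the handler access them under irq_lock, e.g.:
 *
 *	unsigned long flags;
 *
 *	spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
 *	vc4_crtc->current_dlist = new_dlist_offset;
 *	spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
 *
 * "new_dlist_offset" is a hypothetical name for the offset handed back by
 * the HVS dlist allocator.
 */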

static inline const struct vc4_crtc_data *
vc4_crtc_to_vc4_crtc_data(const struct vc4_crtc *crtc)
{
	return crtc->data;
}

static inline const struct vc4_pv_data *
vc4_crtc_to_vc4_pv_data(const struct vc4_crtc *crtc)
{
	const struct vc4_crtc_data *data = vc4_crtc_to_vc4_crtc_data(crtc);

	return container_of_const(data, struct vc4_pv_data, base);
}

struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc,
					 struct drm_crtc_state *state);

struct vc4_crtc_state {
	struct drm_crtc_state base;
	/* Dlist area for this CRTC configuration. */
	struct drm_mm_node mm;
	bool txp_armed;
	unsigned int assigned_channel;

	struct {
		unsigned int left;
		unsigned int right;
		unsigned int top;
		unsigned int bottom;
	} margins;

	unsigned long hvs_load;

	/* Transitional state below, only valid during atomic commits */
	bool update_muxing;
};

#define VC4_HVS_CHANNEL_DISABLED ((unsigned int)-1)

#define to_vc4_crtc_state(_state) \
	container_of_const(_state, struct vc4_crtc_state, base)

#define V3D_READ(offset)							\
	({									\
		kunit_fail_current_test("Accessing a register in a unit test!\n"); \
		readl(vc4->v3d->regs + (offset));				\
	})

#define V3D_WRITE(offset, val)							\
	do {									\
		kunit_fail_current_test("Accessing a register in a unit test!\n"); \
		writel(val, vc4->v3d->regs + (offset));				\
	} while (0)

#define HVS_READ(offset)							\
	({									\
		kunit_fail_current_test("Accessing a register in a unit test!\n"); \
		readl(hvs->regs + (offset));					\
	})

#define HVS_WRITE(offset, val)							\
	do {									\
		kunit_fail_current_test("Accessing a register in a unit test!\n"); \
		writel(val, hvs->regs + (offset));				\
	} while (0)

#define VC4_REG32(reg) { .name = #reg, .offset = reg }
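
/*
 * Illustrative sketch, not part of the driver: VC4_REG32() expands to a
 * struct debugfs_reg32 initializer, so a register dump table for one of the
 * regset members above can be declared as follows (the register names come
 * from vc4_regs.h, which this header does not include, and the table name
 * is hypothetical):
 *
 *	static const struct debugfs_reg32 example_hvs_regs[] = {
 *		VC4_REG32(SCALER_DISPCTRL),
 *		VC4_REG32(SCALER_DISPSTAT),
 *	};
 */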

struct vc4_exec_info {
	struct vc4_dev *dev;

	/* Sequence number for this bin/render job. */
	uint64_t seqno;

	/* Latest write_seqno of any BO that binning depends on. */
	uint64_t bin_dep_seqno;

	struct dma_fence *fence;

	/* Last current addresses the hardware was processing when the
	 * hangcheck timer checked on us.
	 */
	uint32_t last_ct0ca, last_ct1ca;

	/* Kernel-space copy of the ioctl arguments */
	struct drm_vc4_submit_cl *args;

	/* This is the array of BOs that were looked up at the start of exec.
	 * Command validation will use indices into this array.
	 */
	struct drm_gem_object **bo;
	uint32_t bo_count;

	/* List of BOs that are being written by the RCL. Other than
	 * the binner temporary storage, this is all the BOs written
	 * by the job.
	 */
	struct drm_gem_dma_object *rcl_write_bo[4];
	uint32_t rcl_write_bo_count;

	/* Pointers for our position in vc4->job_list */
	struct list_head head;

	/* List of other BOs used in the job that need to be released
	 * once the job is complete.
	 */
	struct list_head unref_list;

	/* Current unvalidated indices into @bo loaded by the non-hardware
	 * VC4_PACKET_GEM_HANDLES.
	 */
	uint32_t bo_index[2];

	/* This is the BO where we store the validated command lists, shader
	 * records, and uniforms.
	 */
	struct drm_gem_dma_object *exec_bo;

	/**
	 * This tracks the per-shader-record state (packet 64) that
	 * determines the length of the shader record and the offset
	 * it's expected to be found at. It gets read in from the
	 * command lists.
	 */
	struct vc4_shader_state {
		uint32_t addr;
		/* Maximum vertex index referenced by any primitive using this
		 * shader state.
		 */
		uint32_t max_index;
	} *shader_state;

	/** How many shader states the user declared they were using. */
	uint32_t shader_state_size;
	/** How many shader state records the validator has seen. */
	uint32_t shader_state_count;

	bool found_tile_binning_mode_config_packet;
	bool found_start_tile_binning_packet;
	bool found_increment_semaphore_packet;
	bool found_flush;
	uint8_t bin_tiles_x, bin_tiles_y;
	/* Physical address of the start of the tile alloc array
	 * (where each tile's binned CL will start)
	 */
	uint32_t tile_alloc_offset;
	/* Bitmask of which binner slots are freed when this job completes. */
	uint32_t bin_slots;

	/**
	 * Computed addresses pointing into exec_bo where we start the
	 * bin thread (ct0) and render thread (ct1).
	 */
	uint32_t ct0ca, ct0ea;
	uint32_t ct1ca, ct1ea;

	/* Pointer to the unvalidated bin CL (if present). */
	void *bin_u;

	/* Pointers to the shader recs. The paddr gets incremented as CL
	 * packets are relocated in validate_gl_shader_state, and the vaddrs
	 * (u and v) get incremented and size decremented as the shader recs
	 * themselves are validated.
	 */
	void *shader_rec_u;
	void *shader_rec_v;
	uint32_t shader_rec_p;
	uint32_t shader_rec_size;

	/* Pointers to the uniform data. These pointers are incremented, and
	 * size decremented, as each batch of uniforms is uploaded.
	 */
	void *uniforms_u;
	void *uniforms_v;
	uint32_t uniforms_p;
	uint32_t uniforms_size;

	/* Pointer to a performance monitor object if the user requested it,
	 * NULL otherwise.
	 */
	struct vc4_perfmon *perfmon;

	/* Whether the exec has taken a reference to the binner BO, which should
	 * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet.
	 */
	bool bin_bo_used;
};

/* Per-open file private data. Any driver-specific resource that has to be
 * released when the DRM file is closed should be placed here.
 */
struct vc4_file {
	struct vc4_dev *dev;

	struct {
		struct idr idr;
		struct mutex lock;
	} perfmon;

	bool bin_bo_used;
};

static inline struct vc4_exec_info *
vc4_first_bin_job(struct vc4_dev *vc4)
{
	return list_first_entry_or_null(&vc4->bin_job_list,
					struct vc4_exec_info, head);
}

static inline struct vc4_exec_info *
vc4_first_render_job(struct vc4_dev *vc4)
{
	return list_first_entry_or_null(&vc4->render_job_list,
					struct vc4_exec_info, head);
}

static inline struct vc4_exec_info *
vc4_last_render_job(struct vc4_dev *vc4)
{
	if (list_empty(&vc4->render_job_list))
		return NULL;
	return list_last_entry(&vc4->render_job_list,
			       struct vc4_exec_info, head);
}

/**
 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
 * setup parameters.
 *
 * This will be used at draw time to relocate the reference to the texture
 * contents in p0, and validate that the offset combined with
 * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
 * Note that the hardware treats unprovided config parameters as 0, so not all
 * of them need to be set up for every texture sample, and we'll store ~0 as
 * the offset to mark the unused ones.
 *
 * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
 * Setup") for definitions of the texture parameters.
 */
struct vc4_texture_sample_info {
	bool is_direct;
	uint32_t p_offset[4];
};

/**
 * struct vc4_validated_shader_info - information about validated shaders that
 * needs to be used from command list validation.
 *
 * For a given shader, each time a shader state record references it, we need
 * to verify that the shader doesn't read more uniforms than the shader state
 * record's uniform BO pointer can provide, and we need to apply relocations
 * and validate the shader state record's uniforms that define the texture
 * samples.
 */
struct vc4_validated_shader_info {
	uint32_t uniforms_size;
	uint32_t uniforms_src_size;
	uint32_t num_texture_samples;
	struct vc4_texture_sample_info *texture_samples;

	uint32_t num_uniform_addr_offsets;
	uint32_t *uniform_addr_offsets;

	bool is_threaded;
};

/**
 * __wait_for - magic wait macro
 *
 * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
 * important that we check the condition again after having timed out, since the
 * timeout could be due to preemption or similar and we've never had a chance to
 * check the condition before the timeout.
 */
#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
	long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
	int ret__; \
	might_sleep(); \
	for (;;) { \
		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
		OP; \
		/* Guarantee COND check prior to timeout */ \
		barrier(); \
		if (COND) { \
			ret__ = 0; \
			break; \
		} \
		if (expired__) { \
			ret__ = -ETIMEDOUT; \
			break; \
		} \
		usleep_range(wait__, wait__ * 2); \
		if (wait__ < (Wmax)) \
			wait__ <<= 1; \
	} \
	ret__; \
})

#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \
						   (Wmax))
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
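
/*
 * Illustrative sketch, not part of the driver: wait_for() re-evaluates its
 * condition with exponential backoff until it becomes true or the timeout
 * (in milliseconds) expires, and returns 0 or -ETIMEDOUT, e.g.:
 *
 *	int ret;
 *
 *	ret = wait_for(readl(regs + SOME_STATUS_REG) & SOME_READY_BIT, 10);
 *	if (ret)
 *		return ret;
 *
 * "regs", "SOME_STATUS_REG" and "SOME_READY_BIT" are placeholders for
 * whatever condition the caller needs to poll.
 */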

/* vc4_bo.c */
struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
			     bool from_cache, enum vc4_kernel_bo_type type);
int vc4_bo_dumb_create(struct drm_file *file_priv,
		       struct drm_device *dev,
		       struct drm_mode_create_dumb *args);
int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file_priv);
int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv);
int vc4_bo_cache_init(struct drm_device *dev);
int vc4_bo_inc_usecnt(struct vc4_bo *bo);
void vc4_bo_dec_usecnt(struct vc4_bo *bo);
void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
int vc4_bo_debugfs_init(struct drm_minor *minor);

/* vc4_crtc.c */
extern struct platform_driver vc4_crtc_driver;
int vc4_crtc_disable_at_boot(struct drm_crtc *crtc);
int __vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev,
		    struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data,
		    struct drm_plane *primary_plane,
		    const struct drm_crtc_funcs *crtc_funcs,
		    const struct drm_crtc_helper_funcs *crtc_helper_funcs,
		    bool feeds_txp);
int vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev,
		  struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data,
		  const struct drm_crtc_funcs *crtc_funcs,
		  const struct drm_crtc_helper_funcs *crtc_helper_funcs,
		  bool feeds_txp);
int vc4_page_flip(struct drm_crtc *crtc,
		  struct drm_framebuffer *fb,
		  struct drm_pending_vblank_event *event,
		  uint32_t flags,
		  struct drm_modeset_acquire_ctx *ctx);
int vc4_crtc_atomic_check(struct drm_crtc *crtc,
			  struct drm_atomic_state *state);
struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc);
void vc4_crtc_destroy_state(struct drm_crtc *crtc,
			    struct drm_crtc_state *state);
void vc4_crtc_reset(struct drm_crtc *crtc);
void vc4_crtc_handle_vblank(struct vc4_crtc *crtc);
void vc4_crtc_send_vblank(struct drm_crtc *crtc);
int vc4_crtc_late_register(struct drm_crtc *crtc);
void vc4_crtc_get_margins(struct drm_crtc_state *state,
			  unsigned int *left, unsigned int *right,
			  unsigned int *top, unsigned int *bottom);

/* vc4_debugfs.c */
void vc4_debugfs_init(struct drm_minor *minor);
#ifdef CONFIG_DEBUG_FS
void vc4_debugfs_add_regset32(struct drm_device *drm,
			      const char *filename,
			      struct debugfs_regset32 *regset);
#else

static inline void vc4_debugfs_add_regset32(struct drm_device *drm,
					    const char *filename,
					    struct debugfs_regset32 *regset)
{}
#endif

/* vc4_drv.c */
void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args);

/* vc4_dpi.c */
extern struct platform_driver vc4_dpi_driver;

/* vc4_dsi.c */
extern struct platform_driver vc4_dsi_driver;

/* vc4_fence.c */
extern const struct dma_fence_ops vc4_fence_ops;

/* vc4_gem.c */
int vc4_gem_init(struct drm_device *dev);
int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
void vc4_submit_next_bin_job(struct drm_device *dev);
void vc4_submit_next_render_job(struct drm_device *dev);
void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
		       uint64_t timeout_ns, bool interruptible);
void vc4_job_handle_completed(struct vc4_dev *vc4);
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb));
int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv);

/* vc4_hdmi.c */
extern struct platform_driver vc4_hdmi_driver;

/* vc4_vec.c */
extern struct platform_driver vc4_vec_driver;

/* vc4_txp.c */
extern struct platform_driver vc4_txp_driver;

/* vc4_irq.c */
void vc4_irq_enable(struct drm_device *dev);
void vc4_irq_disable(struct drm_device *dev);
int vc4_irq_install(struct drm_device *dev, int irq);
void vc4_irq_uninstall(struct drm_device *dev);
void vc4_irq_reset(struct drm_device *dev);

/* vc4_hvs.c */
extern struct platform_driver vc4_hvs_driver;
struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4, struct platform_device *pdev);
void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int output);
int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output);
u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo);
int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_dump_state(struct vc4_hvs *hvs);
void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel);
void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel);
int vc4_hvs_debugfs_init(struct drm_minor *minor);

/* vc4_kms.c */
int vc4_kms_load(struct drm_device *dev);

/* vc4_plane.c */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs);
int vc4_plane_create_additional_planes(struct drm_device *dev);
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(const struct drm_plane_state *state);
void vc4_plane_async_set_fb(struct drm_plane *plane,
			    struct drm_framebuffer *fb);

/* vc4_v3d.c */
extern struct platform_driver vc4_v3d_driver;
extern const struct of_device_id vc4_v3d_dt_match[];
int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used);
void vc4_v3d_bin_bo_put(struct vc4_dev *vc4);
int vc4_v3d_pm_get(struct vc4_dev *vc4);
void vc4_v3d_pm_put(struct vc4_dev *vc4);
int vc4_v3d_debugfs_init(struct drm_minor *minor);

/* vc4_validate.c */
int
vc4_validate_bin_cl(struct drm_device *dev,
		    void *validated,
		    void *unvalidated,
		    struct vc4_exec_info *exec);

int
vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);

struct drm_gem_dma_object *vc4_use_bo(struct vc4_exec_info *exec,
				      uint32_t hindex);

int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);

bool vc4_check_tex_size(struct vc4_exec_info *exec,
			struct drm_gem_dma_object *fbo,
			uint32_t offset, uint8_t tiling_format,
			uint32_t width, uint32_t height, uint8_t cpp);

/* vc4_validate_shader.c */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_dma_object *shader_obj);

/* vc4_perfmon.c */
void vc4_perfmon_get(struct vc4_perfmon *perfmon);
void vc4_perfmon_put(struct vc4_perfmon *perfmon);
void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
		      bool capture);
struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
void vc4_perfmon_open_file(struct vc4_file *vc4file);
void vc4_perfmon_close_file(struct vc4_file *vc4file);
int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);
int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file_priv);

#endif /* _VC4_DRV_H_ */