/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015 Broadcom
 */
#ifndef _VC4_DRV_H_
#define _VC4_DRV_H_

#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>

#include <drm/drm_atomic.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_device.h>
#include <drm/drm_encoder.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_mm.h>
#include <drm/drm_modeset_lock.h>

#include <kunit/test-bug.h>

#include "uapi/drm/vc4_drm.h"

struct drm_device;
struct drm_gem_object;

extern const struct drm_driver vc4_drm_driver;
extern const struct drm_driver vc5_drm_driver;

/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
 * this.
 */
enum vc4_kernel_bo_type {
	/* Any kernel allocation (gem_create_object hook) before it
	 * gets another type set.
	 */
	VC4_BO_TYPE_KERNEL,
	VC4_BO_TYPE_V3D,
	VC4_BO_TYPE_V3D_SHADER,
	VC4_BO_TYPE_DUMB,
	VC4_BO_TYPE_BIN,
	VC4_BO_TYPE_RCL,
	VC4_BO_TYPE_BCL,
	VC4_BO_TYPE_KERNEL_CACHE,
	VC4_BO_TYPE_COUNT
};
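
/* For illustration only (a sketch, not the authoritative table — see
 * vc4_bo.c for the real one): the matching bo_type_names[] array pairs
 * each type above with a debugfs label, shaped roughly like:
 *
 *	static const char * const bo_type_names[] = {
 *		[VC4_BO_TYPE_KERNEL] = "kernel",
 *		[VC4_BO_TYPE_DUMB] = "dumb",
 *		// ...one entry per type, VC4_BO_TYPE_COUNT entries total
 *	};
 */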

/* Performance monitor object. The perfmon lifetime is controlled by userspace
 * using perfmon related ioctls. A perfmon can be attached to a submit_cl
 * request, and when this is the case, HW perf counters will be activated just
 * before the submit_cl is submitted to the GPU and disabled when the job is
 * done. This way, only events related to a specific job will be counted.
 */
struct vc4_perfmon {
	struct vc4_dev *dev;

	/* Tracks the number of users of the perfmon, when this counter reaches
	 * zero the perfmon is destroyed.
	 */
	refcount_t refcnt;

	/* Number of counters activated in this perfmon instance
	 * (at most DRM_VC4_MAX_PERF_COUNTERS).
	 */
	u8 ncounters;

	/* Events counted by the HW perf counters. */
	u8 events[DRM_VC4_MAX_PERF_COUNTERS];

	/* Storage for counter values. Counters are incremented by the HW
	 * perf counter values every time the perfmon is attached to a GPU job.
	 * This way, perfmon users don't have to retrieve the results after
	 * each job if they want to track events covering several submissions.
	 * Note that counter values can't be reset, but you can fake a reset by
	 * destroying the perfmon and creating a new one.
	 */
	u64 counters[] __counted_by(ncounters);
};
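
/* Illustrative sketch (not driver code): a kernel user that keeps a
 * perfmon alive across a job pins and releases it with the refcount
 * helpers declared later in this header:
 *
 *	vc4_perfmon_get(perfmon);		// take a reference
 *	vc4_perfmon_start(vc4, perfmon);	// HW counters run
 *	vc4_perfmon_stop(vc4, perfmon, true);	// capture results
 *	vc4_perfmon_put(perfmon);		// freed at refcnt zero
 */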

enum vc4_gen {
	VC4_GEN_4,
	VC4_GEN_5,
	VC4_GEN_6_C,
	VC4_GEN_6_D,
};

struct vc4_dev {
	struct drm_device base;
	struct device *dev;

	enum vc4_gen gen;

	unsigned int irq;

	struct vc4_hvs *hvs;
	struct vc4_v3d *v3d;

	struct vc4_hang_state *hang_state;

	/* The kernel-space BO cache.  Tracks buffers that have been
	 * unreferenced by all other users (refcounts of 0!) but not
	 * yet freed, so we can do cheap allocations.
	 */
	struct vc4_bo_cache {
		/* Array of list heads for entries in the BO cache,
		 * based on number of pages, so we can do O(1) lookups
		 * in the cache when allocating.
		 */
		struct list_head *size_list;
		uint32_t size_list_size;

		/* List of all BOs in the cache, ordered by age, so we
		 * can do O(1) lookups when trying to free old
		 * buffers.
		 */
		struct list_head time_list;
		struct work_struct time_work;
		struct timer_list time_timer;
	} bo_cache;

	u32 num_labels;
	struct vc4_label {
		const char *name;
		u32 num_allocated;
		u32 size_allocated;
	} *bo_labels;

	/* Protects bo_cache and bo_labels. */
	struct mutex bo_lock;

	/* Purgeable BO pool. All BOs in this pool can have their memory
	 * reclaimed if the driver is unable to allocate new BOs. We also
	 * keep stats related to the purge mechanism here.
	 */
	struct {
		struct list_head list;
		unsigned int num;
		size_t size;
		unsigned int purged_num;
		size_t purged_size;
		struct mutex lock;
	} purgeable;
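
	/* Illustrative flow (a sketch, assuming the madvise path in
	 * vc4_bo.c): a BO enters and leaves this pool through helpers
	 * declared later in this header, driven by the madvise ioctl:
	 *
	 *	vc4_bo_add_to_purgeable_pool(bo);	// VC4_MADV_DONTNEED
	 *	vc4_bo_remove_from_purgeable_pool(bo);	// VC4_MADV_WILLNEED
	 */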

	uint64_t dma_fence_context;

	/* Sequence number for the last job queued in bin_job_list.
	 * Starts at 0 (no jobs emitted).
	 */
	uint64_t emit_seqno;

	/* Sequence number for the last completed job on the GPU.
	 * Starts at 0 (no jobs completed).
	 */
	uint64_t finished_seqno;

	/* List of all struct vc4_exec_info for jobs to be executed in
	 * the binner.  The first job in the list is the one currently
	 * programmed into ct0ca for execution.
	 */
	struct list_head bin_job_list;

	/* List of all struct vc4_exec_info for jobs that have
	 * completed binning and are ready for rendering.  The first
	 * job in the list is the one currently programmed into ct1ca
	 * for execution.
	 */
	struct list_head render_job_list;

	/* List of the finished vc4_exec_infos waiting to be freed by
	 * job_done_work.
	 */
	struct list_head job_done_list;
	/* Spinlock used to synchronize the job_list and seqno
	 * accesses between the IRQ handler and GEM ioctls.
	 */
	spinlock_t job_lock;
	wait_queue_head_t job_wait_queue;
	struct work_struct job_done_work;

	/* Used to track the active perfmon if any. Access to this field is
	 * protected by job_lock.
	 */
	struct vc4_perfmon *active_perfmon;

	/* List of struct vc4_seqno_cb for callbacks to be made from a
	 * workqueue when the given seqno is passed.
	 */
	struct list_head seqno_cb_list;

	/* The memory used for storing binner tile alloc, tile state,
	 * and overflow memory allocations.  This is freed when V3D
	 * powers down.
	 */
	struct vc4_bo *bin_bo;

	/* Size of blocks allocated within bin_bo. */
	uint32_t bin_alloc_size;

	/* Bitmask of the bin_alloc_size chunks in bin_bo that are
	 * used.
	 */
	uint32_t bin_alloc_used;

	/* Bitmask of the current bin_alloc used for overflow memory. */
	uint32_t bin_alloc_overflow;

	/* Incremented when an underrun error happens after an atomic commit.
	 * This is particularly useful to detect when a specific modeset is too
	 * demanding in terms of memory or HVS bandwidth, which is hard to
	 * guess at atomic check time.
	 */
	atomic_t underrun;

	struct work_struct overflow_mem_work;

	int power_refcount;

	/* Set to true when the load tracker is active. */
	bool load_tracker_enabled;

	/* Mutex controlling the power refcount. */
	struct mutex power_lock;

	struct {
		struct timer_list timer;
		struct work_struct reset_work;
	} hangcheck;

	struct drm_modeset_lock ctm_state_lock;
	struct drm_private_obj ctm_manager;
	struct drm_private_obj hvs_channels;
	struct drm_private_obj load_tracker;

	/* Mutex for binner bo allocation. */
	struct mutex bin_bo_lock;
	/* Reference count for our binner bo. */
	struct kref bin_bo_kref;
};
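
/* Illustrative sketch: the binner BO is shared between jobs and
 * refcounted through bin_bo_kref, using helpers declared later in this
 * header:
 *
 *	bool used;
 *	int ret = vc4_v3d_bin_bo_get(vc4, &used);	// takes a reference
 *	if (!ret)
 *		vc4_v3d_bin_bo_put(vc4);		// drops it again
 */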

#define to_vc4_dev(_dev)			\
	container_of_const(_dev, struct vc4_dev, base)
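
/* Illustrative usage (sketch): the to_vc4_*() wrappers in this header
 * recover the driver object from the embedded DRM core object, with
 * container_of_const() preserving the constness of the argument:
 *
 *	struct vc4_dev *vc4 = to_vc4_dev(drm);
 */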

struct vc4_bo {
	struct drm_gem_dma_object base;

	/* seqno of the last job to render using this BO. */
	uint64_t seqno;

	/* seqno of the last job to use the RCL to write to this BO.
	 *
	 * Note that this doesn't include binner overflow memory
	 * writes.
	 */
	uint64_t write_seqno;

	bool t_format;

	/* List entry for the BO's position in either
	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
	 */
	struct list_head unref_head;

	/* Time in jiffies when the BO was put in vc4->bo_cache. */
	unsigned long free_time;

	/* List entry for the BO's position in vc4_dev->bo_cache.size_list */
	struct list_head size_head;

	/* Struct for shader validation state, if created by
	 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
	 */
	struct vc4_validated_shader_info *validated_shader;

	/* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
	 * for user-allocated labels.
	 */
	int label;

	/* Count the number of active users. This is needed to determine
	 * whether we can move the BO to the purgeable list or not (when the BO
	 * is used by the GPU or the display engine we can't purge it).
	 */
	refcount_t usecnt;

	/* Store purgeable/purged state here */
	u32 madv;
	struct mutex madv_lock;
};

#define to_vc4_bo(_bo)							\
	container_of_const(to_drm_gem_dma_obj(_bo), struct vc4_bo, base)

struct vc4_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* vc4 seqno for signaled() test */
	uint64_t seqno;
};

#define to_vc4_fence(_fence)					\
	container_of_const(_fence, struct vc4_fence, base)

struct vc4_seqno_cb {
	struct work_struct work;
	uint64_t seqno;
	void (*func)(struct vc4_seqno_cb *cb);
};

struct vc4_v3d {
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	void __iomem *regs;
	struct clk *clk;
	struct debugfs_regset32 regset;
};

#define VC4_NUM_UPM_HANDLES 32
struct vc4_upm_refcounts {
	refcount_t refcount;

	/* Allocation size */
	size_t size;
	/* Our allocation in UPM for prefetching. */
	struct drm_mm_node upm;

	/* Pointer back to the HVS structure */
	struct vc4_hvs *hvs;
};

#define HVS_NUM_CHANNELS 3

struct vc4_hvs {
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	void __iomem *regs;
	u32 __iomem *dlist;
	unsigned int dlist_mem_size;

	struct clk *core_clk;
	struct clk *disp_clk;

	unsigned long max_core_rate;

	/* Memory manager for CRTCs to allocate space in the display
	 * list.  Units are dwords.
	 */
	struct drm_mm dlist_mm;

	/* Memory manager for the LBM memory used by HVS scaling. */
	struct drm_mm lbm_mm;

	/* Memory manager for the UPM memory used for prefetching. */
	struct drm_mm upm_mm;
	struct ida upm_handles;
	struct vc4_upm_refcounts upm_refcounts[VC4_NUM_UPM_HANDLES + 1];

	spinlock_t mm_lock;

	struct drm_mm_node mitchell_netravali_filter;

	struct debugfs_regset32 regset;

	/*
	 * Even though HDMI0 on the RPi4 can output modes requiring a
	 * pixel rate higher than 297MHz, it needs some adjustments in
	 * the config.txt file to do so, and thus this support won't
	 * always be available.
	 */
	bool vc5_hdmi_enable_hdmi_20;

	/*
	 * 4096x2160@60 requires a core overclock to work, so record
	 * whether the core clock is sufficient for it.
	 */
	bool vc5_hdmi_enable_4096by2160;
};

#define HVS_UBM_WORD_SIZE 256

struct vc4_hvs_state {
	struct drm_private_state base;
	unsigned long core_clock_rate;

	struct {
		unsigned in_use: 1;
		unsigned long fifo_load;
		struct drm_crtc_commit *pending_commit;
	} fifo_state[HVS_NUM_CHANNELS];
};

#define to_vc4_hvs_state(_state)				\
	container_of_const(_state, struct vc4_hvs_state, base)

struct vc4_hvs_state *vc4_hvs_get_global_state(struct drm_atomic_state *state);
struct vc4_hvs_state *vc4_hvs_get_old_global_state(const struct drm_atomic_state *state);
struct vc4_hvs_state *vc4_hvs_get_new_global_state(const struct drm_atomic_state *state);

struct vc4_plane {
	struct drm_plane base;
};

#define to_vc4_plane(_plane)					\
	container_of_const(_plane, struct vc4_plane, base)

enum vc4_scaling_mode {
	VC4_SCALING_NONE,
	VC4_SCALING_TPZ,
	VC4_SCALING_PPF,
};

struct vc4_plane_state {
	struct drm_plane_state base;
	/* System memory copy of the display list for this element, computed
	 * at atomic_check time.
	 */
	u32 *dlist;
	u32 dlist_size; /* Number of dwords allocated for the display list */
	u32 dlist_count; /* Number of used dwords in the display list. */

	/* Offset in the dlist to various words, for pageflip or
	 * cursor updates.
	 */
	u32 pos0_offset;
	u32 pos2_offset;
	u32 ptr0_offset[DRM_FORMAT_MAX_PLANES];
	u32 lbm_offset;

	/* Offset where the plane's dlist was last stored in the
	 * hardware at vc4_crtc_atomic_flush() time.
	 */
	u32 __iomem *hw_dlist;

	/* Clipped coordinates of the plane on the display. */
	int crtc_x, crtc_y, crtc_w, crtc_h;
	/* Clipped area being scanned from in the FB in u16.16 format */
	u32 src_x, src_y;

	u32 src_w[2], src_h[2];

	/* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
	enum vc4_scaling_mode x_scaling[2], y_scaling[2];
	bool is_unity;
	bool is_yuv;

	/* Our allocation in LBM for temporary storage during scaling. */
	struct drm_mm_node lbm;

	/* The Unified Pre-Fetcher Handle */
	unsigned int upm_handle[DRM_FORMAT_MAX_PLANES];

	/* Number of lines to pre-fetch */
	unsigned int upm_buffer_lines;

	/* Set when the plane has per-pixel alpha content or does not cover
	 * the entire screen. This is a hint to the CRTC that it might need
	 * to enable background color fill.
	 */
	bool needs_bg_fill;

	/* Mark the dlist as initialized. Useful to avoid initializing it twice
	 * when async update is not possible.
	 */
	bool dlist_initialized;

	/* Load of this plane on the HVS block. The load is expressed in HVS
	 * cycles/sec.
	 */
	u64 hvs_load;

	/* Memory bandwidth needed for this plane. This is expressed in
	 * bytes/sec.
	 */
	u64 membus_load;
};

#define to_vc4_plane_state(_state)				\
	container_of_const(_state, struct vc4_plane_state, base)

enum vc4_encoder_type {
	VC4_ENCODER_TYPE_NONE,
	VC4_ENCODER_TYPE_HDMI0,
	VC4_ENCODER_TYPE_HDMI1,
	VC4_ENCODER_TYPE_VEC,
	VC4_ENCODER_TYPE_DSI0,
	VC4_ENCODER_TYPE_DSI1,
	VC4_ENCODER_TYPE_SMI,
	VC4_ENCODER_TYPE_DPI,
	VC4_ENCODER_TYPE_TXP0,
	VC4_ENCODER_TYPE_TXP1,
};

struct vc4_encoder {
	struct drm_encoder base;
	enum vc4_encoder_type type;
	u32 clock_select;

	void (*pre_crtc_configure)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*pre_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*post_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state);

	void (*post_crtc_disable)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*post_crtc_powerdown)(struct drm_encoder *encoder, struct drm_atomic_state *state);
};

#define to_vc4_encoder(_encoder)				\
	container_of_const(_encoder, struct vc4_encoder, base)

static inline
struct drm_encoder *vc4_find_encoder_by_type(struct drm_device *drm,
					     enum vc4_encoder_type type)
{
	struct drm_encoder *encoder;

	drm_for_each_encoder(encoder, drm) {
		struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder);

		if (vc4_encoder->type == type)
			return encoder;
	}

	return NULL;
}
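
/* Illustrative usage (sketch; the caller and error handling are made up):
 *
 *	struct drm_encoder *encoder =
 *		vc4_find_encoder_by_type(drm, VC4_ENCODER_TYPE_HDMI0);
 *	if (!encoder)
 *		return -ENODEV;
 */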

struct vc4_crtc_data {
	const char *name;

	const char *debugfs_name;

	/* Bitmask of channels (FIFOs) of the HVS that the output can source from */
	unsigned int hvs_available_channels;

	/* Which output of the HVS this pixelvalve sources from. */
	int hvs_output;
};

struct vc4_txp_data {
	struct vc4_crtc_data	base;
	enum vc4_encoder_type encoder_type;
	unsigned int high_addr_ptr_reg;
	unsigned int has_byte_enable:1;
	unsigned int size_minus_one:1;
	unsigned int supports_40bit_addresses:1;
};

extern const struct vc4_txp_data bcm2835_txp_data;

struct vc4_pv_data {
	struct vc4_crtc_data	base;

	/* Depth of the PixelValve FIFO in bytes */
	unsigned int fifo_depth;

	/* Number of pixels output per clock period */
	u8 pixels_per_clock;

	enum vc4_encoder_type encoder_types[4];
};

extern const struct vc4_pv_data bcm2835_pv0_data;
extern const struct vc4_pv_data bcm2835_pv1_data;
extern const struct vc4_pv_data bcm2835_pv2_data;
extern const struct vc4_pv_data bcm2711_pv0_data;
extern const struct vc4_pv_data bcm2711_pv1_data;
extern const struct vc4_pv_data bcm2711_pv2_data;
extern const struct vc4_pv_data bcm2711_pv3_data;
extern const struct vc4_pv_data bcm2711_pv4_data;
extern const struct vc4_pv_data bcm2712_pv0_data;
extern const struct vc4_pv_data bcm2712_pv1_data;

struct vc4_crtc {
	struct drm_crtc base;
	struct platform_device *pdev;
	const struct vc4_crtc_data *data;
	void __iomem *regs;

	/* Timestamp at start of vblank irq - unaffected by lock delays. */
	ktime_t t_vblank;

	u8 lut_r[256];
	u8 lut_g[256];
	u8 lut_b[256];

	struct drm_pending_vblank_event *event;

	struct debugfs_regset32 regset;

	/**
	 * @feeds_txp: True if the CRTC feeds our writeback controller.
	 */
	bool feeds_txp;

	/**
	 * @irq_lock: Spinlock protecting the resources shared between
	 * the atomic code and our vblank handler.
	 */
	spinlock_t irq_lock;

	/**
	 * @current_dlist: Start offset of the display list currently
	 * set in the HVS for that CRTC. Protected by @irq_lock, and
	 * copied in vc4_hvs_update_dlist() for the CRTC interrupt
	 * handler to have access to that value.
	 */
	unsigned int current_dlist;

	/**
	 * @current_hvs_channel: HVS channel currently assigned to the
	 * CRTC. Protected by @irq_lock, and copied in
	 * vc4_hvs_atomic_begin() for the CRTC interrupt handler to have
	 * access to that value.
	 */
	unsigned int current_hvs_channel;
};

#define to_vc4_crtc(_crtc)					\
	container_of_const(_crtc, struct vc4_crtc, base)

static inline const struct vc4_crtc_data *
vc4_crtc_to_vc4_crtc_data(const struct vc4_crtc *crtc)
{
	return crtc->data;
}

static inline const struct vc4_pv_data *
vc4_crtc_to_vc4_pv_data(const struct vc4_crtc *crtc)
{
	const struct vc4_crtc_data *data = vc4_crtc_to_vc4_crtc_data(crtc);

	return container_of_const(data, struct vc4_pv_data, base);
}

struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc,
					 struct drm_crtc_state *state);

struct vc4_crtc_state {
	struct drm_crtc_state base;
	/* Dlist area for this CRTC configuration. */
	struct drm_mm_node mm;
	bool txp_armed;
	unsigned int assigned_channel;

	struct drm_connector_tv_margins margins;

	unsigned long hvs_load;

	/* Transitional state below, only valid during atomic commits */
	bool update_muxing;
};

#define VC4_HVS_CHANNEL_DISABLED ((unsigned int)-1)

#define to_vc4_crtc_state(_state)				\
	container_of_const(_state, struct vc4_crtc_state, base)

#define V3D_READ(offset)								\
	({										\
		kunit_fail_current_test("Accessing a register in a unit test!\n");	\
		readl(vc4->v3d->regs + (offset));					\
	})

#define V3D_WRITE(offset, val)								\
	do {										\
		kunit_fail_current_test("Accessing a register in a unit test!\n");	\
		writel(val, vc4->v3d->regs + (offset));					\
	} while (0)

#define HVS_READ(offset)								\
	({										\
		kunit_fail_current_test("Accessing a register in a unit test!\n");	\
		readl(hvs->regs + (offset));						\
	})

#define HVS_WRITE(offset, val)								\
	do {										\
		kunit_fail_current_test("Accessing a register in a unit test!\n");	\
		writel(val, hvs->regs + (offset));					\
	} while (0)

#define HVS_READ6(offset) \
	HVS_READ(hvs->vc4->gen == VC4_GEN_6_C ? SCALER6_ ## offset : SCALER6D_ ## offset)

#define HVS_WRITE6(offset, val) \
	HVS_WRITE(hvs->vc4->gen == VC4_GEN_6_C ? SCALER6_ ## offset : SCALER6D_ ## offset, val)
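
/* Illustrative expansion (sketch; DISP_CTRL0 is a hypothetical register
 * name): HVS_READ6(DISP_CTRL0) reads SCALER6_DISP_CTRL0 on VC4_GEN_6_C
 * hardware and SCALER6D_DISP_CTRL0 on VC4_GEN_6_D hardware.
 */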

#define VC4_REG32(reg) { .name = #reg, .offset = reg }
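
/* Illustrative usage (sketch; the register list is made up): VC4_REG32()
 * fills one struct debugfs_reg32 entry for a debugfs_regset32 table:
 *
 *	static const struct debugfs_reg32 example_regs[] = {
 *		VC4_REG32(PV_CONTROL),
 *		VC4_REG32(PV_V_CONTROL),
 *	};
 */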

struct vc4_exec_info {
	struct vc4_dev *dev;

	/* Sequence number for this bin/render job. */
	uint64_t seqno;

	/* Latest write_seqno of any BO that binning depends on. */
	uint64_t bin_dep_seqno;

	struct dma_fence *fence;

	/* Last current addresses the hardware was processing when the
	 * hangcheck timer checked on us.
	 */
	uint32_t last_ct0ca, last_ct1ca;

	/* Kernel-space copy of the ioctl arguments */
	struct drm_vc4_submit_cl *args;

	/* This is the array of BOs that were looked up at the start of exec.
	 * Command validation will use indices into this array.
	 */
	struct drm_gem_object **bo;
	uint32_t bo_count;

	/* List of BOs that are being written by the RCL.  Other than
	 * the binner temporary storage, this is all the BOs written
	 * by the job.
	 */
	struct drm_gem_dma_object *rcl_write_bo[4];
	uint32_t rcl_write_bo_count;

	/* Pointers for our position in vc4->job_list */
	struct list_head head;

	/* List of other BOs used in the job that need to be released
	 * once the job is complete.
	 */
	struct list_head unref_list;

	/* Current unvalidated indices into @bo loaded by the non-hardware
	 * VC4_PACKET_GEM_HANDLES.
	 */
	uint32_t bo_index[2];

	/* This is the BO where we store the validated command lists, shader
	 * records, and uniforms.
	 */
	struct drm_gem_dma_object *exec_bo;

	/**
	 * This tracks the per-shader-record state (packet 64) that
	 * determines the length of the shader record and the offset
	 * it's expected to be found at.  It gets read in from the
	 * command lists.
	 */
	struct vc4_shader_state {
		uint32_t addr;
		/* Maximum vertex index referenced by any primitive using this
		 * shader state.
		 */
		uint32_t max_index;
	} *shader_state;

	/** How many shader states the user declared they were using. */
	uint32_t shader_state_size;
	/** How many shader state records the validator has seen. */
	uint32_t shader_state_count;

	bool found_tile_binning_mode_config_packet;
	bool found_start_tile_binning_packet;
	bool found_increment_semaphore_packet;
	bool found_flush;
	uint8_t bin_tiles_x, bin_tiles_y;
	/* Physical address of the start of the tile alloc array
	 * (where each tile's binned CL will start)
	 */
	uint32_t tile_alloc_offset;
	/* Bitmask of which binner slots are freed when this job completes. */
	uint32_t bin_slots;

	/**
	 * Computed addresses pointing into exec_bo where we start the
	 * bin thread (ct0) and render thread (ct1).
	 */
	uint32_t ct0ca, ct0ea;
	uint32_t ct1ca, ct1ea;

	/* Pointer to the unvalidated bin CL (if present). */
	void *bin_u;

	/* Pointers to the shader recs.  The paddr gets incremented as CL
	 * packets are relocated in validate_gl_shader_state, and the vaddrs
	 * (u and v) get incremented and the size decremented as the shader
	 * recs themselves are validated.
	 */
	void *shader_rec_u;
	void *shader_rec_v;
	uint32_t shader_rec_p;
	uint32_t shader_rec_size;

	/* Pointers to the uniform data.  These pointers are incremented, and
	 * size decremented, as each batch of uniforms is uploaded.
	 */
	void *uniforms_u;
	void *uniforms_v;
	uint32_t uniforms_p;
	uint32_t uniforms_size;

	/* Pointer to a performance monitor object if the user requested it,
	 * NULL otherwise.
	 */
	struct vc4_perfmon *perfmon;

	/* Whether the exec has taken a reference to the binner BO, which should
	 * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet.
	 */
	bool bin_bo_used;
};

/* Per-open file private data. Any driver-specific resource that has to be
 * released when the DRM file is closed should be placed here.
 */
struct vc4_file {
	struct vc4_dev *dev;

	struct {
		struct idr idr;
		struct mutex lock;
	} perfmon;

	bool bin_bo_used;
};

static inline struct vc4_exec_info *
vc4_first_bin_job(struct vc4_dev *vc4)
{
	return list_first_entry_or_null(&vc4->bin_job_list,
					struct vc4_exec_info, head);
}

static inline struct vc4_exec_info *
vc4_first_render_job(struct vc4_dev *vc4)
{
	return list_first_entry_or_null(&vc4->render_job_list,
					struct vc4_exec_info, head);
}

static inline struct vc4_exec_info *
vc4_last_render_job(struct vc4_dev *vc4)
{
	if (list_empty(&vc4->render_job_list))
		return NULL;
	return list_last_entry(&vc4->render_job_list,
			       struct vc4_exec_info, head);
}
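
/* Illustrative usage (sketch): these helpers only peek at the job lists,
 * so callers serialize against the IRQ handler with job_lock:
 *
 *	unsigned long irqflags;
 *	struct vc4_exec_info *exec;
 *
 *	spin_lock_irqsave(&vc4->job_lock, irqflags);
 *	exec = vc4_first_bin_job(vc4);
 *	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 */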

/**
 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
 * setup parameters.
 *
 * This will be used at draw time to relocate the reference to the texture
 * contents in p0, and validate that the offset combined with
 * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
 * Note that the hardware treats unprovided config parameters as 0, so not all
 * of them need to be set up for every texture sample, and we'll store ~0 as
 * the offset to mark the unused ones.
 *
 * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
 * Setup") for definitions of the texture parameters.
 */
struct vc4_texture_sample_info {
	bool is_direct;
	uint32_t p_offset[4];
};

/**
 * struct vc4_validated_shader_info - information about validated shaders that
 * needs to be used from command list validation.
 *
 * For a given shader, each time a shader state record references it, we need
 * to verify that the shader doesn't read more uniforms than the shader state
 * record's uniform BO pointer can provide, and we need to apply relocations
 * and validate the shader state record's uniforms that define the texture
 * samples.
 */
struct vc4_validated_shader_info {
	uint32_t uniforms_size;
	uint32_t uniforms_src_size;
	uint32_t num_texture_samples;
	struct vc4_texture_sample_info *texture_samples;

	uint32_t num_uniform_addr_offsets;
	uint32_t *uniform_addr_offsets;

	bool is_threaded;
};

/**
 * __wait_for - magic wait macro
 *
 * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
 * important that we check the condition again after having timed out, since the
 * timeout could be due to preemption or similar and we've never had a chance to
 * check the condition before the timeout.
 */
#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
	long wait__ = (Wmin); /* recommended min for usleep is 10 us */	\
	int ret__;							\
	might_sleep();							\
	for (;;) {							\
		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
		OP;							\
		/* Guarantee COND check prior to timeout */		\
		barrier();						\
		if (COND) {						\
			ret__ = 0;					\
			break;						\
		}							\
		if (expired__) {					\
			ret__ = -ETIMEDOUT;				\
			break;						\
		}							\
		usleep_range(wait__, wait__ * 2);			\
		if (wait__ < (Wmax))					\
			wait__ <<= 1;					\
	}								\
	ret__;								\
})

#define _wait_for(COND, US, Wmin, Wmax)	__wait_for(, (COND), (US), (Wmin), \
						   (Wmax))
#define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
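
/* Illustrative usage (sketch; STATUS and STATUS_IDLE are hypothetical):
 * wait up to 10 ms for an idle bit, polling with exponential backoff:
 *
 *	int ret = wait_for(HVS_READ(STATUS) & STATUS_IDLE, 10);
 *	if (ret == -ETIMEDOUT)
 *		drm_err(drm, "timed out waiting for idle\n");
 */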

/* vc4_bo.c */
struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
			     bool from_cache, enum vc4_kernel_bo_type type);
int vc4_bo_dumb_create(struct drm_file *file_priv,
		       struct drm_device *dev,
		       struct drm_mode_create_dumb *args);
int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file_priv);
int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv);
int vc4_bo_cache_init(struct drm_device *dev);
int vc4_bo_inc_usecnt(struct vc4_bo *bo);
void vc4_bo_dec_usecnt(struct vc4_bo *bo);
void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
int vc4_bo_debugfs_init(struct drm_minor *minor);

/* vc4_crtc.c */
extern struct platform_driver vc4_crtc_driver;
int vc4_crtc_disable_at_boot(struct drm_crtc *crtc);
int __vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev,
		    struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data,
		    struct drm_plane *primary_plane,
		    const struct drm_crtc_funcs *crtc_funcs,
		    const struct drm_crtc_helper_funcs *crtc_helper_funcs,
		    bool feeds_txp);
int vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev,
		  struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data,
		  const struct drm_crtc_funcs *crtc_funcs,
		  const struct drm_crtc_helper_funcs *crtc_helper_funcs,
		  bool feeds_txp);
int vc4_page_flip(struct drm_crtc *crtc,
		  struct drm_framebuffer *fb,
		  struct drm_pending_vblank_event *event,
		  uint32_t flags,
		  struct drm_modeset_acquire_ctx *ctx);
int vc4_crtc_atomic_check(struct drm_crtc *crtc,
			  struct drm_atomic_state *state);
struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc);
void vc4_crtc_destroy_state(struct drm_crtc *crtc,
			    struct drm_crtc_state *state);
void vc4_crtc_reset(struct drm_crtc *crtc);
void vc4_crtc_handle_vblank(struct vc4_crtc *crtc);
void vc4_crtc_send_vblank(struct drm_crtc *crtc);
int vc4_crtc_late_register(struct drm_crtc *crtc);
void vc4_crtc_get_margins(struct drm_crtc_state *state,
			  unsigned int *left, unsigned int *right,
			  unsigned int *top, unsigned int *bottom);

/* vc4_debugfs.c */
void vc4_debugfs_init(struct drm_minor *minor);
#ifdef CONFIG_DEBUG_FS
void vc4_debugfs_add_regset32(struct drm_device *drm,
			      const char *filename,
			      struct debugfs_regset32 *regset);
#else

static inline void vc4_debugfs_add_regset32(struct drm_device *drm,
					    const char *filename,
					    struct debugfs_regset32 *regset)
{}
#endif

/* vc4_drv.c */
void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args);

/* vc4_dpi.c */
extern struct platform_driver vc4_dpi_driver;

/* vc4_dsi.c */
extern struct platform_driver vc4_dsi_driver;

/* vc4_fence.c */
extern const struct dma_fence_ops vc4_fence_ops;

/* vc4_gem.c */
int vc4_gem_init(struct drm_device *dev);
int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
void vc4_submit_next_bin_job(struct drm_device *dev);
void vc4_submit_next_render_job(struct drm_device *dev);
void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
		       uint64_t timeout_ns, bool interruptible);
void vc4_job_handle_completed(struct vc4_dev *vc4);
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb));
int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv);

/* vc4_hdmi.c */
extern struct platform_driver vc4_hdmi_driver;

/* vc4_vec.c */
extern struct platform_driver vc4_vec_driver;

/* vc4_txp.c */
extern struct platform_driver vc4_txp_driver;

/* vc4_irq.c */
void vc4_irq_enable(struct drm_device *dev);
void vc4_irq_disable(struct drm_device *dev);
int vc4_irq_install(struct drm_device *dev, int irq);
void vc4_irq_uninstall(struct drm_device *dev);
void vc4_irq_reset(struct drm_device *dev);

/* vc4_hvs.c */
extern struct platform_driver vc4_hvs_driver;
struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
				void __iomem *regs,
				struct platform_device *pdev);
void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int output);
int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output);
u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo);
int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_dump_state(struct vc4_hvs *hvs);
void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel);
void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel);
int vc4_hvs_debugfs_init(struct drm_minor *minor);

/* vc4_kms.c */
int vc4_kms_load(struct drm_device *dev);

/* vc4_plane.c */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs);
int vc4_plane_create_additional_planes(struct drm_device *dev);
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(const struct drm_plane_state *state);
void vc4_plane_async_set_fb(struct drm_plane *plane,
			    struct drm_framebuffer *fb);

/* vc4_v3d.c */
extern struct platform_driver vc4_v3d_driver;
extern const struct of_device_id vc4_v3d_dt_match[];
int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used);
void vc4_v3d_bin_bo_put(struct vc4_dev *vc4);
int vc4_v3d_pm_get(struct vc4_dev *vc4);
void vc4_v3d_pm_put(struct vc4_dev *vc4);
int vc4_v3d_debugfs_init(struct drm_minor *minor);

/* vc4_validate.c */
int
vc4_validate_bin_cl(struct drm_device *dev,
		    void *validated,
		    void *unvalidated,
		    struct vc4_exec_info *exec);

int
vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);

struct drm_gem_dma_object *vc4_use_bo(struct vc4_exec_info *exec,
				      uint32_t hindex);

int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);

bool vc4_check_tex_size(struct vc4_exec_info *exec,
			struct drm_gem_dma_object *fbo,
			uint32_t offset, uint8_t tiling_format,
			uint32_t width, uint32_t height, uint8_t cpp);

/* vc4_validate_shader.c */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_dma_object *shader_obj);

/* vc4_perfmon.c */
void vc4_perfmon_get(struct vc4_perfmon *perfmon);
void vc4_perfmon_put(struct vc4_perfmon *perfmon);
void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
		      bool capture);
struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
void vc4_perfmon_open_file(struct vc4_file *vc4file);
void vc4_perfmon_close_file(struct vc4_file *vc4file);
int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);
int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file_priv);

#endif /* _VC4_DRV_H_ */