xref: /linux/drivers/gpu/drm/vc4/vc4_plane.c (revision face6a3615a649456eb4549f6d474221d877d604)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Broadcom
4  */
5 
6 /**
7  * DOC: VC4 plane module
8  *
9  * Each DRM plane is a layer of pixels being scanned out by the HVS.
10  *
11  * At atomic modeset check time, we compute the HVS display element
12  * state that would be necessary for displaying the plane (giving us a
13  * chance to figure out if a plane configuration is invalid), then at
14  * atomic flush time the CRTC will ask us to write our element state
15  * into the region of the HVS that it has allocated for us.
16  */
17 
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_blend.h>
22 #include <drm/drm_drv.h>
23 #include <drm/drm_fb_dma_helper.h>
24 #include <drm/drm_fourcc.h>
25 #include <drm/drm_framebuffer.h>
26 #include <drm/drm_gem_atomic_helper.h>
27 
28 #include "uapi/drm/vc4_drm.h"
29 
30 #include "vc4_drv.h"
31 #include "vc4_regs.h"
32 
33 static const struct hvs_format {
34 	u32 drm; /* DRM_FORMAT_* */
35 	u32 hvs; /* HVS_FORMAT_* */
36 	u32 pixel_order;
37 	u32 pixel_order_hvs5;
38 	bool hvs5_only;
39 } hvs_formats[] = {
40 	{
41 		.drm = DRM_FORMAT_XRGB8888,
42 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
43 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
44 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
45 	},
46 	{
47 		.drm = DRM_FORMAT_ARGB8888,
48 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
49 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
50 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
51 	},
52 	{
53 		.drm = DRM_FORMAT_ABGR8888,
54 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
55 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
56 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
57 	},
58 	{
59 		.drm = DRM_FORMAT_XBGR8888,
60 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
61 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
62 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
63 	},
64 	{
65 		.drm = DRM_FORMAT_RGB565,
66 		.hvs = HVS_PIXEL_FORMAT_RGB565,
67 		.pixel_order = HVS_PIXEL_ORDER_XRGB,
68 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
69 	},
70 	{
71 		.drm = DRM_FORMAT_BGR565,
72 		.hvs = HVS_PIXEL_FORMAT_RGB565,
73 		.pixel_order = HVS_PIXEL_ORDER_XBGR,
74 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
75 	},
76 	{
77 		.drm = DRM_FORMAT_ARGB1555,
78 		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
79 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
80 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
81 	},
82 	{
83 		.drm = DRM_FORMAT_XRGB1555,
84 		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
85 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
86 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
87 	},
88 	{
89 		.drm = DRM_FORMAT_RGB888,
90 		.hvs = HVS_PIXEL_FORMAT_RGB888,
91 		.pixel_order = HVS_PIXEL_ORDER_XRGB,
92 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
93 	},
94 	{
95 		.drm = DRM_FORMAT_BGR888,
96 		.hvs = HVS_PIXEL_FORMAT_RGB888,
97 		.pixel_order = HVS_PIXEL_ORDER_XBGR,
98 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
99 	},
100 	{
101 		.drm = DRM_FORMAT_YUV422,
102 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
103 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
104 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
105 	},
106 	{
107 		.drm = DRM_FORMAT_YVU422,
108 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
109 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
110 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
111 	},
112 	{
113 		.drm = DRM_FORMAT_YUV444,
114 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
115 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
116 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
117 	},
118 	{
119 		.drm = DRM_FORMAT_YVU444,
120 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
121 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
122 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
123 	},
124 	{
125 		.drm = DRM_FORMAT_YUV420,
126 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
127 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
128 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
129 	},
130 	{
131 		.drm = DRM_FORMAT_YVU420,
132 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
133 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
134 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
135 	},
136 	{
137 		.drm = DRM_FORMAT_NV12,
138 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
139 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
140 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
141 	},
142 	{
143 		.drm = DRM_FORMAT_NV21,
144 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
145 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
146 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
147 	},
148 	{
149 		.drm = DRM_FORMAT_NV16,
150 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
151 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
152 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
153 	},
154 	{
155 		.drm = DRM_FORMAT_NV61,
156 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
157 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
158 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
159 	},
160 	{
161 		.drm = DRM_FORMAT_P030,
162 		.hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
163 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
164 		.hvs5_only = true,
165 	},
166 	{
167 		.drm = DRM_FORMAT_XRGB2101010,
168 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
169 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
170 		.hvs5_only = true,
171 	},
172 	{
173 		.drm = DRM_FORMAT_ARGB2101010,
174 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
175 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
176 		.hvs5_only = true,
177 	},
178 	{
179 		.drm = DRM_FORMAT_ABGR2101010,
180 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
181 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
182 		.hvs5_only = true,
183 	},
184 	{
185 		.drm = DRM_FORMAT_XBGR2101010,
186 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
187 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
188 		.hvs5_only = true,
189 	},
190 	{
191 		.drm = DRM_FORMAT_RGB332,
192 		.hvs = HVS_PIXEL_FORMAT_RGB332,
193 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
194 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
195 	},
196 	{
197 		.drm = DRM_FORMAT_BGR233,
198 		.hvs = HVS_PIXEL_FORMAT_RGB332,
199 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
200 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
201 	},
202 	{
203 		.drm = DRM_FORMAT_XRGB4444,
204 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
205 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
206 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
207 	},
208 	{
209 		.drm = DRM_FORMAT_ARGB4444,
210 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
211 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
212 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
213 	},
214 	{
215 		.drm = DRM_FORMAT_XBGR4444,
216 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
217 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
218 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
219 	},
220 	{
221 		.drm = DRM_FORMAT_ABGR4444,
222 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
223 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
224 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
225 	},
226 	{
227 		.drm = DRM_FORMAT_BGRX4444,
228 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
229 		.pixel_order = HVS_PIXEL_ORDER_RGBA,
230 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
231 	},
232 	{
233 		.drm = DRM_FORMAT_BGRA4444,
234 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
235 		.pixel_order = HVS_PIXEL_ORDER_RGBA,
236 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
237 	},
238 	{
239 		.drm = DRM_FORMAT_RGBX4444,
240 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
241 		.pixel_order = HVS_PIXEL_ORDER_BGRA,
242 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
243 	},
244 	{
245 		.drm = DRM_FORMAT_RGBA4444,
246 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
247 		.pixel_order = HVS_PIXEL_ORDER_BGRA,
248 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
249 	},
250 };
251 
252 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
253 {
254 	unsigned i;
255 
256 	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
257 		if (hvs_formats[i].drm == drm_format)
258 			return &hvs_formats[i];
259 	}
260 
261 	return NULL;
262 }
263 
264 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
265 {
266 	if (dst == src >> 16)
267 		return VC4_SCALING_NONE;
268 	if (3 * dst >= 2 * (src >> 16))
269 		return VC4_SCALING_PPF;
270 	else
271 		return VC4_SCALING_TPZ;
272 }
273 
274 static bool plane_enabled(struct drm_plane_state *state)
275 {
276 	return state->fb && !WARN_ON(!state->crtc);
277 }
278 
279 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
280 {
281 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
282 	struct vc4_hvs *hvs = vc4->hvs;
283 	struct vc4_plane_state *vc4_state;
284 	unsigned int i;
285 
286 	if (WARN_ON(!plane->state))
287 		return NULL;
288 
289 	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
290 	if (!vc4_state)
291 		return NULL;
292 
293 	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
294 
295 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
296 		if (vc4_state->upm_handle[i])
297 			refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
298 	}
299 
300 	vc4_state->dlist_initialized = 0;
301 
302 	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
303 
304 	if (vc4_state->dlist) {
305 		vc4_state->dlist = kmemdup(vc4_state->dlist,
306 					   vc4_state->dlist_count * 4,
307 					   GFP_KERNEL);
308 		if (!vc4_state->dlist) {
309 			kfree(vc4_state);
310 			return NULL;
311 		}
312 		vc4_state->dlist_size = vc4_state->dlist_count;
313 	}
314 
315 	return &vc4_state->base;
316 }
317 
318 static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
319 {
320 	struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
321 	unsigned long irqflags;
322 
323 	spin_lock_irqsave(&hvs->mm_lock, irqflags);
324 	drm_mm_remove_node(&refcount->upm);
325 	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
326 	refcount->upm.start = 0;
327 	refcount->upm.size = 0;
328 	refcount->size = 0;
329 
330 	ida_free(&hvs->upm_handles, upm_handle);
331 }
332 
333 static void vc4_plane_destroy_state(struct drm_plane *plane,
334 				    struct drm_plane_state *state)
335 {
336 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
337 	struct vc4_hvs *hvs = vc4->hvs;
338 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
339 	unsigned int i;
340 
341 	if (drm_mm_node_allocated(&vc4_state->lbm)) {
342 		unsigned long irqflags;
343 
344 		spin_lock_irqsave(&hvs->mm_lock, irqflags);
345 		drm_mm_remove_node(&vc4_state->lbm);
346 		spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
347 	}
348 
349 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
350 		struct vc4_upm_refcounts *refcount;
351 
352 		if (!vc4_state->upm_handle[i])
353 			continue;
354 
355 		refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];
356 
357 		if (refcount_dec_and_test(&refcount->refcount))
358 			vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
359 	}
360 
361 	kfree(vc4_state->dlist);
362 	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
363 	kfree(state);
364 }
365 
366 /* Called during init to allocate the plane's atomic state. */
367 static void vc4_plane_reset(struct drm_plane *plane)
368 {
369 	struct vc4_plane_state *vc4_state;
370 
371 	if (plane->state)
372 		__drm_atomic_helper_plane_destroy_state(plane->state);
373 
374 	kfree(plane->state);
375 
376 	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
377 	if (!vc4_state)
378 		return;
379 
380 	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
381 }
382 
383 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
384 {
385 	if (vc4_state->dlist_count == vc4_state->dlist_size) {
386 		u32 new_size = max(4u, vc4_state->dlist_count * 2);
387 		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
388 
389 		if (!new_dlist)
390 			return;
391 		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
392 
393 		kfree(vc4_state->dlist);
394 		vc4_state->dlist = new_dlist;
395 		vc4_state->dlist_size = new_size;
396 	}
397 
398 	vc4_state->dlist_count++;
399 }
400 
401 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
402 {
403 	unsigned int idx = vc4_state->dlist_count;
404 
405 	vc4_dlist_counter_increment(vc4_state);
406 	vc4_state->dlist[idx] = val;
407 }
408 
409 /* Returns the scl0/scl1 field based on whether the dimensions need to
410  * be up/down/non-scaled.
411  *
412  * This is a replication of a table from the spec.
413  */
414 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
415 {
416 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
417 
418 	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
419 	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
420 		return SCALER_CTL0_SCL_H_PPF_V_PPF;
421 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
422 		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
423 	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
424 		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
425 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
426 		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
427 	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
428 		return SCALER_CTL0_SCL_H_PPF_V_NONE;
429 	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
430 		return SCALER_CTL0_SCL_H_NONE_V_PPF;
431 	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
432 		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
433 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
434 		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
435 	default:
436 	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
437 		/* The unity case is independently handled by
438 		 * SCALER_CTL0_UNITY.
439 		 */
440 		return 0;
441 	}
442 }
443 
444 static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
445 {
446 	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
447 	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
448 	struct drm_crtc_state *crtc_state;
449 
450 	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
451 						   pstate->crtc);
452 
453 	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
454 	if (!left && !right && !top && !bottom)
455 		return 0;
456 
457 	if (left + right >= crtc_state->mode.hdisplay ||
458 	    top + bottom >= crtc_state->mode.vdisplay)
459 		return -EINVAL;
460 
461 	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
462 	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
463 					       adjhdisplay,
464 					       crtc_state->mode.hdisplay);
465 	vc4_pstate->crtc_x += left;
466 	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
467 		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;
468 
469 	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
470 	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
471 					       adjvdisplay,
472 					       crtc_state->mode.vdisplay);
473 	vc4_pstate->crtc_y += top;
474 	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
475 		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;
476 
477 	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
478 					       adjhdisplay,
479 					       crtc_state->mode.hdisplay);
480 	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
481 					       adjvdisplay,
482 					       crtc_state->mode.vdisplay);
483 
484 	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
485 		return -EINVAL;
486 
487 	return 0;
488 }
489 
490 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
491 {
492 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
493 	struct drm_framebuffer *fb = state->fb;
494 	int num_planes = fb->format->num_planes;
495 	struct drm_crtc_state *crtc_state;
496 	u32 h_subsample = fb->format->hsub;
497 	u32 v_subsample = fb->format->vsub;
498 	int ret;
499 
500 	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
501 	if (!crtc_state) {
502 		DRM_DEBUG_KMS("Invalid crtc state\n");
503 		return -EINVAL;
504 	}
505 
506 	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
507 						  INT_MAX, true, true);
508 	if (ret)
509 		return ret;
510 
511 	vc4_state->src_x = state->src.x1;
512 	vc4_state->src_y = state->src.y1;
513 	vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
514 	vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;
515 
516 	vc4_state->crtc_x = state->dst.x1;
517 	vc4_state->crtc_y = state->dst.y1;
518 	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
519 	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
520 
521 	ret = vc4_plane_margins_adj(state);
522 	if (ret)
523 		return ret;
524 
525 	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
526 						       vc4_state->crtc_w);
527 	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
528 						       vc4_state->crtc_h);
529 
530 	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
531 			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);
532 
533 	if (num_planes > 1) {
534 		vc4_state->is_yuv = true;
535 
536 		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
537 		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
538 
539 		vc4_state->x_scaling[1] =
540 			vc4_get_scaling_mode(vc4_state->src_w[1],
541 					     vc4_state->crtc_w);
542 		vc4_state->y_scaling[1] =
543 			vc4_get_scaling_mode(vc4_state->src_h[1],
544 					     vc4_state->crtc_h);
545 
546 		/* YUV conversion requires that horizontal scaling be enabled
547 		 * on the UV plane even if vc4_get_scaling_mode() returned
548 		 * VC4_SCALING_NONE (which can happen when the down-scaling
549 		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
550 		 * case.
551 		 */
552 		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
553 			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
554 
555 		/* Similarly UV needs vertical scaling to be enabled.
556 		 * Without this a 1:1 scaled YUV422 plane isn't rendered.
557 		 */
558 		if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
559 			vc4_state->y_scaling[1] = VC4_SCALING_PPF;
560 	} else {
561 		vc4_state->is_yuv = false;
562 		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
563 		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
564 	}
565 
566 	return 0;
567 }
568 
569 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
570 {
571 	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
572 	u32 scale, recip;
573 
574 	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);
575 
576 	scale = src / dst;
577 
578 	/* The specs note that while the reciprocal would be defined
579 	 * as (1<<32)/scale, ~0 is close enough.
580 	 */
581 	recip = ~0 / scale;
582 
583 	vc4_dlist_write(vc4_state,
584 			/*
585 			 * The BCM2712 is lacking BIT(31) compared to
586 			 * the previous generations, but we don't use
587 			 * it.
588 			 */
589 			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
590 			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
591 	vc4_dlist_write(vc4_state,
592 			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
593 }
594 
595 /* phase magnitude bits */
596 #define PHASE_BITS 6
597 
598 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
599 			  u32 xy, int channel)
600 {
601 	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
602 	u32 scale = src / dst;
603 	s32 offset, offset2;
604 	s32 phase;
605 
606 	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);
607 
608 	/*
609 	 * Start the phase at 1/2 pixel from the 1st pixel at src_x.
610 	 * 1/4 pixel for YUV.
611 	 */
612 	if (channel) {
613 		/*
614 		 * The phase is relative to scale_src->x, so shift it for
615 		 * display list's x value
616 		 */
617 		offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
618 		offset += -(1 << PHASE_BITS >> 2);
619 	} else {
620 		/*
621 		 * The phase is relative to scale_src->x, so shift it for
622 		 * display list's x value
623 		 */
624 		offset = (xy & 0xffff) >> (16 - PHASE_BITS);
625 		offset += -(1 << PHASE_BITS >> 1);
626 
627 		/*
628 		 * This is a kludge to make sure the scaling factors are
629 		 * consistent with YUV's luma scaling. We lose 1-bit precision
630 		 * because of this.
631 		 */
632 		scale &= ~1;
633 	}
634 
635 	/*
636 	 * There may be a also small error introduced by precision of scale.
637 	 * Add half of that as a compromise
638 	 */
639 	offset2 = src - dst * scale;
640 	offset2 >>= 16 - PHASE_BITS;
641 	phase = offset + (offset2 >> 1);
642 
643 	/* Ensure +ve values don't touch the sign bit, then truncate negative values */
644 	if (phase >= 1 << PHASE_BITS)
645 		phase = (1 << PHASE_BITS) - 1;
646 
647 	phase &= SCALER_PPF_IPHASE_MASK;
648 
649 	vc4_dlist_write(vc4_state,
650 			SCALER_PPF_AGC |
651 			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
652 			/*
653 			 * The register layout documentation is slightly
654 			 * different to setup the phase in the BCM2712,
655 			 * but they seem equivalent.
656 			 */
657 			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
658 }
659 
660 static u32 __vc4_lbm_size(struct drm_plane_state *state)
661 {
662 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
663 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
664 	u32 pix_per_line;
665 	u32 lbm;
666 
667 	/* LBM is not needed when there's no vertical scaling. */
668 	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
669 	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
670 		return 0;
671 
672 	/*
673 	 * This can be further optimized in the RGB/YUV444 case if the PPF
674 	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
675 	 *
676 	 * It's not an issue though, since in that case since src_w[0] is going
677 	 * to be greater than or equal to crtc_w.
678 	 */
679 	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
680 		pix_per_line = vc4_state->crtc_w;
681 	else
682 		pix_per_line = vc4_state->src_w[0] >> 16;
683 
684 	if (!vc4_state->is_yuv) {
685 		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
686 			lbm = pix_per_line * 8;
687 		else {
688 			/* In special cases, this multiplier might be 12. */
689 			lbm = pix_per_line * 16;
690 		}
691 	} else {
692 		/* There are cases for this going down to a multiplier
693 		 * of 2, but according to the firmware source, the
694 		 * table in the docs is somewhat wrong.
695 		 */
696 		lbm = pix_per_line * 16;
697 	}
698 
699 	/* Align it to 64 or 128 (hvs5) bytes */
700 	lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);
701 
702 	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
703 	lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;
704 
705 	return lbm;
706 }
707 
708 static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
709 						unsigned int channel)
710 {
711 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
712 
713 	switch (vc4_state->y_scaling[channel]) {
714 	case VC4_SCALING_PPF:
715 		return 4;
716 
717 	case VC4_SCALING_TPZ:
718 		return 2;
719 
720 	default:
721 		return 0;
722 	}
723 }
724 
725 static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
726 				       unsigned int channel)
727 {
728 	const struct drm_format_info *info = state->fb->format;
729 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
730 
731 	if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
732 		return 0;
733 
734 	if (info->is_yuv)
735 		return channel ? 2 : 1;
736 
737 	if (info->has_alpha)
738 		return 4;
739 
740 	return 3;
741 }
742 
743 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
744 					 unsigned int channel)
745 {
746 	const struct drm_format_info *info = state->fb->format;
747 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
748 	unsigned int channels_scaled = 0;
749 	unsigned int components, words, wpc;
750 	unsigned int width, lines;
751 	unsigned int i;
752 
753 	/* LBM is meant to use the smaller of source or dest width, but there
754 	 * is a issue with UV scaling that the size required for the second
755 	 * channel is based on the source width only.
756 	 */
757 	if (info->hsub > 1 && channel == 1)
758 		width = state->src_w >> 16;
759 	else
760 		width = min(state->src_w >> 16, state->crtc_w);
761 	width = round_up(width / info->hsub, 4);
762 
763 	wpc = vc4_lbm_words_per_component(state, channel);
764 	if (!wpc)
765 		return 0;
766 
767 	components = vc4_lbm_components(state, channel);
768 	if (!components)
769 		return 0;
770 
771 	if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha)
772 		components -= 1;
773 
774 	words = width * wpc * components;
775 
776 	lines = DIV_ROUND_UP(words, 128 / info->hsub);
777 
778 	for (i = 0; i < 2; i++)
779 		if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
780 			channels_scaled++;
781 
782 	if (channels_scaled == 1)
783 		lines = lines / 2;
784 
785 	return lines;
786 }
787 
788 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
789 {
790 	const struct drm_format_info *info = state->fb->format;
791 
792 	if (info->hsub > 1)
793 		return max(vc4_lbm_channel_size(state, 0),
794 			   vc4_lbm_channel_size(state, 1));
795 	else
796 		return vc4_lbm_channel_size(state, 0);
797 }
798 
799 static u32 vc4_lbm_size(struct drm_plane_state *state)
800 {
801 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
802 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
803 
804 	/* LBM is not needed when there's no vertical scaling. */
805 	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
806 	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
807 		return 0;
808 
809 	if (vc4->gen >= VC4_GEN_6_C)
810 		return __vc6_lbm_size(state);
811 	else
812 		return __vc4_lbm_size(state);
813 }
814 
815 static size_t vc6_upm_size(const struct drm_plane_state *state,
816 			   unsigned int plane)
817 {
818 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
819 	unsigned int stride = state->fb->pitches[plane];
820 
821 	/*
822 	 * TODO: This only works for raster formats, and is sub-optimal
823 	 * for buffers with a stride aligned on 32 bytes.
824 	 */
825 	unsigned int words_per_line = (stride + 62) / 32;
826 	unsigned int fetch_region_size = words_per_line * 32;
827 	unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
828 	unsigned int buffer_size = fetch_region_size * buffer_lines;
829 
830 	return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
831 }
832 
833 static void vc4_write_scaling_parameters(struct drm_plane_state *state,
834 					 int channel)
835 {
836 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
837 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
838 
839 	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);
840 
841 	/* Ch0 H-PPF Word 0: Scaling Parameters */
842 	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
843 		vc4_write_ppf(vc4_state, vc4_state->src_w[channel],
844 			      vc4_state->crtc_w, vc4_state->src_x, channel);
845 	}
846 
847 	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
848 	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
849 		vc4_write_ppf(vc4_state, vc4_state->src_h[channel],
850 			      vc4_state->crtc_h, vc4_state->src_y, channel);
851 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
852 	}
853 
854 	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
855 	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
856 		vc4_write_tpz(vc4_state, vc4_state->src_w[channel],
857 			      vc4_state->crtc_w);
858 	}
859 
860 	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
861 	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
862 		vc4_write_tpz(vc4_state, vc4_state->src_h[channel],
863 			      vc4_state->crtc_h);
864 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
865 	}
866 }
867 
868 static void vc4_plane_calc_load(struct drm_plane_state *state)
869 {
870 	unsigned int hvs_load_shift, vrefresh, i;
871 	struct drm_framebuffer *fb = state->fb;
872 	struct vc4_plane_state *vc4_state;
873 	struct drm_crtc_state *crtc_state;
874 	unsigned int vscale_factor;
875 
876 	vc4_state = to_vc4_plane_state(state);
877 	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
878 	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
879 
880 	/* The HVS is able to process 2 pixels/cycle when scaling the source,
881 	 * 4 pixels/cycle otherwise.
882 	 * Alpha blending step seems to be pipelined and it's always operating
883 	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
884 	 * scaler block.
885 	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
886 	 */
887 	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
888 	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
889 	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
890 	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
891 		hvs_load_shift = 1;
892 	else
893 		hvs_load_shift = 2;
894 
895 	vc4_state->membus_load = 0;
896 	vc4_state->hvs_load = 0;
897 	for (i = 0; i < fb->format->num_planes; i++) {
898 		/* Even if the bandwidth/plane required for a single frame is
899 		 *
900 		 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
901 		 *  cpp * vrefresh
902 		 *
903 		 * when downscaling, we have to read more pixels per line in
904 		 * the time frame reserved for a single line, so the bandwidth
905 		 * demand can be punctually higher. To account for that, we
906 		 * calculate the down-scaling factor and multiply the plane
907 		 * load by this number. We're likely over-estimating the read
908 		 * demand, but that's better than under-estimating it.
909 		 */
910 		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
911 					     vc4_state->crtc_h);
912 		vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
913 					  (vc4_state->src_h[i] >> 16) *
914 					  vscale_factor * fb->format->cpp[i];
915 		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
916 	}
917 
918 	vc4_state->hvs_load *= vrefresh;
919 	vc4_state->hvs_load >>= hvs_load_shift;
920 	vc4_state->membus_load *= vrefresh;
921 }
922 
923 static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
924 {
925 	struct drm_device *drm = state->plane->dev;
926 	struct vc4_dev *vc4 = to_vc4_dev(drm);
927 	struct drm_plane *plane = state->plane;
928 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
929 	unsigned long irqflags;
930 	u32 lbm_size;
931 
932 	lbm_size = vc4_lbm_size(state);
933 	if (!lbm_size)
934 		return 0;
935 
936 	/*
937 	 * NOTE: BCM2712 doesn't need to be aligned, since the size
938 	 * returned by vc4_lbm_size() is in words already.
939 	 */
940 	if (vc4->gen == VC4_GEN_5)
941 		lbm_size = ALIGN(lbm_size, 64);
942 	else if (vc4->gen == VC4_GEN_4)
943 		lbm_size = ALIGN(lbm_size, 32);
944 
945 	drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
946 		       plane->base.id, plane->name, lbm_size);
947 
948 	if (WARN_ON(!vc4_state->lbm_offset))
949 		return -EINVAL;
950 
951 	/* Allocate the LBM memory that the HVS will use for temporary
952 	 * storage due to our scaling/format conversion.
953 	 */
954 	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
955 		int ret;
956 
957 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
958 		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
959 						 &vc4_state->lbm,
960 						 lbm_size, 1,
961 						 0, 0);
962 		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
963 
964 		if (ret) {
965 			drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
966 			return ret;
967 		}
968 	} else {
969 		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
970 	}
971 
972 	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
973 
974 	return 0;
975 }
976 
977 static int vc6_plane_allocate_upm(struct drm_plane_state *state)
978 {
979 	const struct drm_format_info *info = state->fb->format;
980 	struct drm_device *drm = state->plane->dev;
981 	struct vc4_dev *vc4 = to_vc4_dev(drm);
982 	struct vc4_hvs *hvs = vc4->hvs;
983 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
984 	unsigned int i;
985 	int ret;
986 
987 	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
988 
989 	vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;
990 
991 	for (i = 0; i < info->num_planes; i++) {
992 		struct vc4_upm_refcounts *refcount;
993 		int upm_handle;
994 		unsigned long irqflags;
995 		size_t upm_size;
996 
997 		upm_size = vc6_upm_size(state, i);
998 		if (!upm_size)
999 			return -EINVAL;
1000 		upm_handle = vc4_state->upm_handle[i];
1001 
1002 		if (upm_handle &&
1003 		    hvs->upm_refcounts[upm_handle].size == upm_size) {
1004 			/* Allocation is the same size as the previous user of
1005 			 * the plane. Keep the allocation.
1006 			 */
1007 			vc4_state->upm_handle[i] = upm_handle;
1008 		} else {
1009 			if (upm_handle &&
1010 			    refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
1011 				vc4_plane_release_upm_ida(hvs, upm_handle);
1012 				vc4_state->upm_handle[i] = 0;
1013 			}
1014 
1015 			upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
1016 						     VC4_NUM_UPM_HANDLES,
1017 						     GFP_KERNEL);
1018 			if (upm_handle < 0) {
1019 				drm_dbg(drm, "Out of upm_handles\n");
1020 				return upm_handle;
1021 			}
1022 			vc4_state->upm_handle[i] = upm_handle;
1023 
1024 			refcount = &hvs->upm_refcounts[upm_handle];
1025 			refcount_set(&refcount->refcount, 1);
1026 			refcount->size = upm_size;
1027 
1028 			spin_lock_irqsave(&hvs->mm_lock, irqflags);
1029 			ret = drm_mm_insert_node_generic(&hvs->upm_mm,
1030 							 &refcount->upm,
1031 							 upm_size, HVS_UBM_WORD_SIZE,
1032 							 0, 0);
1033 			spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
1034 			if (ret) {
1035 				drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
1036 				refcount_set(&refcount->refcount, 0);
1037 				ida_free(&hvs->upm_handles, upm_handle);
1038 				vc4_state->upm_handle[i] = 0;
1039 				return ret;
1040 			}
1041 		}
1042 
1043 		refcount = &hvs->upm_refcounts[upm_handle];
1044 		vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
1045 			VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
1046 				      SCALER6_PTR0_UPM_BASE) |
1047 			VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
1048 				      SCALER6_PTR0_UPM_HANDLE) |
1049 			VC4_SET_FIELD(vc4_state->upm_buffer_lines,
1050 				      SCALER6_PTR0_UPM_BUFF_SIZE);
1051 	}
1052 
1053 	return 0;
1054 }
1055 
1056 static void vc6_plane_free_upm(struct drm_plane_state *state)
1057 {
1058 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1059 	struct drm_device *drm = state->plane->dev;
1060 	struct vc4_dev *vc4 = to_vc4_dev(drm);
1061 	struct vc4_hvs *hvs = vc4->hvs;
1062 	unsigned int i;
1063 
1064 	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
1065 
1066 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
1067 		unsigned int upm_handle;
1068 
1069 		upm_handle = vc4_state->upm_handle[i];
1070 		if (!upm_handle)
1071 			continue;
1072 
1073 		if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
1074 			vc4_plane_release_upm_ida(hvs, upm_handle);
1075 		vc4_state->upm_handle[i] = 0;
1076 	}
1077 }
1078 
1079 /*
1080  * The colorspace conversion matrices are held in 3 entries in the dlist.
1081  * Create an array of them, with entries for each full and limited mode, and
1082  * each supported colorspace.
1083  */
1084 static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
1085 	{
1086 		/* Limited range */
1087 		{
1088 			/* BT601 */
1089 			SCALER_CSC0_ITR_R_601_5,
1090 			SCALER_CSC1_ITR_R_601_5,
1091 			SCALER_CSC2_ITR_R_601_5,
1092 		}, {
1093 			/* BT709 */
1094 			SCALER_CSC0_ITR_R_709_3,
1095 			SCALER_CSC1_ITR_R_709_3,
1096 			SCALER_CSC2_ITR_R_709_3,
1097 		}, {
1098 			/* BT2020 */
1099 			SCALER_CSC0_ITR_R_2020,
1100 			SCALER_CSC1_ITR_R_2020,
1101 			SCALER_CSC2_ITR_R_2020,
1102 		}
1103 	}, {
1104 		/* Full range */
1105 		{
1106 			/* JFIF */
1107 			SCALER_CSC0_JPEG_JFIF,
1108 			SCALER_CSC1_JPEG_JFIF,
1109 			SCALER_CSC2_JPEG_JFIF,
1110 		}, {
1111 			/* BT709 */
1112 			SCALER_CSC0_ITR_R_709_3_FR,
1113 			SCALER_CSC1_ITR_R_709_3_FR,
1114 			SCALER_CSC2_ITR_R_709_3_FR,
1115 		}, {
1116 			/* BT2020 */
1117 			SCALER_CSC0_ITR_R_2020_FR,
1118 			SCALER_CSC1_ITR_R_2020_FR,
1119 			SCALER_CSC2_ITR_R_2020_FR,
1120 		}
1121 	}
1122 };
1123 
1124 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
1125 {
1126 	struct drm_device *dev = state->state->dev;
1127 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1128 
1129 	WARN_ON_ONCE(vc4->gen != VC4_GEN_4);
1130 
1131 	if (!state->fb->format->has_alpha)
1132 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1133 				     SCALER_POS2_ALPHA_MODE);
1134 
1135 	switch (state->pixel_blend_mode) {
1136 	case DRM_MODE_BLEND_PIXEL_NONE:
1137 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1138 				     SCALER_POS2_ALPHA_MODE);
1139 	default:
1140 	case DRM_MODE_BLEND_PREMULTI:
1141 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1142 				     SCALER_POS2_ALPHA_MODE) |
1143 			SCALER_POS2_ALPHA_PREMULT;
1144 	case DRM_MODE_BLEND_COVERAGE:
1145 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1146 				     SCALER_POS2_ALPHA_MODE);
1147 	}
1148 }
1149 
1150 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
1151 {
1152 	struct drm_device *dev = state->state->dev;
1153 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1154 
1155 	WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C &&
1156 		     vc4->gen != VC4_GEN_6_D);
1157 
1158 	switch (vc4->gen) {
1159 	default:
1160 	case VC4_GEN_5:
1161 	case VC4_GEN_6_C:
1162 		if (!state->fb->format->has_alpha)
1163 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1164 					     SCALER5_CTL2_ALPHA_MODE);
1165 
1166 		switch (state->pixel_blend_mode) {
1167 		case DRM_MODE_BLEND_PIXEL_NONE:
1168 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1169 					     SCALER5_CTL2_ALPHA_MODE);
1170 		default:
1171 		case DRM_MODE_BLEND_PREMULTI:
1172 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1173 					     SCALER5_CTL2_ALPHA_MODE) |
1174 				SCALER5_CTL2_ALPHA_PREMULT;
1175 		case DRM_MODE_BLEND_COVERAGE:
1176 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1177 					     SCALER5_CTL2_ALPHA_MODE);
1178 		}
1179 	case VC4_GEN_6_D:
1180 		/* 2712-D configures fixed alpha mode in CTL0 */
1181 		return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ?
1182 			SCALER5_CTL2_ALPHA_PREMULT : 0;
1183 	}
1184 }
1185 
1186 static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state)
1187 {
1188 	struct drm_device *dev = state->state->dev;
1189 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1190 
1191 	WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D);
1192 
1193 	if (vc4->gen == VC4_GEN_6_D &&
1194 	    (!state->fb->format->has_alpha ||
1195 	     state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE))
1196 		return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED,
1197 				     SCALER6_CTL0_ALPHA_MASK);
1198 
1199 	return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK);
1200 }
1201 
1202 /* Writes out a full display list for an active plane to the plane's
1203  * private dlist state.
1204  */
1205 static int vc4_plane_mode_set(struct drm_plane *plane,
1206 			      struct drm_plane_state *state)
1207 {
1208 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
1209 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1210 	struct drm_framebuffer *fb = state->fb;
1211 	u32 ctl0_offset = vc4_state->dlist_count;
1212 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1213 	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1214 	int num_planes = fb->format->num_planes;
1215 	u32 h_subsample = fb->format->hsub;
1216 	u32 v_subsample = fb->format->vsub;
1217 	bool mix_plane_alpha;
1218 	bool covers_screen;
1219 	u32 scl0, scl1, pitch0;
1220 	u32 tiling, src_x, src_y;
1221 	u32 width, height;
1222 	u32 hvs_format = format->hvs;
1223 	unsigned int rotation;
1224 	u32 offsets[3] = { 0 };
1225 	int ret, i;
1226 
1227 	if (vc4_state->dlist_initialized)
1228 		return 0;
1229 
1230 	ret = vc4_plane_setup_clipping_and_scaling(state);
1231 	if (ret)
1232 		return ret;
1233 
1234 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
1235 	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
1236 		/* 0 source size probably means the plane is offscreen */
1237 		vc4_state->dlist_initialized = 1;
1238 		return 0;
1239 	}
1240 
1241 	width = vc4_state->src_w[0] >> 16;
1242 	height = vc4_state->src_h[0] >> 16;
1243 
1244 	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1245 	 * and 4:4:4, scl1 should be set to scl0 so both channels of
1246 	 * the scaler do the same thing.  For YUV, the Y plane needs
1247 	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1248 	 * the scl fields here.
1249 	 */
1250 	if (num_planes == 1) {
1251 		scl0 = vc4_get_scl_field(state, 0);
1252 		scl1 = scl0;
1253 	} else {
1254 		scl0 = vc4_get_scl_field(state, 1);
1255 		scl1 = vc4_get_scl_field(state, 0);
1256 	}
1257 
1258 	rotation = drm_rotation_simplify(state->rotation,
1259 					 DRM_MODE_ROTATE_0 |
1260 					 DRM_MODE_REFLECT_X |
1261 					 DRM_MODE_REFLECT_Y);
1262 
1263 	/* We must point to the last line when Y reflection is enabled. */
1264 	src_y = vc4_state->src_y >> 16;
1265 	if (rotation & DRM_MODE_REFLECT_Y)
1266 		src_y += height - 1;
1267 
1268 	src_x = vc4_state->src_x >> 16;
1269 
1270 	switch (base_format_mod) {
1271 	case DRM_FORMAT_MOD_LINEAR:
1272 		tiling = SCALER_CTL0_TILING_LINEAR;
1273 		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
1274 
1275 		/* Adjust the base pointer to the first pixel to be scanned
1276 		 * out.
1277 		 */
1278 		for (i = 0; i < num_planes; i++) {
1279 			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1280 			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1281 		}
1282 
1283 		break;
1284 
1285 	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
1286 		u32 tile_size_shift = 12; /* T tiles are 4kb */
1287 		/* Whole-tile offsets, mostly for setting the pitch. */
1288 		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
1289 		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
1290 		u32 tile_w_mask = (1 << tile_w_shift) - 1;
1291 		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
1292 		 * the height (in pixels) of a 4k tile.
1293 		 */
1294 		u32 tile_h_mask = (2 << tile_h_shift) - 1;
1295 		/* For T-tiled, the FB pitch is "how many bytes from one row to
1296 		 * the next, such that
1297 		 *
1298 		 *	pitch * tile_h == tile_size * tiles_per_row
1299 		 */
1300 		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
1301 		u32 tiles_l = src_x >> tile_w_shift;
1302 		u32 tiles_r = tiles_w - tiles_l;
1303 		u32 tiles_t = src_y >> tile_h_shift;
1304 		/* Intra-tile offsets, which modify the base address (the
1305 		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
1306 		 * base address).
1307 		 */
1308 		u32 tile_y = (src_y >> 4) & 1;
1309 		u32 subtile_y = (src_y >> 2) & 3;
1310 		u32 utile_y = src_y & 3;
1311 		u32 x_off = src_x & tile_w_mask;
1312 		u32 y_off = src_y & tile_h_mask;
1313 
1314 		/* When Y reflection is requested we must set the
1315 		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
1316 		 * after the initial one should be fetched in descending order,
1317 		 * which makes sense since we start from the last line and go
1318 		 * backward.
1319 		 * Don't know why we need y_off = max_y_off - y_off, but it's
1320 		 * definitely required (I guess it's also related to the "going
1321 		 * backward" situation).
1322 		 */
1323 		if (rotation & DRM_MODE_REFLECT_Y) {
1324 			y_off = tile_h_mask - y_off;
1325 			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
1326 		} else {
1327 			pitch0 = 0;
1328 		}
1329 
1330 		tiling = SCALER_CTL0_TILING_256B_OR_T;
1331 		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
1332 			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
1333 			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
1334 			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
1335 		offsets[0] += tiles_t * (tiles_w << tile_size_shift);
1336 		offsets[0] += subtile_y << 8;
1337 		offsets[0] += utile_y << 4;
1338 
1339 		/* Rows of tiles alternate left-to-right and right-to-left. */
1340 		if (tiles_t & 1) {
1341 			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
1342 			offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
1343 			offsets[0] -= (1 + !tile_y) << 10;
1344 		} else {
1345 			offsets[0] += tiles_l << tile_size_shift;
1346 			offsets[0] += tile_y << 10;
1347 		}
1348 
1349 		break;
1350 	}
1351 
1352 	case DRM_FORMAT_MOD_BROADCOM_SAND64:
1353 	case DRM_FORMAT_MOD_BROADCOM_SAND128:
1354 	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1355 		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1356 
1357 		if (param > SCALER_TILE_HEIGHT_MASK) {
1358 			DRM_DEBUG_KMS("SAND height too large (%d)\n",
1359 				      param);
1360 			return -EINVAL;
1361 		}
1362 
1363 		if (fb->format->format == DRM_FORMAT_P030) {
1364 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1365 			tiling = SCALER_CTL0_TILING_128B;
1366 		} else {
1367 			hvs_format = HVS_PIXEL_FORMAT_H264;
1368 
1369 			switch (base_format_mod) {
1370 			case DRM_FORMAT_MOD_BROADCOM_SAND64:
1371 				tiling = SCALER_CTL0_TILING_64B;
1372 				break;
1373 			case DRM_FORMAT_MOD_BROADCOM_SAND128:
1374 				tiling = SCALER_CTL0_TILING_128B;
1375 				break;
1376 			case DRM_FORMAT_MOD_BROADCOM_SAND256:
1377 				tiling = SCALER_CTL0_TILING_256B_OR_T;
1378 				break;
1379 			default:
1380 				return -EINVAL;
1381 			}
1382 		}
1383 
1384 		/* Adjust the base pointer to the first pixel to be scanned
1385 		 * out.
1386 		 *
1387 		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1388 		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1389 		 * word that should be taken as the first pixel.
1390 		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1391 		 * element within the 128bit word, eg for pixel 3 the value
1392 		 * should be 6.
1393 		 */
1394 		for (i = 0; i < num_planes; i++) {
1395 			u32 tile_w, tile, x_off, pix_per_tile;
1396 
1397 			if (fb->format->format == DRM_FORMAT_P030) {
1398 				/*
1399 				 * Spec says: bits [31:4] of the given address
1400 				 * should point to the 128-bit word containing
1401 				 * the desired starting pixel, and bits[3:0]
1402 				 * should be between 0 and 11, indicating which
1403 				 * of the 12-pixels in that 128-bit word is the
1404 				 * first pixel to be used
1405 				 */
1406 				u32 remaining_pixels = src_x % 96;
1407 				u32 aligned = remaining_pixels / 12;
1408 				u32 last_bits = remaining_pixels % 12;
1409 
1410 				x_off = aligned * 16 + last_bits;
1411 				tile_w = 128;
1412 				pix_per_tile = 96;
1413 			} else {
1414 				switch (base_format_mod) {
1415 				case DRM_FORMAT_MOD_BROADCOM_SAND64:
1416 					tile_w = 64;
1417 					break;
1418 				case DRM_FORMAT_MOD_BROADCOM_SAND128:
1419 					tile_w = 128;
1420 					break;
1421 				case DRM_FORMAT_MOD_BROADCOM_SAND256:
1422 					tile_w = 256;
1423 					break;
1424 				default:
1425 					return -EINVAL;
1426 				}
1427 				pix_per_tile = tile_w / fb->format->cpp[0];
1428 				x_off = (src_x % pix_per_tile) /
1429 					(i ? h_subsample : 1) *
1430 					fb->format->cpp[i];
1431 			}
1432 
1433 			tile = src_x / pix_per_tile;
1434 
1435 			offsets[i] += param * tile_w * tile;
1436 			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1437 			offsets[i] += x_off & ~(i ? 1 : 0);
1438 		}
1439 
1440 		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
1441 		break;
1442 	}
1443 
1444 	default:
1445 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1446 			      (long long)fb->modifier);
1447 		return -EINVAL;
1448 	}
1449 
1450 	/* fetch an extra pixel if we don't actually line up with the left edge. */
1451 	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1452 		width++;
1453 
1454 	/* same for the right side */
1455 	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1456 	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1457 		width++;
1458 
1459 	/* now for the top */
1460 	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1461 		height++;
1462 
1463 	/* and the bottom */
1464 	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1465 	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1466 		height++;
1467 
1468 	/* For YUV444 the hardware wants double the width, otherwise it doesn't
1469 	 * fetch full width of chroma
1470 	 */
1471 	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1472 		width <<= 1;
1473 
1474 	/* Don't waste cycles mixing with plane alpha if the set alpha
1475 	 * is opaque or there is no per-pixel alpha information.
1476 	 * In any case we use the alpha property value as the fixed alpha.
1477 	 */
1478 	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1479 			  fb->format->has_alpha;
1480 
1481 	if (vc4->gen == VC4_GEN_4) {
1482 	/* Control word */
1483 		vc4_dlist_write(vc4_state,
1484 				SCALER_CTL0_VALID |
1485 				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
1486 				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
1487 				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
1488 				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
1489 				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1490 				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1491 				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
1492 				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1493 				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
1494 
1495 		/* Position Word 0: Image Positions and Alpha Value */
1496 		vc4_state->pos0_offset = vc4_state->dlist_count;
1497 		vc4_dlist_write(vc4_state,
1498 				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
1499 				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
1500 				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
1501 
1502 		/* Position Word 1: Scaled Image Dimensions. */
1503 		if (!vc4_state->is_unity) {
1504 			vc4_dlist_write(vc4_state,
1505 					VC4_SET_FIELD(vc4_state->crtc_w,
1506 						      SCALER_POS1_SCL_WIDTH) |
1507 					VC4_SET_FIELD(vc4_state->crtc_h,
1508 						      SCALER_POS1_SCL_HEIGHT));
1509 		}
1510 
1511 		/* Position Word 2: Source Image Size, Alpha */
1512 		vc4_state->pos2_offset = vc4_state->dlist_count;
1513 		vc4_dlist_write(vc4_state,
1514 				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
1515 				vc4_hvs4_get_alpha_blend_mode(state) |
1516 				VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
1517 				VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));
1518 
1519 		/* Position Word 3: Context.  Written by the HVS. */
1520 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1521 
1522 	} else {
1523 		/* Control word */
1524 		vc4_dlist_write(vc4_state,
1525 				SCALER_CTL0_VALID |
1526 				(format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
1527 				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1528 				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1529 				(vc4_state->is_unity ?
1530 						SCALER5_CTL0_UNITY : 0) |
1531 				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1532 				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
1533 				SCALER5_CTL0_ALPHA_EXPAND |
1534 				SCALER5_CTL0_RGB_EXPAND);
1535 
1536 		/* Position Word 0: Image Positions and Alpha Value */
1537 		vc4_state->pos0_offset = vc4_state->dlist_count;
1538 		vc4_dlist_write(vc4_state,
1539 				(rotation & DRM_MODE_REFLECT_Y ?
1540 						SCALER5_POS0_VFLIP : 0) |
1541 				VC4_SET_FIELD(vc4_state->crtc_x,
1542 					      SCALER_POS0_START_X) |
1543 				(rotation & DRM_MODE_REFLECT_X ?
1544 					      SCALER5_POS0_HFLIP : 0) |
1545 				VC4_SET_FIELD(vc4_state->crtc_y,
1546 					      SCALER5_POS0_START_Y)
1547 			       );
1548 
1549 		/* Control Word 2 */
1550 		vc4_dlist_write(vc4_state,
1551 				VC4_SET_FIELD(state->alpha >> 4,
1552 					      SCALER5_CTL2_ALPHA) |
1553 				vc4_hvs5_get_alpha_blend_mode(state) |
1554 				(mix_plane_alpha ?
1555 					SCALER5_CTL2_ALPHA_MIX : 0)
1556 			       );
1557 
1558 		/* Position Word 1: Scaled Image Dimensions. */
1559 		if (!vc4_state->is_unity) {
1560 			vc4_dlist_write(vc4_state,
1561 					VC4_SET_FIELD(vc4_state->crtc_w,
1562 						      SCALER5_POS1_SCL_WIDTH) |
1563 					VC4_SET_FIELD(vc4_state->crtc_h,
1564 						      SCALER5_POS1_SCL_HEIGHT));
1565 		}
1566 
1567 		/* Position Word 2: Source Image Size */
1568 		vc4_state->pos2_offset = vc4_state->dlist_count;
1569 		vc4_dlist_write(vc4_state,
1570 				VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
1571 				VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));
1572 
1573 		/* Position Word 3: Context.  Written by the HVS. */
1574 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1575 	}
1576 
1577 
1578 	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
1579 	 *
1580 	 * The pointers may be any byte address.
1581 	 */
1582 	vc4_state->ptr0_offset[0] = vc4_state->dlist_count;
1583 
1584 	for (i = 0; i < num_planes; i++) {
1585 		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1586 
1587 		vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]);
1588 	}
1589 
1590 	/* Pointer Context Word 0/1/2: Written by the HVS */
1591 	for (i = 0; i < num_planes; i++)
1592 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1593 
1594 	/* Pitch word 0 */
1595 	vc4_dlist_write(vc4_state, pitch0);
1596 
1597 	/* Pitch word 1/2 */
1598 	for (i = 1; i < num_planes; i++) {
1599 		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
1600 		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
1601 			vc4_dlist_write(vc4_state,
1602 					VC4_SET_FIELD(fb->pitches[i],
1603 						      SCALER_SRC_PITCH));
1604 		} else {
1605 			vc4_dlist_write(vc4_state, pitch0);
1606 		}
1607 	}
1608 
1609 	/* Colorspace conversion words */
1610 	if (vc4_state->is_yuv) {
1611 		enum drm_color_encoding color_encoding = state->color_encoding;
1612 		enum drm_color_range color_range = state->color_range;
1613 		const u32 *ccm;
1614 
1615 		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
1616 			color_encoding = DRM_COLOR_YCBCR_BT601;
1617 		if (color_range >= DRM_COLOR_RANGE_MAX)
1618 			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1619 
1620 		ccm = colorspace_coeffs[color_range][color_encoding];
1621 
1622 		vc4_dlist_write(vc4_state, ccm[0]);
1623 		vc4_dlist_write(vc4_state, ccm[1]);
1624 		vc4_dlist_write(vc4_state, ccm[2]);
1625 	}
1626 
1627 	vc4_state->lbm_offset = 0;
1628 
1629 	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
1630 	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
1631 	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1632 	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1633 		/* Reserve a slot for the LBM Base Address. The real value will
1634 		 * be set when calling vc4_plane_allocate_lbm().
1635 		 */
1636 		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1637 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1638 			vc4_state->lbm_offset = vc4_state->dlist_count;
1639 			vc4_dlist_counter_increment(vc4_state);
1640 		}
1641 
1642 		if (num_planes > 1) {
1643 			/* Emit Cb/Cr as channel 0 and Y as channel
1644 			 * 1. This matches how we set up scl0/scl1
1645 			 * above.
1646 			 */
1647 			vc4_write_scaling_parameters(state, 1);
1648 		}
1649 		vc4_write_scaling_parameters(state, 0);
1650 
1651 		/* If any PPF setup was done, then all the kernel
1652 		 * pointers get uploaded.
1653 		 */
1654 		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
1655 		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
1656 		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
1657 		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1658 			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
1659 						   SCALER_PPF_KERNEL_OFFSET);
1660 
1661 			/* HPPF plane 0 */
1662 			vc4_dlist_write(vc4_state, kernel);
1663 			/* VPPF plane 0 */
1664 			vc4_dlist_write(vc4_state, kernel);
1665 			/* HPPF plane 1 */
1666 			vc4_dlist_write(vc4_state, kernel);
1667 			/* VPPF plane 1 */
1668 			vc4_dlist_write(vc4_state, kernel);
1669 		}
1670 	}
1671 
1672 	vc4_state->dlist[ctl0_offset] |=
1673 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
1674 
1675 	/* crtc_* are already clipped coordinates. */
1676 	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1677 			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1678 			vc4_state->crtc_h == state->crtc->mode.vdisplay;
1679 	/* Background fill might be necessary when the plane has per-pixel
1680 	 * alpha content or a non-opaque plane alpha and could blend from the
1681 	 * background or does not cover the entire screen.
1682 	 */
1683 	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1684 				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1685 
1686 	/* Flag the dlist as initialized to avoid checking it twice in case
1687 	 * the async update check already called vc4_plane_mode_set() and
1688 	 * decided to fallback to sync update because async update was not
1689 	 * possible.
1690 	 */
1691 	vc4_state->dlist_initialized = 1;
1692 
1693 	vc4_plane_calc_load(state);
1694 
1695 	return 0;
1696 }
1697 
1698 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
1699 {
1700 	struct drm_plane_state *state = &vc4_state->base;
1701 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
1702 	u32 ret = 0;
1703 
1704 	if (vc4_state->is_yuv) {
1705 		enum drm_color_encoding color_encoding = state->color_encoding;
1706 		enum drm_color_range color_range = state->color_range;
1707 
1708 		/* CSC pre-loaded with:
1709 		 * 0 = BT601 limited range
1710 		 * 1 = BT709 limited range
1711 		 * 2 = BT2020 limited range
1712 		 * 3 = BT601 full range
1713 		 * 4 = BT709 full range
1714 		 * 5 = BT2020 full range
1715 		 */
1716 		if (color_encoding > DRM_COLOR_YCBCR_BT2020)
1717 			color_encoding = DRM_COLOR_YCBCR_BT601;
1718 		if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
1719 			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1720 
1721 		if (vc4->gen == VC4_GEN_6_C) {
1722 			ret |= SCALER6C_CTL2_CSC_ENABLE;
1723 			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1724 					     SCALER6C_CTL2_BRCM_CFC_CONTROL);
1725 		} else {
1726 			ret |= SCALER6D_CTL2_CSC_ENABLE;
1727 			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1728 					     SCALER6D_CTL2_BRCM_CFC_CONTROL);
1729 		}
1730 	}
1731 
1732 	return ret;
1733 }
1734 
1735 static int vc6_plane_mode_set(struct drm_plane *plane,
1736 			      struct drm_plane_state *state)
1737 {
1738 	struct drm_device *drm = plane->dev;
1739 	struct vc4_dev *vc4 = to_vc4_dev(drm);
1740 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1741 	struct drm_framebuffer *fb = state->fb;
1742 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1743 	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1744 	int num_planes = fb->format->num_planes;
1745 	u32 h_subsample = fb->format->hsub;
1746 	u32 v_subsample = fb->format->vsub;
1747 	bool mix_plane_alpha;
1748 	bool covers_screen;
1749 	u32 scl0, scl1, pitch0;
1750 	u32 tiling, src_x, src_y;
1751 	u32 width, height;
1752 	u32 hvs_format = format->hvs;
1753 	u32 offsets[3] = { 0 };
1754 	unsigned int rotation;
1755 	int ret, i;
1756 
1757 	if (vc4_state->dlist_initialized)
1758 		return 0;
1759 
1760 	ret = vc4_plane_setup_clipping_and_scaling(state);
1761 	if (ret)
1762 		return ret;
1763 
1764 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
1765 	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
1766 		/* 0 source size probably means the plane is offscreen.
1767 		 * 0 destination size is a redundant plane.
1768 		 */
1769 		vc4_state->dlist_initialized = 1;
1770 		return 0;
1771 	}
1772 
1773 	width = vc4_state->src_w[0] >> 16;
1774 	height = vc4_state->src_h[0] >> 16;
1775 
1776 	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1777 	 * and 4:4:4, scl1 should be set to scl0 so both channels of
1778 	 * the scaler do the same thing.  For YUV, the Y plane needs
1779 	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1780 	 * the scl fields here.
1781 	 */
1782 	if (num_planes == 1) {
1783 		scl0 = vc4_get_scl_field(state, 0);
1784 		scl1 = scl0;
1785 	} else {
1786 		scl0 = vc4_get_scl_field(state, 1);
1787 		scl1 = vc4_get_scl_field(state, 0);
1788 	}
1789 
1790 	rotation = drm_rotation_simplify(state->rotation,
1791 					 DRM_MODE_ROTATE_0 |
1792 					 DRM_MODE_REFLECT_X |
1793 					 DRM_MODE_REFLECT_Y);
1794 
1795 	/* We must point to the last line when Y reflection is enabled. */
1796 	src_y = vc4_state->src_y >> 16;
1797 	if (rotation & DRM_MODE_REFLECT_Y)
1798 		src_y += height - 1;
1799 
1800 	src_x = vc4_state->src_x >> 16;
1801 
1802 	switch (base_format_mod) {
1803 	case DRM_FORMAT_MOD_LINEAR:
1804 		tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
1805 
1806 		/* Adjust the base pointer to the first pixel to be scanned
1807 		 * out.
1808 		 */
1809 		for (i = 0; i < num_planes; i++) {
1810 			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1811 			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1812 		}
1813 
1814 		break;
1815 
1816 	case DRM_FORMAT_MOD_BROADCOM_SAND128:
1817 	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1818 		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1819 		u32 components_per_word;
1820 		u32 starting_offset;
1821 		u32 fetch_count;
1822 
1823 		if (param > SCALER_TILE_HEIGHT_MASK) {
1824 			DRM_DEBUG_KMS("SAND height too large (%d)\n",
1825 				      param);
1826 			return -EINVAL;
1827 		}
1828 
1829 		if (fb->format->format == DRM_FORMAT_P030) {
1830 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1831 			tiling = SCALER6_CTL0_ADDR_MODE_128B;
1832 		} else {
1833 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
1834 
1835 			switch (base_format_mod) {
1836 			case DRM_FORMAT_MOD_BROADCOM_SAND128:
1837 				tiling = SCALER6_CTL0_ADDR_MODE_128B;
1838 				break;
1839 			case DRM_FORMAT_MOD_BROADCOM_SAND256:
1840 				tiling = SCALER6_CTL0_ADDR_MODE_256B;
1841 				break;
1842 			default:
1843 				return -EINVAL;
1844 			}
1845 		}
1846 
1847 		/* Adjust the base pointer to the first pixel to be scanned
1848 		 * out.
1849 		 *
1850 		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1851 		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1852 		 * word that should be taken as the first pixel.
1853 		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1854 		 * element within the 128bit word, eg for pixel 3 the value
1855 		 * should be 6.
1856 		 */
1857 		for (i = 0; i < num_planes; i++) {
1858 			u32 tile_w, tile, x_off, pix_per_tile;
1859 
1860 			if (fb->format->format == DRM_FORMAT_P030) {
1861 				/*
1862 				 * Spec says: bits [31:4] of the given address
1863 				 * should point to the 128-bit word containing
1864 				 * the desired starting pixel, and bits[3:0]
1865 				 * should be between 0 and 11, indicating which
1866 				 * of the 12-pixels in that 128-bit word is the
1867 				 * first pixel to be used
1868 				 */
1869 				u32 remaining_pixels = src_x % 96;
1870 				u32 aligned = remaining_pixels / 12;
1871 				u32 last_bits = remaining_pixels % 12;
1872 
1873 				x_off = aligned * 16 + last_bits;
1874 				tile_w = 128;
1875 				pix_per_tile = 96;
1876 			} else {
1877 				switch (base_format_mod) {
1878 				case DRM_FORMAT_MOD_BROADCOM_SAND128:
1879 					tile_w = 128;
1880 					break;
1881 				case DRM_FORMAT_MOD_BROADCOM_SAND256:
1882 					tile_w = 256;
1883 					break;
1884 				default:
1885 					return -EINVAL;
1886 				}
1887 				pix_per_tile = tile_w / fb->format->cpp[0];
1888 				x_off = (src_x % pix_per_tile) /
1889 					(i ? h_subsample : 1) *
1890 					fb->format->cpp[i];
1891 			}
1892 
1893 			tile = src_x / pix_per_tile;
1894 
1895 			offsets[i] += param * tile_w * tile;
1896 			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1897 			offsets[i] += x_off & ~(i ? 1 : 0);
1898 		}
1899 
1900 		components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
1901 		starting_offset = src_x % components_per_word;
1902 		fetch_count = (width + starting_offset + components_per_word - 1) /
1903 			components_per_word;
1904 
1905 		pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
1906 			 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
1907 		break;
1908 	}
1909 
1910 	default:
1911 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1912 			      (long long)fb->modifier);
1913 		return -EINVAL;
1914 	}
1915 
1916 	/* fetch an extra pixel if we don't actually line up with the left edge. */
1917 	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1918 		width++;
1919 
1920 	/* same for the right side */
1921 	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1922 	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1923 		width++;
1924 
1925 	/* now for the top */
1926 	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1927 		height++;
1928 
1929 	/* and the bottom */
1930 	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1931 	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1932 		height++;
1933 
1934 	/* for YUV444 hardware wants double the width, otherwise it doesn't
1935 	 * fetch full width of chroma
1936 	 */
1937 	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1938 		width <<= 1;
1939 
1940 	/* Don't waste cycles mixing with plane alpha if the set alpha
1941 	 * is opaque or there is no per-pixel alpha information.
1942 	 * In any case we use the alpha property value as the fixed alpha.
1943 	 */
1944 	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1945 			  fb->format->has_alpha;
1946 
1947 	/* Control Word 0: Scaling Configuration & Element Validity*/
1948 	vc4_dlist_write(vc4_state,
1949 			SCALER6_CTL0_VALID |
1950 			VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
1951 			vc4_hvs6_get_alpha_mask_mode(state) |
1952 			(vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
1953 			VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
1954 			VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
1955 			VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
1956 			VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
1957 
1958 	/* Position Word 0: Image Position */
1959 	vc4_state->pos0_offset = vc4_state->dlist_count;
1960 	vc4_dlist_write(vc4_state,
1961 			VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
1962 			(rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
1963 			VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
1964 
1965 	/* Control Word 2: Alpha Value & CSC */
1966 	vc4_dlist_write(vc4_state,
1967 			vc6_plane_get_csc_mode(vc4_state) |
1968 			vc4_hvs5_get_alpha_blend_mode(state) |
1969 			(mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
1970 			VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
1971 
1972 	/* Position Word 1: Scaled Image Dimensions */
1973 	if (!vc4_state->is_unity)
1974 		vc4_dlist_write(vc4_state,
1975 				VC4_SET_FIELD(vc4_state->crtc_h - 1,
1976 					      SCALER6_POS1_SCL_LINES) |
1977 				VC4_SET_FIELD(vc4_state->crtc_w - 1,
1978 					      SCALER6_POS1_SCL_WIDTH));
1979 
1980 	/* Position Word 2: Source Image Size */
1981 	vc4_state->pos2_offset = vc4_state->dlist_count;
1982 	vc4_dlist_write(vc4_state,
1983 			VC4_SET_FIELD(height - 1,
1984 				      SCALER6_POS2_SRC_LINES) |
1985 			VC4_SET_FIELD(width - 1,
1986 				      SCALER6_POS2_SRC_WIDTH));
1987 
1988 	/* Position Word 3: Context */
1989 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1990 
1991 	/*
1992 	 * TODO: This only covers Raster Scan Order planes
1993 	 */
1994 	for (i = 0; i < num_planes; i++) {
1995 		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1996 		dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];
1997 
1998 		/* Pointer Word 0 */
1999 		vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
2000 		vc4_dlist_write(vc4_state,
2001 				(rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
2002 				/*
2003 				 * The UPM buffer will be allocated in
2004 				 * vc6_plane_allocate_upm().
2005 				 */
2006 				VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
2007 					      SCALER6_PTR0_UPPER_ADDR));
2008 
2009 		/* Pointer Word 1 */
2010 		vc4_dlist_write(vc4_state, lower_32_bits(paddr));
2011 
2012 		/* Pointer Word 2 */
2013 		if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
2014 		    base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
2015 			vc4_dlist_write(vc4_state,
2016 					VC4_SET_FIELD(fb->pitches[i],
2017 						      SCALER6_PTR2_PITCH));
2018 		} else {
2019 			vc4_dlist_write(vc4_state, pitch0);
2020 		}
2021 	}
2022 
2023 	/*
2024 	 * Palette Word 0
2025 	 * TODO: We're not using the palette mode
2026 	 */
2027 
2028 	/*
2029 	 * Trans Word 0
2030 	 * TODO: It's only relevant if we set the trans_rgb bit in the
2031 	 * control word 0, and we don't at the moment.
2032 	 */
2033 
2034 	vc4_state->lbm_offset = 0;
2035 
2036 	if (!vc4_state->is_unity || fb->format->is_yuv) {
2037 		/*
2038 		 * Reserve a slot for the LBM Base Address. The real value will
2039 		 * be set when calling vc4_plane_allocate_lbm().
2040 		 */
2041 		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2042 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2043 			vc4_state->lbm_offset = vc4_state->dlist_count;
2044 			vc4_dlist_counter_increment(vc4_state);
2045 		}
2046 
2047 		if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
2048 		    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
2049 		    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2050 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2051 			if (num_planes > 1)
2052 				/*
2053 				 * Emit Cb/Cr as channel 0 and Y as channel
2054 				 * 1. This matches how we set up scl0/scl1
2055 				 * above.
2056 				 */
2057 				vc4_write_scaling_parameters(state, 1);
2058 
2059 			vc4_write_scaling_parameters(state, 0);
2060 		}
2061 
2062 		/*
2063 		 * If any PPF setup was done, then all the kernel
2064 		 * pointers get uploaded.
2065 		 */
2066 		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
2067 		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
2068 		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
2069 		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
2070 			u32 kernel =
2071 				VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
2072 					      SCALER_PPF_KERNEL_OFFSET);
2073 
2074 			/* HPPF plane 0 */
2075 			vc4_dlist_write(vc4_state, kernel);
2076 			/* VPPF plane 0 */
2077 			vc4_dlist_write(vc4_state, kernel);
2078 			/* HPPF plane 1 */
2079 			vc4_dlist_write(vc4_state, kernel);
2080 			/* VPPF plane 1 */
2081 			vc4_dlist_write(vc4_state, kernel);
2082 		}
2083 	}
2084 
2085 	vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
2086 
2087 	vc4_state->dlist[0] |=
2088 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
2089 
2090 	/* crtc_* are already clipped coordinates. */
2091 	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
2092 			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
2093 			vc4_state->crtc_h == state->crtc->mode.vdisplay;
2094 
2095 	/*
2096 	 * Background fill might be necessary when the plane has per-pixel
2097 	 * alpha content or a non-opaque plane alpha and could blend from the
2098 	 * background or does not cover the entire screen.
2099 	 */
2100 	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
2101 				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
2102 
2103 	/*
2104 	 * Flag the dlist as initialized to avoid checking it twice in case
2105 	 * the async update check already called vc4_plane_mode_set() and
2106 	 * decided to fallback to sync update because async update was not
2107 	 * possible.
2108 	 */
2109 	vc4_state->dlist_initialized = 1;
2110 
2111 	vc4_plane_calc_load(state);
2112 
2113 	drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
2114 		       plane->base.id, plane->name, vc4_state->dlist_count);
2115 
2116 	return 0;
2117 }
2118 
2119 /* If a modeset involves changing the setup of a plane, the atomic
2120  * infrastructure will call this to validate a proposed plane setup.
2121  * However, if a plane isn't getting updated, this (and the
2122  * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
2123  * compute the dlist here and have all active plane dlists get updated
2124  * in the CRTC's flush.
2125  */
2126 static int vc4_plane_atomic_check(struct drm_plane *plane,
2127 				  struct drm_atomic_state *state)
2128 {
2129 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2130 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2131 										 plane);
2132 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
2133 	int ret;
2134 
2135 	vc4_state->dlist_count = 0;
2136 
2137 	if (!plane_enabled(new_plane_state)) {
2138 		struct drm_plane_state *old_plane_state =
2139 				drm_atomic_get_old_plane_state(state, plane);
2140 
2141 		if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
2142 		    plane_enabled(old_plane_state)) {
2143 			vc6_plane_free_upm(new_plane_state);
2144 		}
2145 		return 0;
2146 	}
2147 
2148 	if (vc4->gen >= VC4_GEN_6_C)
2149 		ret = vc6_plane_mode_set(plane, new_plane_state);
2150 	else
2151 		ret = vc4_plane_mode_set(plane, new_plane_state);
2152 	if (ret)
2153 		return ret;
2154 
2155 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
2156 	    !vc4_state->crtc_w || !vc4_state->crtc_h)
2157 		return 0;
2158 
2159 	ret = vc4_plane_allocate_lbm(new_plane_state);
2160 	if (ret)
2161 		return ret;
2162 
2163 	if (vc4->gen >= VC4_GEN_6_C) {
2164 		ret = vc6_plane_allocate_upm(new_plane_state);
2165 		if (ret)
2166 			return ret;
2167 	}
2168 
2169 	return 0;
2170 }
2171 
static void
vc4_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state)
{
	/*
	 * Intentionally empty.
	 *
	 * At this point we don't know where in the CRTC's dlist this plane
	 * will be stored, so the plane's dlist is uploaded to the hardware
	 * with vc4_plane_write_dlist() at CRTC atomic_flush time instead.
	 */
}
2181 
2182 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
2183 {
2184 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2185 	int i;
2186 	int idx;
2187 
2188 	if (!drm_dev_enter(plane->dev, &idx))
2189 		goto out;
2190 
2191 	vc4_state->hw_dlist = dlist;
2192 
2193 	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
2194 	for (i = 0; i < vc4_state->dlist_count; i++)
2195 		writel(vc4_state->dlist[i], &dlist[i]);
2196 
2197 	drm_dev_exit(idx);
2198 
2199 out:
2200 	return vc4_state->dlist_count;
2201 }
2202 
2203 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
2204 {
2205 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
2206 
2207 	return vc4_state->dlist_count;
2208 }
2209 
2210 /* Updates the plane to immediately (well, once the FIFO needs
2211  * refilling) scan out from at a new framebuffer.
2212  */
2213 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
2214 {
2215 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2216 	struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
2217 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2218 	dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
2219 	int idx;
2220 
2221 	if (!drm_dev_enter(plane->dev, &idx))
2222 		return;
2223 
2224 	/* We're skipping the address adjustment for negative origin,
2225 	 * because this is only called on the primary plane.
2226 	 */
2227 	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
2228 
2229 	if (vc4->gen == VC4_GEN_6_C) {
2230 		u32 value;
2231 
2232 		value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
2233 					~SCALER6_PTR0_UPPER_ADDR_MASK;
2234 		value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
2235 				       SCALER6_PTR0_UPPER_ADDR);
2236 
2237 		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2238 		vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;
2239 
2240 		value = lower_32_bits(dma_addr);
2241 		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
2242 		vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
2243 	} else {
2244 		u32 addr;
2245 
2246 		addr = (u32)dma_addr;
2247 
2248 		/* Write the new address into the hardware immediately.  The
2249 		 * scanout will start from this address as soon as the FIFO
2250 		 * needs to refill with pixels.
2251 		 */
2252 		writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2253 
2254 		/* Also update the CPU-side dlist copy, so that any later
2255 		 * atomic updates that don't do a new modeset on our plane
2256 		 * also use our updated address.
2257 		 */
2258 		vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
2259 	}
2260 
2261 	drm_dev_exit(idx);
2262 }
2263 
2264 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
2265 					  struct drm_atomic_state *state)
2266 {
2267 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2268 										 plane);
2269 	struct vc4_plane_state *vc4_state, *new_vc4_state;
2270 	int idx;
2271 
2272 	if (!drm_dev_enter(plane->dev, &idx))
2273 		return;
2274 
2275 	swap(plane->state->fb, new_plane_state->fb);
2276 	plane->state->crtc_x = new_plane_state->crtc_x;
2277 	plane->state->crtc_y = new_plane_state->crtc_y;
2278 	plane->state->crtc_w = new_plane_state->crtc_w;
2279 	plane->state->crtc_h = new_plane_state->crtc_h;
2280 	plane->state->src_x = new_plane_state->src_x;
2281 	plane->state->src_y = new_plane_state->src_y;
2282 	plane->state->src_w = new_plane_state->src_w;
2283 	plane->state->src_h = new_plane_state->src_h;
2284 	plane->state->alpha = new_plane_state->alpha;
2285 	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
2286 	plane->state->rotation = new_plane_state->rotation;
2287 	plane->state->zpos = new_plane_state->zpos;
2288 	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
2289 	plane->state->color_encoding = new_plane_state->color_encoding;
2290 	plane->state->color_range = new_plane_state->color_range;
2291 	plane->state->src = new_plane_state->src;
2292 	plane->state->dst = new_plane_state->dst;
2293 	plane->state->visible = new_plane_state->visible;
2294 
2295 	new_vc4_state = to_vc4_plane_state(new_plane_state);
2296 	vc4_state = to_vc4_plane_state(plane->state);
2297 
2298 	vc4_state->crtc_x = new_vc4_state->crtc_x;
2299 	vc4_state->crtc_y = new_vc4_state->crtc_y;
2300 	vc4_state->crtc_h = new_vc4_state->crtc_h;
2301 	vc4_state->crtc_w = new_vc4_state->crtc_w;
2302 	vc4_state->src_x = new_vc4_state->src_x;
2303 	vc4_state->src_y = new_vc4_state->src_y;
2304 	memcpy(vc4_state->src_w, new_vc4_state->src_w,
2305 	       sizeof(vc4_state->src_w));
2306 	memcpy(vc4_state->src_h, new_vc4_state->src_h,
2307 	       sizeof(vc4_state->src_h));
2308 	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
2309 	       sizeof(vc4_state->x_scaling));
2310 	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
2311 	       sizeof(vc4_state->y_scaling));
2312 	vc4_state->is_unity = new_vc4_state->is_unity;
2313 	vc4_state->is_yuv = new_vc4_state->is_yuv;
2314 	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
2315 
2316 	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
2317 	vc4_state->dlist[vc4_state->pos0_offset] =
2318 		new_vc4_state->dlist[vc4_state->pos0_offset];
2319 	vc4_state->dlist[vc4_state->pos2_offset] =
2320 		new_vc4_state->dlist[vc4_state->pos2_offset];
2321 	vc4_state->dlist[vc4_state->ptr0_offset[0]] =
2322 		new_vc4_state->dlist[vc4_state->ptr0_offset[0]];
2323 
2324 	/* Note that we can't just call vc4_plane_write_dlist()
2325 	 * because that would smash the context data that the HVS is
2326 	 * currently using.
2327 	 */
2328 	writel(vc4_state->dlist[vc4_state->pos0_offset],
2329 	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
2330 	writel(vc4_state->dlist[vc4_state->pos2_offset],
2331 	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
2332 	writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
2333 	       &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2334 
2335 	drm_dev_exit(idx);
2336 }
2337 
2338 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
2339 					struct drm_atomic_state *state, bool flip)
2340 {
2341 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2342 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2343 										 plane);
2344 	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
2345 	int ret;
2346 	u32 i;
2347 
2348 	if (vc4->gen <= VC4_GEN_5)
2349 		ret = vc4_plane_mode_set(plane, new_plane_state);
2350 	else
2351 		ret = vc6_plane_mode_set(plane, new_plane_state);
2352 	if (ret)
2353 		return ret;
2354 
2355 	old_vc4_state = to_vc4_plane_state(plane->state);
2356 	new_vc4_state = to_vc4_plane_state(new_plane_state);
2357 
2358 	if (!new_vc4_state->hw_dlist)
2359 		return -EINVAL;
2360 
2361 	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
2362 	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
2363 	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
2364 	    old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
2365 	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
2366 		return -EINVAL;
2367 
2368 	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
2369 	 * if anything else has changed, fallback to a sync update.
2370 	 */
2371 	for (i = 0; i < new_vc4_state->dlist_count; i++) {
2372 		if (i == new_vc4_state->pos0_offset ||
2373 		    i == new_vc4_state->pos2_offset ||
2374 		    i == new_vc4_state->ptr0_offset[0] ||
2375 		    (new_vc4_state->lbm_offset &&
2376 		     i == new_vc4_state->lbm_offset))
2377 			continue;
2378 
2379 		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
2380 			return -EINVAL;
2381 	}
2382 
2383 	return 0;
2384 }
2385 
2386 static int vc4_prepare_fb(struct drm_plane *plane,
2387 			  struct drm_plane_state *state)
2388 {
2389 	struct vc4_bo *bo;
2390 	int ret;
2391 
2392 	if (!state->fb)
2393 		return 0;
2394 
2395 	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2396 
2397 	ret = drm_gem_plane_helper_prepare_fb(plane, state);
2398 	if (ret)
2399 		return ret;
2400 
2401 	return vc4_bo_inc_usecnt(bo);
2402 }
2403 
2404 static void vc4_cleanup_fb(struct drm_plane *plane,
2405 			   struct drm_plane_state *state)
2406 {
2407 	struct vc4_bo *bo;
2408 
2409 	if (!state->fb)
2410 		return;
2411 
2412 	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2413 	vc4_bo_dec_usecnt(bo);
2414 }
2415 
2416 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
2417 	.atomic_check = vc4_plane_atomic_check,
2418 	.atomic_update = vc4_plane_atomic_update,
2419 	.prepare_fb = vc4_prepare_fb,
2420 	.cleanup_fb = vc4_cleanup_fb,
2421 	.atomic_async_check = vc4_plane_atomic_async_check,
2422 	.atomic_async_update = vc4_plane_atomic_async_update,
2423 };
2424 
2425 static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
2426 	.atomic_check = vc4_plane_atomic_check,
2427 	.atomic_update = vc4_plane_atomic_update,
2428 	.atomic_async_check = vc4_plane_atomic_async_check,
2429 	.atomic_async_update = vc4_plane_atomic_async_update,
2430 };
2431 
2432 static bool vc4_format_mod_supported(struct drm_plane *plane,
2433 				     uint32_t format,
2434 				     uint64_t modifier)
2435 {
2436 	/* Support T_TILING for RGB formats only. */
2437 	switch (format) {
2438 	case DRM_FORMAT_XRGB8888:
2439 	case DRM_FORMAT_ARGB8888:
2440 	case DRM_FORMAT_ABGR8888:
2441 	case DRM_FORMAT_XBGR8888:
2442 	case DRM_FORMAT_RGB565:
2443 	case DRM_FORMAT_BGR565:
2444 	case DRM_FORMAT_ARGB1555:
2445 	case DRM_FORMAT_XRGB1555:
2446 		switch (fourcc_mod_broadcom_mod(modifier)) {
2447 		case DRM_FORMAT_MOD_LINEAR:
2448 		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
2449 			return true;
2450 		default:
2451 			return false;
2452 		}
2453 	case DRM_FORMAT_NV12:
2454 	case DRM_FORMAT_NV21:
2455 		switch (fourcc_mod_broadcom_mod(modifier)) {
2456 		case DRM_FORMAT_MOD_LINEAR:
2457 		case DRM_FORMAT_MOD_BROADCOM_SAND64:
2458 		case DRM_FORMAT_MOD_BROADCOM_SAND128:
2459 		case DRM_FORMAT_MOD_BROADCOM_SAND256:
2460 			return true;
2461 		default:
2462 			return false;
2463 		}
2464 	case DRM_FORMAT_P030:
2465 		switch (fourcc_mod_broadcom_mod(modifier)) {
2466 		case DRM_FORMAT_MOD_BROADCOM_SAND128:
2467 			return true;
2468 		default:
2469 			return false;
2470 		}
2471 	case DRM_FORMAT_RGBX1010102:
2472 	case DRM_FORMAT_BGRX1010102:
2473 	case DRM_FORMAT_RGBA1010102:
2474 	case DRM_FORMAT_BGRA1010102:
2475 	case DRM_FORMAT_XRGB4444:
2476 	case DRM_FORMAT_ARGB4444:
2477 	case DRM_FORMAT_XBGR4444:
2478 	case DRM_FORMAT_ABGR4444:
2479 	case DRM_FORMAT_RGBX4444:
2480 	case DRM_FORMAT_RGBA4444:
2481 	case DRM_FORMAT_BGRX4444:
2482 	case DRM_FORMAT_BGRA4444:
2483 	case DRM_FORMAT_RGB332:
2484 	case DRM_FORMAT_BGR233:
2485 	case DRM_FORMAT_YUV422:
2486 	case DRM_FORMAT_YVU422:
2487 	case DRM_FORMAT_YUV420:
2488 	case DRM_FORMAT_YVU420:
2489 	case DRM_FORMAT_NV16:
2490 	case DRM_FORMAT_NV61:
2491 	default:
2492 		return (modifier == DRM_FORMAT_MOD_LINEAR);
2493 	}
2494 }
2495 
2496 static const struct drm_plane_funcs vc4_plane_funcs = {
2497 	.update_plane = drm_atomic_helper_update_plane,
2498 	.disable_plane = drm_atomic_helper_disable_plane,
2499 	.reset = vc4_plane_reset,
2500 	.atomic_duplicate_state = vc4_plane_duplicate_state,
2501 	.atomic_destroy_state = vc4_plane_destroy_state,
2502 	.format_mod_supported = vc4_format_mod_supported,
2503 };
2504 
2505 struct drm_plane *vc4_plane_init(struct drm_device *dev,
2506 				 enum drm_plane_type type,
2507 				 uint32_t possible_crtcs)
2508 {
2509 	struct vc4_dev *vc4 = to_vc4_dev(dev);
2510 	struct drm_plane *plane;
2511 	struct vc4_plane *vc4_plane;
2512 	u32 formats[ARRAY_SIZE(hvs_formats)];
2513 	int num_formats = 0;
2514 	unsigned i;
2515 	static const uint64_t modifiers[] = {
2516 		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
2517 		DRM_FORMAT_MOD_BROADCOM_SAND128,
2518 		DRM_FORMAT_MOD_BROADCOM_SAND64,
2519 		DRM_FORMAT_MOD_BROADCOM_SAND256,
2520 		DRM_FORMAT_MOD_LINEAR,
2521 		DRM_FORMAT_MOD_INVALID
2522 	};
2523 
2524 	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
2525 		if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
2526 			formats[num_formats] = hvs_formats[i].drm;
2527 			num_formats++;
2528 		}
2529 	}
2530 
2531 	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
2532 					       possible_crtcs,
2533 					       &vc4_plane_funcs,
2534 					       formats, num_formats,
2535 					       modifiers, type, NULL);
2536 	if (IS_ERR(vc4_plane))
2537 		return ERR_CAST(vc4_plane);
2538 	plane = &vc4_plane->base;
2539 
2540 	if (vc4->gen >= VC4_GEN_5)
2541 		drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
2542 	else
2543 		drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
2544 
2545 	drm_plane_create_alpha_property(plane);
2546 	drm_plane_create_blend_mode_property(plane,
2547 					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
2548 					     BIT(DRM_MODE_BLEND_PREMULTI) |
2549 					     BIT(DRM_MODE_BLEND_COVERAGE));
2550 	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
2551 					   DRM_MODE_ROTATE_0 |
2552 					   DRM_MODE_ROTATE_180 |
2553 					   DRM_MODE_REFLECT_X |
2554 					   DRM_MODE_REFLECT_Y);
2555 
2556 	drm_plane_create_color_properties(plane,
2557 					  BIT(DRM_COLOR_YCBCR_BT601) |
2558 					  BIT(DRM_COLOR_YCBCR_BT709) |
2559 					  BIT(DRM_COLOR_YCBCR_BT2020),
2560 					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
2561 					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
2562 					  DRM_COLOR_YCBCR_BT709,
2563 					  DRM_COLOR_YCBCR_LIMITED_RANGE);
2564 
2565 	if (type == DRM_PLANE_TYPE_PRIMARY)
2566 		drm_plane_create_zpos_immutable_property(plane, 0);
2567 
2568 	return plane;
2569 }
2570 
2571 #define VC4_NUM_OVERLAY_PLANES	16
2572 
2573 int vc4_plane_create_additional_planes(struct drm_device *drm)
2574 {
2575 	struct drm_plane *cursor_plane;
2576 	struct drm_crtc *crtc;
2577 	unsigned int i;
2578 
2579 	/* Set up some arbitrary number of planes.  We're not limited
2580 	 * by a set number of physical registers, just the space in
2581 	 * the HVS (16k) and how small an plane can be (28 bytes).
2582 	 * However, each plane we set up takes up some memory, and
2583 	 * increases the cost of looping over planes, which atomic
2584 	 * modesetting does quite a bit.  As a result, we pick a
2585 	 * modest number of planes to expose, that should hopefully
2586 	 * still cover any sane usecase.
2587 	 */
2588 	for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
2589 		struct drm_plane *plane =
2590 			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
2591 				       GENMASK(drm->mode_config.num_crtc - 1, 0));
2592 
2593 		if (IS_ERR(plane))
2594 			continue;
2595 
2596 		/* Create zpos property. Max of all the overlays + 1 primary +
2597 		 * 1 cursor plane on a crtc.
2598 		 */
2599 		drm_plane_create_zpos_property(plane, i + 1, 1,
2600 					       VC4_NUM_OVERLAY_PLANES + 1);
2601 	}
2602 
2603 	drm_for_each_crtc(crtc, drm) {
2604 		/* Set up the legacy cursor after overlay initialization,
2605 		 * since the zpos fallback is that planes are rendered by plane
2606 		 * ID order, and that then puts the cursor on top.
2607 		 */
2608 		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
2609 					      drm_crtc_mask(crtc));
2610 		if (!IS_ERR(cursor_plane)) {
2611 			crtc->cursor = cursor_plane;
2612 
2613 			drm_plane_create_zpos_property(cursor_plane,
2614 						       VC4_NUM_OVERLAY_PLANES + 1,
2615 						       1,
2616 						       VC4_NUM_OVERLAY_PLANES + 1);
2617 		}
2618 	}
2619 
2620 	return 0;
2621 }
2622