xref: /linux/drivers/gpu/drm/vc4/vc4_plane.c (revision 74ba587f402d5501af2c85e50cf1e4044263b6ca)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Broadcom
4  */
5 
6 /**
7  * DOC: VC4 plane module
8  *
9  * Each DRM plane is a layer of pixels being scanned out by the HVS.
10  *
11  * At atomic modeset check time, we compute the HVS display element
12  * state that would be necessary for displaying the plane (giving us a
13  * chance to figure out if a plane configuration is invalid), then at
14  * atomic flush time the CRTC will ask us to write our element state
15  * into the region of the HVS that it has allocated for us.
16  */
17 
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_blend.h>
22 #include <drm/drm_drv.h>
23 #include <drm/drm_fb_dma_helper.h>
24 #include <drm/drm_fourcc.h>
25 #include <drm/drm_framebuffer.h>
26 #include <drm/drm_gem_atomic_helper.h>
27 #include <drm/drm_print.h>
28 
29 #include "uapi/drm/vc4_drm.h"
30 
31 #include "vc4_drv.h"
32 #include "vc4_regs.h"
33 
34 static const struct hvs_format {
35 	u32 drm; /* DRM_FORMAT_* */
36 	u32 hvs; /* HVS_FORMAT_* */
37 	u32 pixel_order;
38 	u32 pixel_order_hvs5;
39 	bool hvs5_only;
40 } hvs_formats[] = {
41 	{
42 		.drm = DRM_FORMAT_XRGB8888,
43 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
44 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
45 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
46 	},
47 	{
48 		.drm = DRM_FORMAT_ARGB8888,
49 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
50 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
51 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
52 	},
53 	{
54 		.drm = DRM_FORMAT_ABGR8888,
55 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
56 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
57 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
58 	},
59 	{
60 		.drm = DRM_FORMAT_XBGR8888,
61 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
62 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
63 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
64 	},
65 	{
66 		.drm = DRM_FORMAT_RGB565,
67 		.hvs = HVS_PIXEL_FORMAT_RGB565,
68 		.pixel_order = HVS_PIXEL_ORDER_XRGB,
69 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
70 	},
71 	{
72 		.drm = DRM_FORMAT_BGR565,
73 		.hvs = HVS_PIXEL_FORMAT_RGB565,
74 		.pixel_order = HVS_PIXEL_ORDER_XBGR,
75 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
76 	},
77 	{
78 		.drm = DRM_FORMAT_ARGB1555,
79 		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
80 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
81 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
82 	},
83 	{
84 		.drm = DRM_FORMAT_XRGB1555,
85 		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
86 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
87 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
88 	},
89 	{
90 		.drm = DRM_FORMAT_RGB888,
91 		.hvs = HVS_PIXEL_FORMAT_RGB888,
92 		.pixel_order = HVS_PIXEL_ORDER_XRGB,
93 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
94 	},
95 	{
96 		.drm = DRM_FORMAT_BGR888,
97 		.hvs = HVS_PIXEL_FORMAT_RGB888,
98 		.pixel_order = HVS_PIXEL_ORDER_XBGR,
99 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
100 	},
101 	{
102 		.drm = DRM_FORMAT_YUV422,
103 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
104 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
105 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
106 	},
107 	{
108 		.drm = DRM_FORMAT_YVU422,
109 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
110 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
111 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
112 	},
113 	{
114 		.drm = DRM_FORMAT_YUV444,
115 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
116 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
117 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
118 	},
119 	{
120 		.drm = DRM_FORMAT_YVU444,
121 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
122 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
123 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
124 	},
125 	{
126 		.drm = DRM_FORMAT_YUV420,
127 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
128 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
129 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
130 	},
131 	{
132 		.drm = DRM_FORMAT_YVU420,
133 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
134 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
135 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
136 	},
137 	{
138 		.drm = DRM_FORMAT_NV12,
139 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
140 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
141 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
142 	},
143 	{
144 		.drm = DRM_FORMAT_NV21,
145 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
146 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
147 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
148 	},
149 	{
150 		.drm = DRM_FORMAT_NV16,
151 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
152 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
153 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
154 	},
155 	{
156 		.drm = DRM_FORMAT_NV61,
157 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
158 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
159 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
160 	},
161 	{
162 		.drm = DRM_FORMAT_P030,
163 		.hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
164 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
165 		.hvs5_only = true,
166 	},
167 	{
168 		.drm = DRM_FORMAT_XRGB2101010,
169 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
170 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
171 		.hvs5_only = true,
172 	},
173 	{
174 		.drm = DRM_FORMAT_ARGB2101010,
175 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
176 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
177 		.hvs5_only = true,
178 	},
179 	{
180 		.drm = DRM_FORMAT_ABGR2101010,
181 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
182 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
183 		.hvs5_only = true,
184 	},
185 	{
186 		.drm = DRM_FORMAT_XBGR2101010,
187 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
188 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
189 		.hvs5_only = true,
190 	},
191 	{
192 		.drm = DRM_FORMAT_RGB332,
193 		.hvs = HVS_PIXEL_FORMAT_RGB332,
194 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
195 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
196 	},
197 	{
198 		.drm = DRM_FORMAT_BGR233,
199 		.hvs = HVS_PIXEL_FORMAT_RGB332,
200 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
201 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
202 	},
203 	{
204 		.drm = DRM_FORMAT_XRGB4444,
205 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
206 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
207 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
208 	},
209 	{
210 		.drm = DRM_FORMAT_ARGB4444,
211 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
212 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
213 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
214 	},
215 	{
216 		.drm = DRM_FORMAT_XBGR4444,
217 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
218 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
219 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
220 	},
221 	{
222 		.drm = DRM_FORMAT_ABGR4444,
223 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
224 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
225 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
226 	},
227 	{
228 		.drm = DRM_FORMAT_BGRX4444,
229 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
230 		.pixel_order = HVS_PIXEL_ORDER_RGBA,
231 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
232 	},
233 	{
234 		.drm = DRM_FORMAT_BGRA4444,
235 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
236 		.pixel_order = HVS_PIXEL_ORDER_RGBA,
237 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
238 	},
239 	{
240 		.drm = DRM_FORMAT_RGBX4444,
241 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
242 		.pixel_order = HVS_PIXEL_ORDER_BGRA,
243 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
244 	},
245 	{
246 		.drm = DRM_FORMAT_RGBA4444,
247 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
248 		.pixel_order = HVS_PIXEL_ORDER_BGRA,
249 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
250 	},
251 };
252 
253 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
254 {
255 	unsigned i;
256 
257 	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
258 		if (hvs_formats[i].drm == drm_format)
259 			return &hvs_formats[i];
260 	}
261 
262 	return NULL;
263 }
264 
265 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
266 {
267 	if (dst == src >> 16)
268 		return VC4_SCALING_NONE;
269 	if (3 * dst >= 2 * (src >> 16))
270 		return VC4_SCALING_PPF;
271 	else
272 		return VC4_SCALING_TPZ;
273 }
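/*
 * For example (a sketch; src is a 16.16 fixed-point size and dst is in
 * pixels): with src = 1920 << 16, dst = 1920 is unity (VC4_SCALING_NONE),
 * dst = 1280 still satisfies 3 * 1280 >= 2 * 1920 and stays on the
 * polyphase filter (PPF), while dst = 1279 drops below the 2/3 ratio and
 * falls back to the trapezoidal (TPZ) scaler. Any upscale lands in PPF as
 * well.
 */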
274 
275 static bool plane_enabled(struct drm_plane_state *state)
276 {
277 	return state->fb && !WARN_ON(!state->crtc);
278 }
279 
280 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
281 {
282 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
283 	struct vc4_hvs *hvs = vc4->hvs;
284 	struct vc4_plane_state *vc4_state;
285 	unsigned int i;
286 
287 	if (WARN_ON(!plane->state))
288 		return NULL;
289 
290 	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
291 	if (!vc4_state)
292 		return NULL;
293 
294 	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
295 
296 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
297 		if (vc4_state->upm_handle[i])
298 			refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
299 	}
300 
301 	vc4_state->dlist_initialized = 0;
302 
303 	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
304 
305 	if (vc4_state->dlist) {
306 		vc4_state->dlist = kmemdup(vc4_state->dlist,
307 					   vc4_state->dlist_count * 4,
308 					   GFP_KERNEL);
309 		if (!vc4_state->dlist) {
310 			kfree(vc4_state);
311 			return NULL;
312 		}
313 		vc4_state->dlist_size = vc4_state->dlist_count;
314 	}
315 
316 	return &vc4_state->base;
317 }
318 
319 static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
320 {
321 	struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
322 	unsigned long irqflags;
323 
324 	spin_lock_irqsave(&hvs->mm_lock, irqflags);
325 	drm_mm_remove_node(&refcount->upm);
326 	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
327 	refcount->upm.start = 0;
328 	refcount->upm.size = 0;
329 	refcount->size = 0;
330 
331 	ida_free(&hvs->upm_handles, upm_handle);
332 }
333 
334 static void vc4_plane_destroy_state(struct drm_plane *plane,
335 				    struct drm_plane_state *state)
336 {
337 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
338 	struct vc4_hvs *hvs = vc4->hvs;
339 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
340 	unsigned int i;
341 
342 	if (drm_mm_node_allocated(&vc4_state->lbm)) {
343 		unsigned long irqflags;
344 
345 		spin_lock_irqsave(&hvs->mm_lock, irqflags);
346 		drm_mm_remove_node(&vc4_state->lbm);
347 		spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
348 	}
349 
350 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
351 		struct vc4_upm_refcounts *refcount;
352 
353 		if (!vc4_state->upm_handle[i])
354 			continue;
355 
356 		refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];
357 
358 		if (refcount_dec_and_test(&refcount->refcount))
359 			vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
360 	}
361 
362 	kfree(vc4_state->dlist);
363 	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
364 	kfree(state);
365 }
366 
367 /* Called during init to allocate the plane's atomic state. */
368 static void vc4_plane_reset(struct drm_plane *plane)
369 {
370 	struct vc4_plane_state *vc4_state;
371 
372 	if (plane->state)
373 		__drm_atomic_helper_plane_destroy_state(plane->state);
374 
375 	kfree(plane->state);
376 
377 	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
378 	if (!vc4_state)
379 		return;
380 
381 	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
382 }
383 
384 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
385 {
386 	if (vc4_state->dlist_count == vc4_state->dlist_size) {
387 		u32 new_size = max(4u, vc4_state->dlist_count * 2);
388 		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
389 
390 		if (!new_dlist)
391 			return;
392 		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
393 
394 		kfree(vc4_state->dlist);
395 		vc4_state->dlist = new_dlist;
396 		vc4_state->dlist_size = new_size;
397 	}
398 
399 	vc4_state->dlist_count++;
400 }
401 
402 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
403 {
404 	unsigned int idx = vc4_state->dlist_count;
405 
406 	vc4_dlist_counter_increment(vc4_state);
407 	vc4_state->dlist[idx] = val;
408 }
409 
410 /* Returns the scl0/scl1 field based on whether the dimensions need to
411  * be up/down/non-scaled.
412  *
413  * This is a replication of a table from the spec.
414  */
415 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
416 {
417 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
418 
419 	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
420 	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
421 		return SCALER_CTL0_SCL_H_PPF_V_PPF;
422 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
423 		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
424 	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
425 		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
426 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
427 		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
428 	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
429 		return SCALER_CTL0_SCL_H_PPF_V_NONE;
430 	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
431 		return SCALER_CTL0_SCL_H_NONE_V_PPF;
432 	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
433 		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
434 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
435 		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
436 	default:
437 	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
438 		/* The unity case is independently handled by
439 		 * SCALER_CTL0_UNITY.
440 		 */
441 		return 0;
442 	}
443 }
444 
445 static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
446 {
447 	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
448 	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
449 	struct drm_crtc_state *crtc_state;
450 
451 	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
452 						   pstate->crtc);
453 
454 	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
455 	if (!left && !right && !top && !bottom)
456 		return 0;
457 
458 	if (left + right >= crtc_state->mode.hdisplay ||
459 	    top + bottom >= crtc_state->mode.vdisplay)
460 		return -EINVAL;
461 
462 	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
463 	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
464 					       adjhdisplay,
465 					       crtc_state->mode.hdisplay);
466 	vc4_pstate->crtc_x += left;
467 	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
468 		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;
469 
470 	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
471 	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
472 					       adjvdisplay,
473 					       crtc_state->mode.vdisplay);
474 	vc4_pstate->crtc_y += top;
475 	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
476 		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;
477 
478 	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
479 					       adjhdisplay,
480 					       crtc_state->mode.hdisplay);
481 	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
482 					       adjvdisplay,
483 					       crtc_state->mode.vdisplay);
484 
485 	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
486 		return -EINVAL;
487 
488 	return 0;
489 }
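/*
 * A rough worked example of the adjustment above, assuming a 1920x1080
 * mode with 32-pixel left and right margins: adjhdisplay = 1856, so a
 * full-width plane (crtc_x = 0, crtc_w = 1920) becomes crtc_x = 32,
 * crtc_w = 1856, i.e. it is proportionally squeezed into the area inside
 * the margins. The vertical axis is handled the same way.
 */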
490 
491 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
492 {
493 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
494 	struct drm_framebuffer *fb = state->fb;
495 	int num_planes = fb->format->num_planes;
496 	struct drm_crtc_state *crtc_state;
497 	u32 h_subsample = fb->format->hsub;
498 	u32 v_subsample = fb->format->vsub;
499 	int ret;
500 
501 	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
502 	if (!crtc_state) {
503 		DRM_DEBUG_KMS("Invalid crtc state\n");
504 		return -EINVAL;
505 	}
506 
507 	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
508 						  INT_MAX, true, true);
509 	if (ret)
510 		return ret;
511 
512 	vc4_state->src_x = state->src.x1;
513 	vc4_state->src_y = state->src.y1;
514 	vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
515 	vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;
516 
517 	vc4_state->crtc_x = state->dst.x1;
518 	vc4_state->crtc_y = state->dst.y1;
519 	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
520 	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
521 
522 	ret = vc4_plane_margins_adj(state);
523 	if (ret)
524 		return ret;
525 
526 	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
527 						       vc4_state->crtc_w);
528 	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
529 						       vc4_state->crtc_h);
530 
531 	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
532 			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);
533 
534 	if (num_planes > 1) {
535 		vc4_state->is_yuv = true;
536 
537 		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
538 		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
539 
540 		vc4_state->x_scaling[1] =
541 			vc4_get_scaling_mode(vc4_state->src_w[1],
542 					     vc4_state->crtc_w);
543 		vc4_state->y_scaling[1] =
544 			vc4_get_scaling_mode(vc4_state->src_h[1],
545 					     vc4_state->crtc_h);
546 
547 		/* YUV conversion requires that horizontal scaling be enabled
548 		 * on the UV plane even if vc4_get_scaling_mode() returned
549 		 * VC4_SCALING_NONE (which can happen when the down-scaling
550 		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
551 		 * case.
552 		 */
553 		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
554 			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
555 
556 		/* Similarly UV needs vertical scaling to be enabled.
557 		 * Without this a 1:1 scaled YUV422 plane isn't rendered.
558 		 */
559 		if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
560 			vc4_state->y_scaling[1] = VC4_SCALING_PPF;
561 	} else {
562 		vc4_state->is_yuv = false;
563 		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
564 		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
565 	}
566 
567 	return 0;
568 }
569 
570 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
571 {
572 	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
573 	u32 scale, recip;
574 
575 	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);
576 
577 	scale = src / dst;
578 
579 	/* The specs note that while the reciprocal would be defined
580 	 * as (1<<32)/scale, ~0 is close enough.
581 	 */
582 	recip = ~0 / scale;
583 
584 	vc4_dlist_write(vc4_state,
585 			/*
586 			 * The BCM2712 is lacking BIT(31) compared to
587 			 * the previous generations, but we don't use
588 			 * it.
589 			 */
590 			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
591 			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
592 	vc4_dlist_write(vc4_state,
593 			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
594 }
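/*
 * For example (a sketch): downscaling a 1920-pixel-wide source
 * (src = 1920 << 16) to dst = 640 gives scale = 0x30000 (3.0 in 16.16
 * fixed point) and recip = 0xffffffff / 0x30000 = 0x5555, close enough
 * to the exact (1 << 32) / scale mentioned in the specs.
 */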
595 
596 /* phase magnitude bits */
597 #define PHASE_BITS 6
598 
599 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
600 			  u32 xy, int channel)
601 {
602 	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
603 	u32 scale = src / dst;
604 	s32 offset, offset2;
605 	s32 phase;
606 
607 	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);
608 
609 	/*
610 	 * Start the phase at 1/2 pixel from the 1st pixel at src_x.
611 	 * 1/4 pixel for YUV.
612 	 */
613 	if (channel) {
614 		/*
615 		 * The phase is relative to scale_src->x, so shift it for
616 		 * display list's x value
617 		 */
618 		offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
619 		offset += -(1 << PHASE_BITS >> 2);
620 	} else {
621 		/*
622 		 * The phase is relative to scale_src->x, so shift it for
623 		 * display list's x value
624 		 */
625 		offset = (xy & 0xffff) >> (16 - PHASE_BITS);
626 		offset += -(1 << PHASE_BITS >> 1);
627 
628 		/*
629 		 * This is a kludge to make sure the scaling factors are
630 		 * consistent with YUV's luma scaling. We lose 1-bit precision
631 		 * because of this.
632 		 */
633 		scale &= ~1;
634 	}
635 
636 	/*
637 	 * There may also be a small error introduced by the precision of the
638 	 * scale factor. Add half of that as a compromise.
639 	 */
640 	offset2 = src - dst * scale;
641 	offset2 >>= 16 - PHASE_BITS;
642 	phase = offset + (offset2 >> 1);
643 
644 	/* Ensure +ve values don't touch the sign bit, then truncate negative values */
645 	if (phase >= 1 << PHASE_BITS)
646 		phase = (1 << PHASE_BITS) - 1;
647 
648 	phase &= SCALER_PPF_IPHASE_MASK;
649 
650 	vc4_dlist_write(vc4_state,
651 			SCALER_PPF_AGC |
652 			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
653 			/*
654 			 * The register layout documentation is slightly
655 			 * different to setup the phase in the BCM2712,
656 			 * but they seem equivalent.
657 			 */
658 			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
659 }
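/*
 * A sketch of the phase arithmetic above: with PHASE_BITS = 6 the phase
 * is expressed in 1/64-pixel steps. For a source position on an integer
 * pixel boundary and an exact scale factor, offset works out to -32
 * (half a pixel) for the RGB/luma channel and -16 (a quarter pixel) for
 * chroma, and offset2 then adds half of the residual src - dst * scale
 * (converted to the same 1/64-pixel units) to compensate for the
 * truncated scale factor.
 */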
660 
661 static u32 __vc4_lbm_size(struct drm_plane_state *state)
662 {
663 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
664 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
665 	u32 pix_per_line;
666 	u32 lbm;
667 
668 	/* LBM is not needed when there's no vertical scaling. */
669 	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
670 	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
671 		return 0;
672 
673 	/*
674 	 * This can be further optimized in the RGB/YUV444 case if the PPF
675 	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
676 	 *
677 	 * It's not an issue though, since in that case src_w[0] is going
678 	 * to be greater than or equal to crtc_w.
679 	 */
680 	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
681 		pix_per_line = vc4_state->crtc_w;
682 	else
683 		pix_per_line = vc4_state->src_w[0] >> 16;
684 
685 	if (!vc4_state->is_yuv) {
686 		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
687 			lbm = pix_per_line * 8;
688 		else {
689 			/* In special cases, this multiplier might be 12. */
690 			lbm = pix_per_line * 16;
691 		}
692 	} else {
693 		/* There are cases for this going down to a multiplier
694 		 * of 2, but according to the firmware source, the
695 		 * table in the docs is somewhat wrong.
696 		 */
697 		lbm = pix_per_line * 16;
698 	}
699 
700 	/* Align it to 64 or 128 (hvs5) bytes */
701 	lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);
702 
703 	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
704 	lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;
705 
706 	return lbm;
707 }
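/*
 * For example (a sketch, on a GEN_5 part): a 1920-pixel-wide RGB source
 * with PPF scaling in both directions gives pix_per_line = 1920 and
 * lbm = 1920 * 16 = 30720, which is already a multiple of 128 and
 * divides down to 7680 once the 4-pixels-per-word packing is applied.
 */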
708 
709 static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
710 						unsigned int channel)
711 {
712 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
713 
714 	switch (vc4_state->y_scaling[channel]) {
715 	case VC4_SCALING_PPF:
716 		return 4;
717 
718 	case VC4_SCALING_TPZ:
719 		return 2;
720 
721 	default:
722 		return 0;
723 	}
724 }
725 
726 static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
727 				       unsigned int channel)
728 {
729 	const struct drm_format_info *info = state->fb->format;
730 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
731 
732 	if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
733 		return 0;
734 
735 	if (info->is_yuv)
736 		return channel ? 2 : 1;
737 
738 	if (info->has_alpha)
739 		return 4;
740 
741 	return 3;
742 }
743 
744 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
745 					 unsigned int channel)
746 {
747 	const struct drm_format_info *info = state->fb->format;
748 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
749 	unsigned int channels_scaled = 0;
750 	unsigned int components, words, wpc;
751 	unsigned int width, lines;
752 	unsigned int i;
753 
754 	/* LBM is meant to use the smaller of source or dest width, but there
755 	 * is an issue with UV scaling where the size required for the second
756 	 * channel is based on the source width only.
757 	 */
758 	if (info->hsub > 1 && channel == 1)
759 		width = state->src_w >> 16;
760 	else
761 		width = min(state->src_w >> 16, state->crtc_w);
762 	width = round_up(width / info->hsub, 4);
763 
764 	wpc = vc4_lbm_words_per_component(state, channel);
765 	if (!wpc)
766 		return 0;
767 
768 	components = vc4_lbm_components(state, channel);
769 	if (!components)
770 		return 0;
771 
772 	if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha)
773 		components -= 1;
774 
775 	words = width * wpc * components;
776 
777 	lines = DIV_ROUND_UP(words, 128 / info->hsub);
778 
779 	for (i = 0; i < 2; i++)
780 		if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
781 			channels_scaled++;
782 
783 	if (channels_scaled == 1)
784 		lines = lines / 2;
785 
786 	return lines;
787 }
788 
789 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
790 {
791 	const struct drm_format_info *info = state->fb->format;
792 
793 	if (info->hsub > 1)
794 		return max(vc4_lbm_channel_size(state, 0),
795 			   vc4_lbm_channel_size(state, 1));
796 	else
797 		return vc4_lbm_channel_size(state, 0);
798 }
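/*
 * A rough worked example of the GEN_6 sizing above, assuming an opaque
 * 1920-pixel-wide ARGB8888 plane (hsub = 1, crtc_w no smaller than the
 * source) with PPF vertical scaling: width = 1920, wpc = 4 and
 * components = 4, so words = 30720 and
 * lines = DIV_ROUND_UP(30720, 128) = 240 for the single channel that
 * __vc6_lbm_size() considers.
 */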
799 
800 static u32 vc4_lbm_size(struct drm_plane_state *state)
801 {
802 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
803 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
804 
805 	/* LBM is not needed when there's no vertical scaling. */
806 	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
807 	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
808 		return 0;
809 
810 	if (vc4->gen >= VC4_GEN_6_C)
811 		return __vc6_lbm_size(state);
812 	else
813 		return __vc4_lbm_size(state);
814 }
815 
816 static size_t vc6_upm_size(const struct drm_plane_state *state,
817 			   unsigned int plane)
818 {
819 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
820 	unsigned int stride = state->fb->pitches[plane];
821 
822 	/*
823 	 * TODO: This only works for raster formats, and is sub-optimal
824 	 * for buffers with a stride aligned on 32 bytes.
825 	 */
826 	unsigned int words_per_line = (stride + 62) / 32;
827 	unsigned int fetch_region_size = words_per_line * 32;
828 	unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
829 	unsigned int buffer_size = fetch_region_size * buffer_lines;
830 
831 	return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
832 }
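/*
 * For example (a sketch): a linear buffer with a 7680-byte stride gives
 * words_per_line = (7680 + 62) / 32 = 241 and fetch_region_size = 7712
 * bytes; assuming upm_buffer_lines selects a two-line buffer, that is
 * 15424 bytes before the final ALIGN to HVS_UBM_WORD_SIZE.
 */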
833 
834 static void vc4_write_scaling_parameters(struct drm_plane_state *state,
835 					 int channel)
836 {
837 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
838 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
839 
840 	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);
841 
842 	/* Ch0 H-PPF Word 0: Scaling Parameters */
843 	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
844 		vc4_write_ppf(vc4_state, vc4_state->src_w[channel],
845 			      vc4_state->crtc_w, vc4_state->src_x, channel);
846 	}
847 
848 	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
849 	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
850 		vc4_write_ppf(vc4_state, vc4_state->src_h[channel],
851 			      vc4_state->crtc_h, vc4_state->src_y, channel);
852 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
853 	}
854 
855 	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
856 	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
857 		vc4_write_tpz(vc4_state, vc4_state->src_w[channel],
858 			      vc4_state->crtc_w);
859 	}
860 
861 	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
862 	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
863 		vc4_write_tpz(vc4_state, vc4_state->src_h[channel],
864 			      vc4_state->crtc_h);
865 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
866 	}
867 }
868 
869 static void vc4_plane_calc_load(struct drm_plane_state *state)
870 {
871 	unsigned int hvs_load_shift, vrefresh, i;
872 	struct drm_framebuffer *fb = state->fb;
873 	struct vc4_plane_state *vc4_state;
874 	struct drm_crtc_state *crtc_state;
875 	unsigned int vscale_factor;
876 
877 	vc4_state = to_vc4_plane_state(state);
878 	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
879 	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
880 
881 	/* The HVS is able to process 2 pixels/cycle when scaling the source,
882 	 * 4 pixels/cycle otherwise.
883 	 * Alpha blending step seems to be pipelined and it's always operating
884 	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
885 	 * scaler block.
886 	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
887 	 */
888 	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
889 	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
890 	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
891 	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
892 		hvs_load_shift = 1;
893 	else
894 		hvs_load_shift = 2;
895 
896 	vc4_state->membus_load = 0;
897 	vc4_state->hvs_load = 0;
898 	for (i = 0; i < fb->format->num_planes; i++) {
899 		/* Even if the bandwidth/plane required for a single frame is
900 		 *
901 		 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
902 		 *  cpp * vrefresh
903 		 *
904 		 * when downscaling, we have to read more pixels per line in
905 		 * the time frame reserved for a single line, so the bandwidth
906 		 * demand can be transiently higher. To account for that, we
907 		 * calculate the down-scaling factor and multiply the plane
908 		 * load by this number. We're likely over-estimating the read
909 		 * demand, but that's better than under-estimating it.
910 		 */
911 		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
912 					     vc4_state->crtc_h);
913 		vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
914 					  (vc4_state->src_h[i] >> 16) *
915 					  vscale_factor * fb->format->cpp[i];
916 		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
917 	}
918 
919 	vc4_state->hvs_load *= vrefresh;
920 	vc4_state->hvs_load >>= hvs_load_shift;
921 	vc4_state->membus_load *= vrefresh;
922 }
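/*
 * A worked example of the load estimate (a sketch): a 1920x1080
 * ARGB8888 plane (cpp = 4) scaled down vertically to 1920x540 on a
 * 60 Hz CRTC gives vscale_factor = 2, so
 * membus_load = 1920 * 1080 * 2 * 4 * 60 ~= 995 MB/s and
 * hvs_load = (1920 * 540 * 60) >> 1 ~= 31.1 Mcycles/s, the shift
 * reflecting the 2 pixels/cycle limit of the scaler path.
 */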
923 
924 static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
925 {
926 	struct drm_device *drm = state->plane->dev;
927 	struct vc4_dev *vc4 = to_vc4_dev(drm);
928 	struct drm_plane *plane = state->plane;
929 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
930 	unsigned long irqflags;
931 	u32 lbm_size;
932 
933 	lbm_size = vc4_lbm_size(state);
934 	if (!lbm_size)
935 		return 0;
936 
937 	/*
938 	 * NOTE: BCM2712 doesn't need to be aligned, since the size
939 	 * returned by vc4_lbm_size() is in words already.
940 	 */
941 	if (vc4->gen == VC4_GEN_5)
942 		lbm_size = ALIGN(lbm_size, 64);
943 	else if (vc4->gen == VC4_GEN_4)
944 		lbm_size = ALIGN(lbm_size, 32);
945 
946 	drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
947 		       plane->base.id, plane->name, lbm_size);
948 
949 	if (WARN_ON(!vc4_state->lbm_offset))
950 		return -EINVAL;
951 
952 	/* Allocate the LBM memory that the HVS will use for temporary
953 	 * storage due to our scaling/format conversion.
954 	 */
955 	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
956 		int ret;
957 
958 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
959 		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
960 						 &vc4_state->lbm,
961 						 lbm_size, 1,
962 						 0, 0);
963 		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
964 
965 		if (ret) {
966 			drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
967 			return ret;
968 		}
969 	} else {
970 		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
971 	}
972 
973 	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
974 
975 	return 0;
976 }
977 
978 static int vc6_plane_allocate_upm(struct drm_plane_state *state)
979 {
980 	const struct drm_format_info *info = state->fb->format;
981 	struct drm_device *drm = state->plane->dev;
982 	struct vc4_dev *vc4 = to_vc4_dev(drm);
983 	struct vc4_hvs *hvs = vc4->hvs;
984 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
985 	unsigned int i;
986 	int ret;
987 
988 	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
989 
990 	vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;
991 
992 	for (i = 0; i < info->num_planes; i++) {
993 		struct vc4_upm_refcounts *refcount;
994 		int upm_handle;
995 		unsigned long irqflags;
996 		size_t upm_size;
997 
998 		upm_size = vc6_upm_size(state, i);
999 		if (!upm_size)
1000 			return -EINVAL;
1001 		upm_handle = vc4_state->upm_handle[i];
1002 
1003 		if (upm_handle &&
1004 		    hvs->upm_refcounts[upm_handle].size == upm_size) {
1005 			/* Allocation is the same size as the previous user of
1006 			 * the plane. Keep the allocation.
1007 			 */
1008 			vc4_state->upm_handle[i] = upm_handle;
1009 		} else {
1010 			if (upm_handle &&
1011 			    refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
1012 				vc4_plane_release_upm_ida(hvs, upm_handle);
1013 				vc4_state->upm_handle[i] = 0;
1014 			}
1015 
1016 			upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
1017 						     VC4_NUM_UPM_HANDLES,
1018 						     GFP_KERNEL);
1019 			if (upm_handle < 0) {
1020 				drm_dbg(drm, "Out of upm_handles\n");
1021 				return upm_handle;
1022 			}
1023 			vc4_state->upm_handle[i] = upm_handle;
1024 
1025 			refcount = &hvs->upm_refcounts[upm_handle];
1026 			refcount_set(&refcount->refcount, 1);
1027 			refcount->size = upm_size;
1028 
1029 			spin_lock_irqsave(&hvs->mm_lock, irqflags);
1030 			ret = drm_mm_insert_node_generic(&hvs->upm_mm,
1031 							 &refcount->upm,
1032 							 upm_size, HVS_UBM_WORD_SIZE,
1033 							 0, 0);
1034 			spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
1035 			if (ret) {
1036 				drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
1037 				refcount_set(&refcount->refcount, 0);
1038 				ida_free(&hvs->upm_handles, upm_handle);
1039 				vc4_state->upm_handle[i] = 0;
1040 				return ret;
1041 			}
1042 		}
1043 
1044 		refcount = &hvs->upm_refcounts[upm_handle];
1045 		vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
1046 			VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
1047 				      SCALER6_PTR0_UPM_BASE) |
1048 			VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
1049 				      SCALER6_PTR0_UPM_HANDLE) |
1050 			VC4_SET_FIELD(vc4_state->upm_buffer_lines,
1051 				      SCALER6_PTR0_UPM_BUFF_SIZE);
1052 	}
1053 
1054 	return 0;
1055 }
1056 
1057 static void vc6_plane_free_upm(struct drm_plane_state *state)
1058 {
1059 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1060 	struct drm_device *drm = state->plane->dev;
1061 	struct vc4_dev *vc4 = to_vc4_dev(drm);
1062 	struct vc4_hvs *hvs = vc4->hvs;
1063 	unsigned int i;
1064 
1065 	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
1066 
1067 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
1068 		unsigned int upm_handle;
1069 
1070 		upm_handle = vc4_state->upm_handle[i];
1071 		if (!upm_handle)
1072 			continue;
1073 
1074 		if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
1075 			vc4_plane_release_upm_ida(hvs, upm_handle);
1076 		vc4_state->upm_handle[i] = 0;
1077 	}
1078 }
1079 
1080 /*
1081  * The colorspace conversion matrices are held in 3 entries in the dlist.
1082  * Create an array of them, with entries for each full and limited mode, and
1083  * each supported colorspace.
1084  */
1085 static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
1086 	{
1087 		/* Limited range */
1088 		{
1089 			/* BT601 */
1090 			SCALER_CSC0_ITR_R_601_5,
1091 			SCALER_CSC1_ITR_R_601_5,
1092 			SCALER_CSC2_ITR_R_601_5,
1093 		}, {
1094 			/* BT709 */
1095 			SCALER_CSC0_ITR_R_709_3,
1096 			SCALER_CSC1_ITR_R_709_3,
1097 			SCALER_CSC2_ITR_R_709_3,
1098 		}, {
1099 			/* BT2020 */
1100 			SCALER_CSC0_ITR_R_2020,
1101 			SCALER_CSC1_ITR_R_2020,
1102 			SCALER_CSC2_ITR_R_2020,
1103 		}
1104 	}, {
1105 		/* Full range */
1106 		{
1107 			/* JFIF */
1108 			SCALER_CSC0_JPEG_JFIF,
1109 			SCALER_CSC1_JPEG_JFIF,
1110 			SCALER_CSC2_JPEG_JFIF,
1111 		}, {
1112 			/* BT709 */
1113 			SCALER_CSC0_ITR_R_709_3_FR,
1114 			SCALER_CSC1_ITR_R_709_3_FR,
1115 			SCALER_CSC2_ITR_R_709_3_FR,
1116 		}, {
1117 			/* BT2020 */
1118 			SCALER_CSC0_ITR_R_2020_FR,
1119 			SCALER_CSC1_ITR_R_2020_FR,
1120 			SCALER_CSC2_ITR_R_2020_FR,
1121 		}
1122 	}
1123 };
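/* For example, a full-range BT.709 buffer ends up using the three dlist
 * words in colorspace_coeffs[1][DRM_COLOR_YCBCR_BT709], i.e. the
 * SCALER_CSC{0,1,2}_ITR_R_709_3_FR coefficients.
 */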
1124 
1125 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
1126 {
1127 	struct drm_device *dev = state->state->dev;
1128 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1129 
1130 	WARN_ON_ONCE(vc4->gen != VC4_GEN_4);
1131 
1132 	if (!state->fb->format->has_alpha)
1133 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1134 				     SCALER_POS2_ALPHA_MODE);
1135 
1136 	switch (state->pixel_blend_mode) {
1137 	case DRM_MODE_BLEND_PIXEL_NONE:
1138 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1139 				     SCALER_POS2_ALPHA_MODE);
1140 	default:
1141 	case DRM_MODE_BLEND_PREMULTI:
1142 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1143 				     SCALER_POS2_ALPHA_MODE) |
1144 			SCALER_POS2_ALPHA_PREMULT;
1145 	case DRM_MODE_BLEND_COVERAGE:
1146 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1147 				     SCALER_POS2_ALPHA_MODE);
1148 	}
1149 }
1150 
1151 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
1152 {
1153 	struct drm_device *dev = state->state->dev;
1154 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1155 
1156 	WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C &&
1157 		     vc4->gen != VC4_GEN_6_D);
1158 
1159 	switch (vc4->gen) {
1160 	default:
1161 	case VC4_GEN_5:
1162 	case VC4_GEN_6_C:
1163 		if (!state->fb->format->has_alpha)
1164 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1165 					     SCALER5_CTL2_ALPHA_MODE);
1166 
1167 		switch (state->pixel_blend_mode) {
1168 		case DRM_MODE_BLEND_PIXEL_NONE:
1169 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1170 					     SCALER5_CTL2_ALPHA_MODE);
1171 		default:
1172 		case DRM_MODE_BLEND_PREMULTI:
1173 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1174 					     SCALER5_CTL2_ALPHA_MODE) |
1175 				SCALER5_CTL2_ALPHA_PREMULT;
1176 		case DRM_MODE_BLEND_COVERAGE:
1177 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1178 					     SCALER5_CTL2_ALPHA_MODE);
1179 		}
1180 	case VC4_GEN_6_D:
1181 		/* 2712-D configures fixed alpha mode in CTL0 */
1182 		return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ?
1183 			SCALER5_CTL2_ALPHA_PREMULT : 0;
1184 	}
1185 }
1186 
1187 static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state)
1188 {
1189 	struct drm_device *dev = state->state->dev;
1190 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1191 
1192 	WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D);
1193 
1194 	if (vc4->gen == VC4_GEN_6_D &&
1195 	    (!state->fb->format->has_alpha ||
1196 	     state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE))
1197 		return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED,
1198 				     SCALER6_CTL0_ALPHA_MASK);
1199 
1200 	return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK);
1201 }
1202 
1203 /* Writes out a full display list for an active plane to the plane's
1204  * private dlist state.
1205  */
1206 static int vc4_plane_mode_set(struct drm_plane *plane,
1207 			      struct drm_plane_state *state)
1208 {
1209 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
1210 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1211 	struct drm_framebuffer *fb = state->fb;
1212 	u32 ctl0_offset = vc4_state->dlist_count;
1213 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1214 	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1215 	int num_planes = fb->format->num_planes;
1216 	u32 h_subsample = fb->format->hsub;
1217 	u32 v_subsample = fb->format->vsub;
1218 	bool mix_plane_alpha;
1219 	bool covers_screen;
1220 	u32 scl0, scl1, pitch0;
1221 	u32 tiling, src_x, src_y;
1222 	u32 width, height;
1223 	u32 hvs_format = format->hvs;
1224 	unsigned int rotation;
1225 	u32 offsets[3] = { 0 };
1226 	int ret, i;
1227 
1228 	if (vc4_state->dlist_initialized)
1229 		return 0;
1230 
1231 	ret = vc4_plane_setup_clipping_and_scaling(state);
1232 	if (ret)
1233 		return ret;
1234 
1235 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
1236 	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
1237 		/* 0 source size probably means the plane is offscreen */
1238 		vc4_state->dlist_initialized = 1;
1239 		return 0;
1240 	}
1241 
1242 	width = vc4_state->src_w[0] >> 16;
1243 	height = vc4_state->src_h[0] >> 16;
1244 
1245 	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1246 	 * and 4:4:4, scl1 should be set to scl0 so both channels of
1247 	 * the scaler do the same thing.  For YUV, the Y plane needs
1248 	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1249 	 * the scl fields here.
1250 	 */
1251 	if (num_planes == 1) {
1252 		scl0 = vc4_get_scl_field(state, 0);
1253 		scl1 = scl0;
1254 	} else {
1255 		scl0 = vc4_get_scl_field(state, 1);
1256 		scl1 = vc4_get_scl_field(state, 0);
1257 	}
1258 
1259 	rotation = drm_rotation_simplify(state->rotation,
1260 					 DRM_MODE_ROTATE_0 |
1261 					 DRM_MODE_REFLECT_X |
1262 					 DRM_MODE_REFLECT_Y);
1263 
1264 	/* We must point to the last line when Y reflection is enabled. */
1265 	src_y = vc4_state->src_y >> 16;
1266 	if (rotation & DRM_MODE_REFLECT_Y)
1267 		src_y += height - 1;
1268 
1269 	src_x = vc4_state->src_x >> 16;
1270 
1271 	switch (base_format_mod) {
1272 	case DRM_FORMAT_MOD_LINEAR:
1273 		tiling = SCALER_CTL0_TILING_LINEAR;
1274 		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
1275 
1276 		/* Adjust the base pointer to the first pixel to be scanned
1277 		 * out.
1278 		 */
1279 		for (i = 0; i < num_planes; i++) {
1280 			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1281 			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1282 		}
1283 
1284 		break;
1285 
1286 	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
1287 		u32 tile_size_shift = 12; /* T tiles are 4kb */
1288 		/* Whole-tile offsets, mostly for setting the pitch. */
1289 		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
1290 		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
1291 		u32 tile_w_mask = (1 << tile_w_shift) - 1;
1292 		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
1293 		 * the height (in pixels) of a 4k tile.
1294 		 */
1295 		u32 tile_h_mask = (2 << tile_h_shift) - 1;
1296 		/* For T-tiled, the FB pitch is "how many bytes from one row to
1297 		 * the next, such that
1298 		 *
1299 		 *	pitch * tile_h == tile_size * tiles_per_row
1300 		 */
1301 		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
1302 		u32 tiles_l = src_x >> tile_w_shift;
1303 		u32 tiles_r = tiles_w - tiles_l;
1304 		u32 tiles_t = src_y >> tile_h_shift;
1305 		/* Intra-tile offsets, which modify the base address (the
1306 		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
1307 		 * base address).
1308 		 */
1309 		u32 tile_y = (src_y >> 4) & 1;
1310 		u32 subtile_y = (src_y >> 2) & 3;
1311 		u32 utile_y = src_y & 3;
1312 		u32 x_off = src_x & tile_w_mask;
1313 		u32 y_off = src_y & tile_h_mask;
1314 
1315 		/* When Y reflection is requested we must set the
1316 		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
1317 		 * after the initial one should be fetched in descending order,
1318 		 * which makes sense since we start from the last line and go
1319 		 * backward.
1320 		 * Don't know why we need y_off = max_y_off - y_off, but it's
1321 		 * definitely required (I guess it's also related to the "going
1322 		 * backward" situation).
1323 		 */
1324 		if (rotation & DRM_MODE_REFLECT_Y) {
1325 			y_off = tile_h_mask - y_off;
1326 			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
1327 		} else {
1328 			pitch0 = 0;
1329 		}
1330 
1331 		tiling = SCALER_CTL0_TILING_256B_OR_T;
1332 		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
1333 			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
1334 			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
1335 			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
1336 		offsets[0] += tiles_t * (tiles_w << tile_size_shift);
1337 		offsets[0] += subtile_y << 8;
1338 		offsets[0] += utile_y << 4;
1339 
1340 		/* Rows of tiles alternate left-to-right and right-to-left. */
1341 		if (tiles_t & 1) {
1342 			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
1343 			offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
1344 			offsets[0] -= (1 + !tile_y) << 10;
1345 		} else {
1346 			offsets[0] += tiles_l << tile_size_shift;
1347 			offsets[0] += tile_y << 10;
1348 		}
1349 
1350 		break;
1351 	}
1352 
1353 	case DRM_FORMAT_MOD_BROADCOM_SAND64:
1354 	case DRM_FORMAT_MOD_BROADCOM_SAND128:
1355 	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1356 		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1357 
1358 		if (param > SCALER_TILE_HEIGHT_MASK) {
1359 			DRM_DEBUG_KMS("SAND height too large (%d)\n",
1360 				      param);
1361 			return -EINVAL;
1362 		}
1363 
1364 		if (fb->format->format == DRM_FORMAT_P030) {
1365 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1366 			tiling = SCALER_CTL0_TILING_128B;
1367 		} else {
1368 			hvs_format = HVS_PIXEL_FORMAT_H264;
1369 
1370 			switch (base_format_mod) {
1371 			case DRM_FORMAT_MOD_BROADCOM_SAND64:
1372 				tiling = SCALER_CTL0_TILING_64B;
1373 				break;
1374 			case DRM_FORMAT_MOD_BROADCOM_SAND128:
1375 				tiling = SCALER_CTL0_TILING_128B;
1376 				break;
1377 			case DRM_FORMAT_MOD_BROADCOM_SAND256:
1378 				tiling = SCALER_CTL0_TILING_256B_OR_T;
1379 				break;
1380 			default:
1381 				return -EINVAL;
1382 			}
1383 		}
1384 
1385 		/* Adjust the base pointer to the first pixel to be scanned
1386 		 * out.
1387 		 *
1388 		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1389 		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1390 		 * word that should be taken as the first pixel.
1391 		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1392 		 * element within the 128bit word, eg for pixel 3 the value
1393 		 * should be 6.
1394 		 */
1395 		for (i = 0; i < num_planes; i++) {
1396 			u32 tile_w, tile, x_off, pix_per_tile;
1397 
1398 			if (fb->format->format == DRM_FORMAT_P030) {
1399 				/*
1400 				 * Spec says: bits [31:4] of the given address
1401 				 * should point to the 128-bit word containing
1402 				 * the desired starting pixel, and bits[3:0]
1403 				 * should be between 0 and 11, indicating which
1404 				 * of the 12-pixels in that 128-bit word is the
1405 				 * first pixel to be used
1406 				 */
1407 				u32 remaining_pixels = src_x % 96;
1408 				u32 aligned = remaining_pixels / 12;
1409 				u32 last_bits = remaining_pixels % 12;
1410 
1411 				x_off = aligned * 16 + last_bits;
1412 				tile_w = 128;
1413 				pix_per_tile = 96;
1414 			} else {
1415 				switch (base_format_mod) {
1416 				case DRM_FORMAT_MOD_BROADCOM_SAND64:
1417 					tile_w = 64;
1418 					break;
1419 				case DRM_FORMAT_MOD_BROADCOM_SAND128:
1420 					tile_w = 128;
1421 					break;
1422 				case DRM_FORMAT_MOD_BROADCOM_SAND256:
1423 					tile_w = 256;
1424 					break;
1425 				default:
1426 					return -EINVAL;
1427 				}
1428 				pix_per_tile = tile_w / fb->format->cpp[0];
1429 				x_off = (src_x % pix_per_tile) /
1430 					(i ? h_subsample : 1) *
1431 					fb->format->cpp[i];
1432 			}
1433 
1434 			tile = src_x / pix_per_tile;
1435 
1436 			offsets[i] += param * tile_w * tile;
1437 			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1438 			offsets[i] += x_off & ~(i ? 1 : 0);
1439 		}
1440 
1441 		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
1442 		break;
1443 	}
1444 
1445 	default:
1446 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1447 			      (long long)fb->modifier);
1448 		return -EINVAL;
1449 	}
1450 
1451 	/* fetch an extra pixel if we don't actually line up with the left edge. */
1452 	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1453 		width++;
1454 
1455 	/* same for the right side */
1456 	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1457 	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1458 		width++;
1459 
1460 	/* now for the top */
1461 	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1462 		height++;
1463 
1464 	/* and the bottom */
1465 	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1466 	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1467 		height++;
1468 
1469 	/* For YUV444 the hardware wants double the width, otherwise it doesn't
1470 	 * fetch the full width of chroma.
1471 	 */
1472 	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1473 		width <<= 1;
1474 
1475 	/* Don't waste cycles mixing with plane alpha if the set alpha
1476 	 * is opaque or there is no per-pixel alpha information.
1477 	 * In any case we use the alpha property value as the fixed alpha.
1478 	 */
1479 	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1480 			  fb->format->has_alpha;
1481 
1482 	if (vc4->gen == VC4_GEN_4) {
1483 	/* Control word */
1484 		vc4_dlist_write(vc4_state,
1485 				SCALER_CTL0_VALID |
1486 				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
1487 				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
1488 				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
1489 				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
1490 				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1491 				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1492 				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
1493 				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1494 				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
1495 
1496 		/* Position Word 0: Image Positions and Alpha Value */
1497 		vc4_state->pos0_offset = vc4_state->dlist_count;
1498 		vc4_dlist_write(vc4_state,
1499 				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
1500 				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
1501 				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
1502 
1503 		/* Position Word 1: Scaled Image Dimensions. */
1504 		if (!vc4_state->is_unity) {
1505 			vc4_dlist_write(vc4_state,
1506 					VC4_SET_FIELD(vc4_state->crtc_w,
1507 						      SCALER_POS1_SCL_WIDTH) |
1508 					VC4_SET_FIELD(vc4_state->crtc_h,
1509 						      SCALER_POS1_SCL_HEIGHT));
1510 		}
1511 
1512 		/* Position Word 2: Source Image Size, Alpha */
1513 		vc4_state->pos2_offset = vc4_state->dlist_count;
1514 		vc4_dlist_write(vc4_state,
1515 				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
1516 				vc4_hvs4_get_alpha_blend_mode(state) |
1517 				VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
1518 				VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));
1519 
1520 		/* Position Word 3: Context.  Written by the HVS. */
1521 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1522 
1523 	} else {
1524 		/* Control word */
1525 		vc4_dlist_write(vc4_state,
1526 				SCALER_CTL0_VALID |
1527 				(format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
1528 				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1529 				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1530 				(vc4_state->is_unity ?
1531 						SCALER5_CTL0_UNITY : 0) |
1532 				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1533 				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
1534 				SCALER5_CTL0_ALPHA_EXPAND |
1535 				SCALER5_CTL0_RGB_EXPAND);
1536 
1537 		/* Position Word 0: Image Positions and Alpha Value */
1538 		vc4_state->pos0_offset = vc4_state->dlist_count;
1539 		vc4_dlist_write(vc4_state,
1540 				(rotation & DRM_MODE_REFLECT_Y ?
1541 						SCALER5_POS0_VFLIP : 0) |
1542 				VC4_SET_FIELD(vc4_state->crtc_x,
1543 					      SCALER_POS0_START_X) |
1544 				(rotation & DRM_MODE_REFLECT_X ?
1545 					      SCALER5_POS0_HFLIP : 0) |
1546 				VC4_SET_FIELD(vc4_state->crtc_y,
1547 					      SCALER5_POS0_START_Y)
1548 			       );
1549 
1550 		/* Control Word 2 */
1551 		vc4_dlist_write(vc4_state,
1552 				VC4_SET_FIELD(state->alpha >> 4,
1553 					      SCALER5_CTL2_ALPHA) |
1554 				vc4_hvs5_get_alpha_blend_mode(state) |
1555 				(mix_plane_alpha ?
1556 					SCALER5_CTL2_ALPHA_MIX : 0)
1557 			       );
1558 
1559 		/* Position Word 1: Scaled Image Dimensions. */
1560 		if (!vc4_state->is_unity) {
1561 			vc4_dlist_write(vc4_state,
1562 					VC4_SET_FIELD(vc4_state->crtc_w,
1563 						      SCALER5_POS1_SCL_WIDTH) |
1564 					VC4_SET_FIELD(vc4_state->crtc_h,
1565 						      SCALER5_POS1_SCL_HEIGHT));
1566 		}
1567 
1568 		/* Position Word 2: Source Image Size */
1569 		vc4_state->pos2_offset = vc4_state->dlist_count;
1570 		vc4_dlist_write(vc4_state,
1571 				VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
1572 				VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));
1573 
1574 		/* Position Word 3: Context.  Written by the HVS. */
1575 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1576 	}
1577 
1578 
1579 	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
1580 	 *
1581 	 * The pointers may be any byte address.
1582 	 */
1583 	vc4_state->ptr0_offset[0] = vc4_state->dlist_count;
1584 
1585 	for (i = 0; i < num_planes; i++) {
1586 		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1587 
1588 		vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]);
1589 	}
1590 
1591 	/* Pointer Context Word 0/1/2: Written by the HVS */
1592 	for (i = 0; i < num_planes; i++)
1593 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1594 
1595 	/* Pitch word 0 */
1596 	vc4_dlist_write(vc4_state, pitch0);
1597 
1598 	/* Pitch word 1/2 */
1599 	for (i = 1; i < num_planes; i++) {
1600 		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
1601 		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
1602 			vc4_dlist_write(vc4_state,
1603 					VC4_SET_FIELD(fb->pitches[i],
1604 						      SCALER_SRC_PITCH));
1605 		} else {
1606 			vc4_dlist_write(vc4_state, pitch0);
1607 		}
1608 	}
1609 
1610 	/* Colorspace conversion words */
1611 	if (vc4_state->is_yuv) {
1612 		enum drm_color_encoding color_encoding = state->color_encoding;
1613 		enum drm_color_range color_range = state->color_range;
1614 		const u32 *ccm;
1615 
1616 		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
1617 			color_encoding = DRM_COLOR_YCBCR_BT601;
1618 		if (color_range >= DRM_COLOR_RANGE_MAX)
1619 			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1620 
1621 		ccm = colorspace_coeffs[color_range][color_encoding];
1622 
1623 		vc4_dlist_write(vc4_state, ccm[0]);
1624 		vc4_dlist_write(vc4_state, ccm[1]);
1625 		vc4_dlist_write(vc4_state, ccm[2]);
1626 	}
1627 
1628 	vc4_state->lbm_offset = 0;
1629 
1630 	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
1631 	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
1632 	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1633 	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1634 		/* Reserve a slot for the LBM Base Address. The real value will
1635 		 * be set when calling vc4_plane_allocate_lbm().
1636 		 */
1637 		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1638 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1639 			vc4_state->lbm_offset = vc4_state->dlist_count;
1640 			vc4_dlist_counter_increment(vc4_state);
1641 		}
1642 
1643 		if (num_planes > 1) {
1644 			/* Emit Cb/Cr as channel 0 and Y as channel
1645 			 * 1. This matches how we set up scl0/scl1
1646 			 * above.
1647 			 */
1648 			vc4_write_scaling_parameters(state, 1);
1649 		}
1650 		vc4_write_scaling_parameters(state, 0);
1651 
1652 		/* If any PPF setup was done, then all the kernel
1653 		 * pointers get uploaded.
1654 		 */
1655 		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
1656 		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
1657 		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
1658 		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1659 			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
1660 						   SCALER_PPF_KERNEL_OFFSET);
1661 
1662 			/* HPPF plane 0 */
1663 			vc4_dlist_write(vc4_state, kernel);
1664 			/* VPPF plane 0 */
1665 			vc4_dlist_write(vc4_state, kernel);
1666 			/* HPPF plane 1 */
1667 			vc4_dlist_write(vc4_state, kernel);
1668 			/* VPPF plane 1 */
1669 			vc4_dlist_write(vc4_state, kernel);
1670 		}
1671 	}
1672 
1673 	vc4_state->dlist[ctl0_offset] |=
1674 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
1675 
1676 	/* crtc_* are already clipped coordinates. */
1677 	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1678 			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1679 			vc4_state->crtc_h == state->crtc->mode.vdisplay;
1680 	/* Background fill might be necessary when the plane has per-pixel
1681 	 * alpha content or a non-opaque plane alpha and could blend from the
1682 	 * background or does not cover the entire screen.
1683 	 */
1684 	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1685 				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1686 
1687 	/* Flag the dlist as initialized to avoid checking it twice in case
1688 	 * the async update check already called vc4_plane_mode_set() and
1689 	 * decided to fallback to sync update because async update was not
1690 	 * decided to fall back to sync update because async update was not
1691 	 */
1692 	vc4_state->dlist_initialized = 1;
1693 
1694 	vc4_plane_calc_load(state);
1695 
1696 	return 0;
1697 }
1698 
1699 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
1700 {
1701 	struct drm_plane_state *state = &vc4_state->base;
1702 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
1703 	u32 ret = 0;
1704 
1705 	if (vc4_state->is_yuv) {
1706 		enum drm_color_encoding color_encoding = state->color_encoding;
1707 		enum drm_color_range color_range = state->color_range;
1708 
1709 		/* CSC pre-loaded with:
1710 		 * 0 = BT601 limited range
1711 		 * 1 = BT709 limited range
1712 		 * 2 = BT2020 limited range
1713 		 * 3 = BT601 full range
1714 		 * 4 = BT709 full range
1715 		 * 5 = BT2020 full range
1716 		 */
1717 		if (color_encoding > DRM_COLOR_YCBCR_BT2020)
1718 			color_encoding = DRM_COLOR_YCBCR_BT601;
1719 		if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
1720 			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1721 
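		/* The CFC control value indexes the pre-loaded table above as
		 * encoding + range * 3, e.g. BT709 (1) with full range (1)
		 * selects entry 4, "BT709 full range".
		 */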
1722 		if (vc4->gen == VC4_GEN_6_C) {
1723 			ret |= SCALER6C_CTL2_CSC_ENABLE;
1724 			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1725 					     SCALER6C_CTL2_BRCM_CFC_CONTROL);
1726 		} else {
1727 			ret |= SCALER6D_CTL2_CSC_ENABLE;
1728 			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1729 					     SCALER6D_CTL2_BRCM_CFC_CONTROL);
1730 		}
1731 	}
1732 
1733 	return ret;
1734 }
1735 
1736 static int vc6_plane_mode_set(struct drm_plane *plane,
1737 			      struct drm_plane_state *state)
1738 {
1739 	struct drm_device *drm = plane->dev;
1740 	struct vc4_dev *vc4 = to_vc4_dev(drm);
1741 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1742 	struct drm_framebuffer *fb = state->fb;
1743 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1744 	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1745 	int num_planes = fb->format->num_planes;
1746 	u32 h_subsample = fb->format->hsub;
1747 	u32 v_subsample = fb->format->vsub;
1748 	bool mix_plane_alpha;
1749 	bool covers_screen;
1750 	u32 scl0, scl1, pitch0;
1751 	u32 tiling, src_x, src_y;
1752 	u32 width, height;
1753 	u32 hvs_format = format->hvs;
1754 	u32 offsets[3] = { 0 };
1755 	unsigned int rotation;
1756 	int ret, i;
1757 
1758 	if (vc4_state->dlist_initialized)
1759 		return 0;
1760 
1761 	ret = vc4_plane_setup_clipping_and_scaling(state);
1762 	if (ret)
1763 		return ret;
1764 
1765 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
1766 	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
1767 		/* A zero source size probably means the plane is offscreen,
1768 		 * and a zero destination size means the plane is redundant.
1769 		 */
1770 		vc4_state->dlist_initialized = 1;
1771 		return 0;
1772 	}
1773 
1774 	width = vc4_state->src_w[0] >> 16;
1775 	height = vc4_state->src_h[0] >> 16;
1776 
1777 	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1778 	 * and 4:4:4, scl1 should be set to scl0 so both channels of
1779 	 * the scaler do the same thing.  For YUV, the Y plane needs
1780 	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1781 	 * the scl fields here.
1782 	 */
1783 	if (num_planes == 1) {
1784 		scl0 = vc4_get_scl_field(state, 0);
1785 		scl1 = scl0;
1786 	} else {
1787 		scl0 = vc4_get_scl_field(state, 1);
1788 		scl1 = vc4_get_scl_field(state, 0);
1789 	}
1790 
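	/* The plane only exposes ROTATE_0/ROTATE_180 plus reflections, so a
	 * requested 180 degree rotation is simplified into the equivalent
	 * REFLECT_X | REFLECT_Y pair that the flips below can implement.
	 */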
1791 	rotation = drm_rotation_simplify(state->rotation,
1792 					 DRM_MODE_ROTATE_0 |
1793 					 DRM_MODE_REFLECT_X |
1794 					 DRM_MODE_REFLECT_Y);
1795 
1796 	/* We must point to the last line when Y reflection is enabled. */
1797 	src_y = vc4_state->src_y >> 16;
1798 	if (rotation & DRM_MODE_REFLECT_Y)
1799 		src_y += height - 1;
1800 
1801 	src_x = vc4_state->src_x >> 16;
1802 
1803 	switch (base_format_mod) {
1804 	case DRM_FORMAT_MOD_LINEAR:
1805 		tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
1806 
1807 		/* Adjust the base pointer to the first pixel to be scanned
1808 		 * out.
1809 		 */
1810 		for (i = 0; i < num_planes; i++) {
1811 			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1812 			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1813 		}
1814 
1815 		break;
1816 
1817 	case DRM_FORMAT_MOD_BROADCOM_SAND128:
1818 	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1819 		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1820 		u32 components_per_word;
1821 		u32 starting_offset;
1822 		u32 fetch_count;
1823 
1824 		if (param > SCALER_TILE_HEIGHT_MASK) {
1825 			DRM_DEBUG_KMS("SAND height too large (%u)\n",
1826 				      param);
1827 			return -EINVAL;
1828 		}
1829 
1830 		if (fb->format->format == DRM_FORMAT_P030) {
1831 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1832 			tiling = SCALER6_CTL0_ADDR_MODE_128B;
1833 		} else {
1834 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
1835 
1836 			switch (base_format_mod) {
1837 			case DRM_FORMAT_MOD_BROADCOM_SAND128:
1838 				tiling = SCALER6_CTL0_ADDR_MODE_128B;
1839 				break;
1840 			case DRM_FORMAT_MOD_BROADCOM_SAND256:
1841 				tiling = SCALER6_CTL0_ADDR_MODE_256B;
1842 				break;
1843 			default:
1844 				return -EINVAL;
1845 			}
1846 		}
1847 
1848 		/* Adjust the base pointer to the first pixel to be scanned
1849 		 * out.
1850 		 *
1851 		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1852 		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1853 		 * word that should be taken as the first pixel.
1854 		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1855 		 * element within the 128bit word, eg for pixel 3 the value
1856 		 * should be 6.
1857 		 */
1858 		for (i = 0; i < num_planes; i++) {
1859 			u32 tile_w, tile, x_off, pix_per_tile;
1860 
1861 			if (fb->format->format == DRM_FORMAT_P030) {
1862 				/*
1863 				 * Spec says: bits [31:4] of the given address
1864 				 * should point to the 128-bit word containing
1865 				 * the desired starting pixel, and bits [3:0]
1866 				 * should be between 0 and 11, indicating which
1867 				 * of the 12 pixels in that 128-bit word is the
1868 				 * first pixel to be used.
1869 				 */
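				/* Worked example: src_x % 96 == 15 gives
				 * aligned == 1 and last_bits == 3, so x_off is
				 * 0x13: the second 128-bit word, with pixel 3
				 * selected in bits [3:0].
				 */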
1870 				u32 remaining_pixels = src_x % 96;
1871 				u32 aligned = remaining_pixels / 12;
1872 				u32 last_bits = remaining_pixels % 12;
1873 
1874 				x_off = aligned * 16 + last_bits;
1875 				tile_w = 128;
1876 				pix_per_tile = 96;
1877 			} else {
1878 				switch (base_format_mod) {
1879 				case DRM_FORMAT_MOD_BROADCOM_SAND128:
1880 					tile_w = 128;
1881 					break;
1882 				case DRM_FORMAT_MOD_BROADCOM_SAND256:
1883 					tile_w = 256;
1884 					break;
1885 				default:
1886 					return -EINVAL;
1887 				}
1888 				pix_per_tile = tile_w / fb->format->cpp[0];
1889 				x_off = (src_x % pix_per_tile) /
1890 					(i ? h_subsample : 1) *
1891 					fb->format->cpp[i];
1892 			}
1893 
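			/* A SAND image is stored as vertical columns of
			 * tile_w bytes with a column stride of param * tile_w
			 * bytes: step to the right column, then to the right
			 * line within it, then to the byte within that line.
			 */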
1894 			tile = src_x / pix_per_tile;
1895 
1896 			offsets[i] += param * tile_w * tile;
1897 			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1898 			offsets[i] += x_off & ~(i ? 1 : 0);
1899 		}
1900 
1901 		components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
1902 		starting_offset = src_x % components_per_word;
1903 		fetch_count = (width + starting_offset + components_per_word - 1) /
1904 			components_per_word;
1905 
1906 		pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
1907 			 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
1908 		break;
1909 	}
1910 
1911 	default:
1912 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%016llx\n",
1913 			      (unsigned long long)fb->modifier);
1914 		return -EINVAL;
1915 	}
1916 
1917 	/* fetch an extra pixel if we don't actually line up with the left edge. */
1918 	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1919 		width++;
1920 
1921 	/* same for the right side */
1922 	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1923 	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1924 		width++;
1925 
1926 	/* now for the top */
1927 	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1928 		height++;
1929 
1930 	/* and the bottom */
1931 	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1932 	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1933 		height++;
1934 
1935 	/* For YUV444 the hardware wants double the width, otherwise it
1936 	 * doesn't fetch the full width of the chroma planes.
1937 	 */
1938 	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1939 		width <<= 1;
1940 
1941 	/* Don't waste cycles mixing with plane alpha if the set alpha
1942 	 * is opaque or there is no per-pixel alpha information.
1943 	 * In any case we use the alpha property value as the fixed alpha.
1944 	 */
1945 	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1946 			  fb->format->has_alpha;
1947 
1948 	/* Control Word 0: Scaling Configuration & Element Validity */
1949 	vc4_dlist_write(vc4_state,
1950 			SCALER6_CTL0_VALID |
1951 			VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
1952 			vc4_hvs6_get_alpha_mask_mode(state) |
1953 			(vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
1954 			VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
1955 			VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
1956 			VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
1957 			VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
1958 
1959 	/* Position Word 0: Image Position */
1960 	vc4_state->pos0_offset = vc4_state->dlist_count;
1961 	vc4_dlist_write(vc4_state,
1962 			VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
1963 			(rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
1964 			VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
1965 
1966 	/* Control Word 2: Alpha Value & CSC */
1967 	vc4_dlist_write(vc4_state,
1968 			vc6_plane_get_csc_mode(vc4_state) |
1969 			vc4_hvs5_get_alpha_blend_mode(state) |
1970 			(mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
1971 			VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
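	/* state->alpha is a 16-bit property value (DRM_BLEND_ALPHA_OPAQUE is
	 * 0xffff); the >> 4 above narrows it to the width of the hardware
	 * alpha field.
	 */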
1972 
1973 	/* Position Word 1: Scaled Image Dimensions */
1974 	if (!vc4_state->is_unity)
1975 		vc4_dlist_write(vc4_state,
1976 				VC4_SET_FIELD(vc4_state->crtc_h - 1,
1977 					      SCALER6_POS1_SCL_LINES) |
1978 				VC4_SET_FIELD(vc4_state->crtc_w - 1,
1979 					      SCALER6_POS1_SCL_WIDTH));
1980 
1981 	/* Position Word 2: Source Image Size */
1982 	vc4_state->pos2_offset = vc4_state->dlist_count;
1983 	vc4_dlist_write(vc4_state,
1984 			VC4_SET_FIELD(height - 1,
1985 				      SCALER6_POS2_SRC_LINES) |
1986 			VC4_SET_FIELD(width - 1,
1987 				      SCALER6_POS2_SRC_WIDTH));
1988 
1989 	/* Position Word 3: Context */
1990 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1991 
1992 	/*
1993 	 * TODO: This only covers Raster Scan Order planes
1994 	 */
1995 	for (i = 0; i < num_planes; i++) {
1996 		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1997 		dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];
1998 
1999 		/* Pointer Word 0 */
2000 		vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
2001 		vc4_dlist_write(vc4_state,
2002 				(rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
2003 				/*
2004 				 * The UPM buffer will be allocated in
2005 				 * vc6_plane_allocate_upm().
2006 				 */
2007 				VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
2008 					      SCALER6_PTR0_UPPER_ADDR));
2009 
2010 		/* Pointer Word 1 */
2011 		vc4_dlist_write(vc4_state, lower_32_bits(paddr));
2012 
2013 		/* Pointer Word 2 */
2014 		if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
2015 		    base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
2016 			vc4_dlist_write(vc4_state,
2017 					VC4_SET_FIELD(fb->pitches[i],
2018 						      SCALER6_PTR2_PITCH));
2019 		} else {
2020 			vc4_dlist_write(vc4_state, pitch0);
2021 		}
2022 	}
2023 
2024 	/*
2025 	 * Palette Word 0
2026 	 * TODO: We're not using the palette mode
2027 	 */
2028 
2029 	/*
2030 	 * Trans Word 0
2031 	 * TODO: It's only relevant if we set the trans_rgb bit in the
2032 	 * control word 0, and we don't at the moment.
2033 	 */
2034 
2035 	vc4_state->lbm_offset = 0;
2036 
2037 	if (!vc4_state->is_unity || fb->format->is_yuv) {
2038 		/*
2039 		 * Reserve a slot for the LBM Base Address. The real value will
2040 		 * be set when calling vc4_plane_allocate_lbm().
2041 		 */
2042 		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2043 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2044 			vc4_state->lbm_offset = vc4_state->dlist_count;
2045 			vc4_dlist_counter_increment(vc4_state);
2046 		}
2047 
2048 		if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
2049 		    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
2050 		    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2051 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2052 			if (num_planes > 1)
2053 				/*
2054 				 * Emit Cb/Cr as channel 0 and Y as channel
2055 				 * 1. This matches how we set up scl0/scl1
2056 				 * above.
2057 				 */
2058 				vc4_write_scaling_parameters(state, 1);
2059 
2060 			vc4_write_scaling_parameters(state, 0);
2061 		}
2062 
2063 		/*
2064 		 * If any PPF setup was done, then all the kernel
2065 		 * pointers get uploaded.
2066 		 */
2067 		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
2068 		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
2069 		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
2070 		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
2071 			u32 kernel =
2072 				VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
2073 					      SCALER_PPF_KERNEL_OFFSET);
2074 
2075 			/* HPPF plane 0 */
2076 			vc4_dlist_write(vc4_state, kernel);
2077 			/* VPPF plane 0 */
2078 			vc4_dlist_write(vc4_state, kernel);
2079 			/* HPPF plane 1 */
2080 			vc4_dlist_write(vc4_state, kernel);
2081 			/* VPPF plane 1 */
2082 			vc4_dlist_write(vc4_state, kernel);
2083 		}
2084 	}
2085 
2086 	vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
2087 
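	/* Control word 0 also carries the link to the next element, i.e. the
	 * size of this one, which is only known once the whole element has
	 * been emitted.
	 */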
2088 	vc4_state->dlist[0] |=
2089 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
2090 
2091 	/* crtc_* are already clipped coordinates. */
2092 	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
2093 			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
2094 			vc4_state->crtc_h == state->crtc->mode.vdisplay;
2095 
2096 	/*
2097 	 * Background fill might be necessary when the plane could let the
2098 	 * background show through: it has per-pixel alpha content, a
2099 	 * non-opaque plane alpha, or it does not cover the entire screen.
2100 	 */
2101 	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
2102 				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
2103 
2104 	/*
2105 	 * Flag the dlist as initialized to avoid checking it twice in case
2106 	 * the async update check already called vc6_plane_mode_set() and
2107 	 * decided to fall back to a sync update because an async update
2108 	 * was not possible.
2109 	 */
2110 	vc4_state->dlist_initialized = 1;
2111 
2112 	vc4_plane_calc_load(state);
2113 
2114 	drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
2115 		       plane->base.id, plane->name, vc4_state->dlist_count);
2116 
2117 	return 0;
2118 }
2119 
2120 /* If a modeset involves changing the setup of a plane, the atomic
2121  * infrastructure will call this to validate a proposed plane setup.
2122  * However, if a plane isn't getting updated, this (and the
2123  * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
2124  * compute the dlist here and have all active plane dlists get updated
2125  * in the CRTC's flush.
2126  */
2127 static int vc4_plane_atomic_check(struct drm_plane *plane,
2128 				  struct drm_atomic_state *state)
2129 {
2130 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2131 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2132 										 plane);
2133 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
2134 	int ret;
2135 
2136 	vc4_state->dlist_count = 0;
2137 
2138 	if (!plane_enabled(new_plane_state)) {
2139 		struct drm_plane_state *old_plane_state =
2140 				drm_atomic_get_old_plane_state(state, plane);
2141 
2142 		if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
2143 		    plane_enabled(old_plane_state)) {
2144 			vc6_plane_free_upm(new_plane_state);
2145 		}
2146 		return 0;
2147 	}
2148 
2149 	if (vc4->gen >= VC4_GEN_6_C)
2150 		ret = vc6_plane_mode_set(plane, new_plane_state);
2151 	else
2152 		ret = vc4_plane_mode_set(plane, new_plane_state);
2153 	if (ret)
2154 		return ret;
2155 
2156 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
2157 	    !vc4_state->crtc_w || !vc4_state->crtc_h)
2158 		return 0;
2159 
2160 	ret = vc4_plane_allocate_lbm(new_plane_state);
2161 	if (ret)
2162 		return ret;
2163 
2164 	if (vc4->gen >= VC4_GEN_6_C) {
2165 		ret = vc6_plane_allocate_upm(new_plane_state);
2166 		if (ret)
2167 			return ret;
2168 	}
2169 
2170 	return 0;
2171 }
2172 
2173 static void vc4_plane_atomic_update(struct drm_plane *plane,
2174 				    struct drm_atomic_state *state)
2175 {
2176 	/* No contents here.  Since we don't know where in the CRTC's
2177 	 * dlist we should be stored, our dlist is uploaded to the
2178 	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
2179 	 * time.
2180 	 */
2181 }
2182 
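/**
 * vc4_plane_write_dlist - copy a plane's display list into HVS memory
 * @plane: plane whose current state holds the software dlist copy
 * @dlist: destination within the HVS display list region
 *
 * Called by the CRTC at atomic_flush time, once the CRTC knows where in
 * its display list this plane's element should live.
 *
 * Returns the number of dwords written.
 */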
2183 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
2184 {
2185 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2186 	int i;
2187 	int idx;
2188 
2189 	if (!drm_dev_enter(plane->dev, &idx))
2190 		goto out;
2191 
2192 	vc4_state->hw_dlist = dlist;
2193 
2194 	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
2195 	for (i = 0; i < vc4_state->dlist_count; i++)
2196 		writel(vc4_state->dlist[i], &dlist[i]);
2197 
2198 	drm_dev_exit(idx);
2199 
2200 out:
2201 	return vc4_state->dlist_count;
2202 }
2203 
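/**
 * vc4_plane_dlist_size - return the size of a plane's display list
 * @state: plane state whose dlist was built at atomic_check time
 *
 * Returns the number of dwords the CRTC needs to reserve for this plane.
 */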
2204 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
2205 {
2206 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
2207 
2208 	return vc4_state->dlist_count;
2209 }
2210 
2211 /* Updates the plane to immediately (well, once the FIFO needs
2212  * refilling) scan out from a new framebuffer.
2213  */
2214 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
2215 {
2216 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2217 	struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
2218 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2219 	dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
2220 	int idx;
2221 
2222 	if (!drm_dev_enter(plane->dev, &idx))
2223 		return;
2224 
2225 	/* We're skipping the address adjustment for negative origin,
2226 	 * because this is only called on the primary plane.
2227 	 */
2228 	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
2229 
2230 	if (vc4->gen == VC4_GEN_6_C) {
2231 		u32 value;
2232 
2233 		value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
2234 					~SCALER6_PTR0_UPPER_ADDR_MASK;
2235 		value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
2236 				       SCALER6_PTR0_UPPER_ADDR);
2237 
2238 		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2239 		vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;
2240 
2241 		value = lower_32_bits(dma_addr);
2242 		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
2243 		vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
2244 	} else {
2245 		u32 addr;
2246 
2247 		addr = (u32)dma_addr;
2248 
2249 		/* Write the new address into the hardware immediately.  The
2250 		 * scanout will start from this address as soon as the FIFO
2251 		 * needs to refill with pixels.
2252 		 */
2253 		writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2254 
2255 		/* Also update the CPU-side dlist copy, so that any later
2256 		 * atomic updates that don't do a new modeset on our plane
2257 		 * also use our updated address.
2258 		 */
2259 		vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
2260 	}
2261 
2262 	drm_dev_exit(idx);
2263 }
2264 
2265 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
2266 					  struct drm_atomic_state *state)
2267 {
2268 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2269 										 plane);
2270 	struct vc4_plane_state *vc4_state, *new_vc4_state;
2271 	int idx;
2272 
2273 	if (!drm_dev_enter(plane->dev, &idx))
2274 		return;
2275 
2276 	swap(plane->state->fb, new_plane_state->fb);
2277 	plane->state->crtc_x = new_plane_state->crtc_x;
2278 	plane->state->crtc_y = new_plane_state->crtc_y;
2279 	plane->state->crtc_w = new_plane_state->crtc_w;
2280 	plane->state->crtc_h = new_plane_state->crtc_h;
2281 	plane->state->src_x = new_plane_state->src_x;
2282 	plane->state->src_y = new_plane_state->src_y;
2283 	plane->state->src_w = new_plane_state->src_w;
2284 	plane->state->src_h = new_plane_state->src_h;
2285 	plane->state->alpha = new_plane_state->alpha;
2286 	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
2287 	plane->state->rotation = new_plane_state->rotation;
2288 	plane->state->zpos = new_plane_state->zpos;
2289 	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
2290 	plane->state->color_encoding = new_plane_state->color_encoding;
2291 	plane->state->color_range = new_plane_state->color_range;
2292 	plane->state->src = new_plane_state->src;
2293 	plane->state->dst = new_plane_state->dst;
2294 	plane->state->visible = new_plane_state->visible;
2295 
2296 	new_vc4_state = to_vc4_plane_state(new_plane_state);
2297 	vc4_state = to_vc4_plane_state(plane->state);
2298 
2299 	vc4_state->crtc_x = new_vc4_state->crtc_x;
2300 	vc4_state->crtc_y = new_vc4_state->crtc_y;
2301 	vc4_state->crtc_h = new_vc4_state->crtc_h;
2302 	vc4_state->crtc_w = new_vc4_state->crtc_w;
2303 	vc4_state->src_x = new_vc4_state->src_x;
2304 	vc4_state->src_y = new_vc4_state->src_y;
2305 	memcpy(vc4_state->src_w, new_vc4_state->src_w,
2306 	       sizeof(vc4_state->src_w));
2307 	memcpy(vc4_state->src_h, new_vc4_state->src_h,
2308 	       sizeof(vc4_state->src_h));
2309 	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
2310 	       sizeof(vc4_state->x_scaling));
2311 	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
2312 	       sizeof(vc4_state->y_scaling));
2313 	vc4_state->is_unity = new_vc4_state->is_unity;
2314 	vc4_state->is_yuv = new_vc4_state->is_yuv;
2315 	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
2316 
2317 	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
2318 	vc4_state->dlist[vc4_state->pos0_offset] =
2319 		new_vc4_state->dlist[vc4_state->pos0_offset];
2320 	vc4_state->dlist[vc4_state->pos2_offset] =
2321 		new_vc4_state->dlist[vc4_state->pos2_offset];
2322 	vc4_state->dlist[vc4_state->ptr0_offset[0]] =
2323 		new_vc4_state->dlist[vc4_state->ptr0_offset[0]];
2324 
2325 	/* Note that we can't just call vc4_plane_write_dlist()
2326 	 * because that would smash the context data that the HVS is
2327 	 * currently using.
2328 	 */
2329 	writel(vc4_state->dlist[vc4_state->pos0_offset],
2330 	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
2331 	writel(vc4_state->dlist[vc4_state->pos2_offset],
2332 	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
2333 	writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
2334 	       &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2335 
2336 	drm_dev_exit(idx);
2337 }
2338 
2339 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
2340 					struct drm_atomic_state *state, bool flip)
2341 {
2342 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2343 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2344 										 plane);
2345 	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
2346 	int ret;
2347 	u32 i;
2348 
2349 	if (vc4->gen <= VC4_GEN_5)
2350 		ret = vc4_plane_mode_set(plane, new_plane_state);
2351 	else
2352 		ret = vc6_plane_mode_set(plane, new_plane_state);
2353 	if (ret)
2354 		return ret;
2355 
2356 	old_vc4_state = to_vc4_plane_state(plane->state);
2357 	new_vc4_state = to_vc4_plane_state(new_plane_state);
2358 
2359 	if (!new_vc4_state->hw_dlist)
2360 		return -EINVAL;
2361 
2362 	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
2363 	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
2364 	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
2365 	    old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
2366 	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
2367 		return -EINVAL;
2368 
2369 	/* Only the pos0, pos2 and ptr0 dwords can be updated in an async
2370 	 * update; if anything else has changed, fall back to a sync update.
2371 	 */
2372 	for (i = 0; i < new_vc4_state->dlist_count; i++) {
2373 		if (i == new_vc4_state->pos0_offset ||
2374 		    i == new_vc4_state->pos2_offset ||
2375 		    i == new_vc4_state->ptr0_offset[0] ||
2376 		    (new_vc4_state->lbm_offset &&
2377 		     i == new_vc4_state->lbm_offset))
2378 			continue;
2379 
2380 		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
2381 			return -EINVAL;
2382 	}
2383 
2384 	return 0;
2385 }
2386 
2387 static int vc4_prepare_fb(struct drm_plane *plane,
2388 			  struct drm_plane_state *state)
2389 {
2390 	struct vc4_bo *bo;
2391 	int ret;
2392 
2393 	if (!state->fb)
2394 		return 0;
2395 
2396 	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2397 
2398 	ret = drm_gem_plane_helper_prepare_fb(plane, state);
2399 	if (ret)
2400 		return ret;
2401 
2402 	return vc4_bo_inc_usecnt(bo);
2403 }
2404 
2405 static void vc4_cleanup_fb(struct drm_plane *plane,
2406 			   struct drm_plane_state *state)
2407 {
2408 	struct vc4_bo *bo;
2409 
2410 	if (!state->fb)
2411 		return;
2412 
2413 	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2414 	vc4_bo_dec_usecnt(bo);
2415 }
2416 
2417 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
2418 	.atomic_check = vc4_plane_atomic_check,
2419 	.atomic_update = vc4_plane_atomic_update,
2420 	.prepare_fb = vc4_prepare_fb,
2421 	.cleanup_fb = vc4_cleanup_fb,
2422 	.atomic_async_check = vc4_plane_atomic_async_check,
2423 	.atomic_async_update = vc4_plane_atomic_async_update,
2424 };
2425 
2426 static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
2427 	.atomic_check = vc4_plane_atomic_check,
2428 	.atomic_update = vc4_plane_atomic_update,
2429 	.atomic_async_check = vc4_plane_atomic_async_check,
2430 	.atomic_async_update = vc4_plane_atomic_async_update,
2431 };
2432 
2433 static bool vc4_format_mod_supported(struct drm_plane *plane,
2434 				     uint32_t format,
2435 				     uint64_t modifier)
2436 {
2437 	/* Support T_TILING for RGB formats only. */
2438 	switch (format) {
2439 	case DRM_FORMAT_XRGB8888:
2440 	case DRM_FORMAT_ARGB8888:
2441 	case DRM_FORMAT_ABGR8888:
2442 	case DRM_FORMAT_XBGR8888:
2443 	case DRM_FORMAT_RGB565:
2444 	case DRM_FORMAT_BGR565:
2445 	case DRM_FORMAT_ARGB1555:
2446 	case DRM_FORMAT_XRGB1555:
2447 		switch (fourcc_mod_broadcom_mod(modifier)) {
2448 		case DRM_FORMAT_MOD_LINEAR:
2449 		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
2450 			return true;
2451 		default:
2452 			return false;
2453 		}
2454 	case DRM_FORMAT_NV12:
2455 	case DRM_FORMAT_NV21:
2456 		switch (fourcc_mod_broadcom_mod(modifier)) {
2457 		case DRM_FORMAT_MOD_LINEAR:
2458 		case DRM_FORMAT_MOD_BROADCOM_SAND64:
2459 		case DRM_FORMAT_MOD_BROADCOM_SAND128:
2460 		case DRM_FORMAT_MOD_BROADCOM_SAND256:
2461 			return true;
2462 		default:
2463 			return false;
2464 		}
2465 	case DRM_FORMAT_P030:
2466 		switch (fourcc_mod_broadcom_mod(modifier)) {
2467 		case DRM_FORMAT_MOD_BROADCOM_SAND128:
2468 			return true;
2469 		default:
2470 			return false;
2471 		}
2472 	case DRM_FORMAT_RGBX1010102:
2473 	case DRM_FORMAT_BGRX1010102:
2474 	case DRM_FORMAT_RGBA1010102:
2475 	case DRM_FORMAT_BGRA1010102:
2476 	case DRM_FORMAT_XRGB4444:
2477 	case DRM_FORMAT_ARGB4444:
2478 	case DRM_FORMAT_XBGR4444:
2479 	case DRM_FORMAT_ABGR4444:
2480 	case DRM_FORMAT_RGBX4444:
2481 	case DRM_FORMAT_RGBA4444:
2482 	case DRM_FORMAT_BGRX4444:
2483 	case DRM_FORMAT_BGRA4444:
2484 	case DRM_FORMAT_RGB332:
2485 	case DRM_FORMAT_BGR233:
2486 	case DRM_FORMAT_YUV422:
2487 	case DRM_FORMAT_YVU422:
2488 	case DRM_FORMAT_YUV420:
2489 	case DRM_FORMAT_YVU420:
2490 	case DRM_FORMAT_NV16:
2491 	case DRM_FORMAT_NV61:
2492 	default:
2493 		return (modifier == DRM_FORMAT_MOD_LINEAR);
2494 	}
2495 }
2496 
2497 static const struct drm_plane_funcs vc4_plane_funcs = {
2498 	.update_plane = drm_atomic_helper_update_plane,
2499 	.disable_plane = drm_atomic_helper_disable_plane,
2500 	.reset = vc4_plane_reset,
2501 	.atomic_duplicate_state = vc4_plane_duplicate_state,
2502 	.atomic_destroy_state = vc4_plane_destroy_state,
2503 	.format_mod_supported = vc4_format_mod_supported,
2504 };
2505 
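/**
 * vc4_plane_init - allocate and initialize a single plane
 * @dev: DRM device
 * @type: primary, overlay or cursor plane
 * @possible_crtcs: mask of CRTCs the plane can be displayed on
 *
 * Registers the plane with the format/modifier list appropriate for the
 * HVS generation and creates the standard alpha, blend mode, rotation and
 * color encoding/range properties (plus an immutable zpos of 0 for the
 * primary plane).
 */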
2506 struct drm_plane *vc4_plane_init(struct drm_device *dev,
2507 				 enum drm_plane_type type,
2508 				 uint32_t possible_crtcs)
2509 {
2510 	struct vc4_dev *vc4 = to_vc4_dev(dev);
2511 	struct drm_plane *plane;
2512 	struct vc4_plane *vc4_plane;
2513 	u32 formats[ARRAY_SIZE(hvs_formats)];
2514 	int num_formats = 0;
2515 	unsigned i;
2516 	static const uint64_t modifiers[] = {
2517 		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
2518 		DRM_FORMAT_MOD_BROADCOM_SAND128,
2519 		DRM_FORMAT_MOD_BROADCOM_SAND64,
2520 		DRM_FORMAT_MOD_BROADCOM_SAND256,
2521 		DRM_FORMAT_MOD_LINEAR,
2522 		DRM_FORMAT_MOD_INVALID
2523 	};
2524 
2525 	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
2526 		if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
2527 			formats[num_formats] = hvs_formats[i].drm;
2528 			num_formats++;
2529 		}
2530 	}
2531 
2532 	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
2533 					       possible_crtcs,
2534 					       &vc4_plane_funcs,
2535 					       formats, num_formats,
2536 					       modifiers, type, NULL);
2537 	if (IS_ERR(vc4_plane))
2538 		return ERR_CAST(vc4_plane);
2539 	plane = &vc4_plane->base;
2540 
2541 	if (vc4->gen >= VC4_GEN_5)
2542 		drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
2543 	else
2544 		drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
2545 
2546 	drm_plane_create_alpha_property(plane);
2547 	drm_plane_create_blend_mode_property(plane,
2548 					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
2549 					     BIT(DRM_MODE_BLEND_PREMULTI) |
2550 					     BIT(DRM_MODE_BLEND_COVERAGE));
2551 	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
2552 					   DRM_MODE_ROTATE_0 |
2553 					   DRM_MODE_ROTATE_180 |
2554 					   DRM_MODE_REFLECT_X |
2555 					   DRM_MODE_REFLECT_Y);
2556 
2557 	drm_plane_create_color_properties(plane,
2558 					  BIT(DRM_COLOR_YCBCR_BT601) |
2559 					  BIT(DRM_COLOR_YCBCR_BT709) |
2560 					  BIT(DRM_COLOR_YCBCR_BT2020),
2561 					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
2562 					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
2563 					  DRM_COLOR_YCBCR_BT709,
2564 					  DRM_COLOR_YCBCR_LIMITED_RANGE);
2565 
2566 	if (type == DRM_PLANE_TYPE_PRIMARY)
2567 		drm_plane_create_zpos_immutable_property(plane, 0);
2568 
2569 	return plane;
2570 }
2571 
2572 #define VC4_NUM_OVERLAY_PLANES	16
2573 
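/**
 * vc4_plane_create_additional_planes - create overlay and cursor planes
 * @drm: DRM device
 *
 * Creates a fixed pool of overlay planes usable on any CRTC plus a legacy
 * cursor plane per CRTC, and wires up their zpos properties.
 */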
2574 int vc4_plane_create_additional_planes(struct drm_device *drm)
2575 {
2576 	struct drm_plane *cursor_plane;
2577 	struct drm_crtc *crtc;
2578 	unsigned int i;
2579 
2580 	/* Set up some arbitrary number of planes.  We're not limited
2581 	 * by a set number of physical registers, just the space in
2582 	 * the HVS (16k) and how small a plane can be (28 bytes).
2583 	 * However, each plane we set up takes up some memory, and
2584 	 * increases the cost of looping over planes, which atomic
2585 	 * modesetting does quite a bit.  As a result, we pick a
2586 	 * modest number of planes to expose, that should hopefully
2587 	 * still cover any sane usecase.
2588 	 */
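	/* (For scale: 16k of dlist space divided by 28-byte minimum elements
	 * would allow several hundred elements, assuming both figures above
	 * are in bytes, so 16 overlays is a comfortably small subset.)
	 */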
2589 	for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
2590 		struct drm_plane *plane =
2591 			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
2592 				       GENMASK(drm->mode_config.num_crtc - 1, 0));
2593 
2594 		if (IS_ERR(plane))
2595 			continue;
2596 
2597 		/* Create zpos property. Max of all the overlays + 1 primary +
2598 		 * 1 cursor plane on a crtc.
2599 		 */
2600 		drm_plane_create_zpos_property(plane, i + 1, 1,
2601 					       VC4_NUM_OVERLAY_PLANES + 1);
2602 	}
2603 
2604 	drm_for_each_crtc(crtc, drm) {
2605 		/* Set up the legacy cursor after overlay initialization,
2606 		 * since the zpos fallback is that planes are rendered by plane
2607 		 * ID order, and that then puts the cursor on top.
2608 		 */
2609 		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
2610 					      drm_crtc_mask(crtc));
2611 		if (!IS_ERR(cursor_plane)) {
2612 			crtc->cursor = cursor_plane;
2613 
2614 			drm_plane_create_zpos_property(cursor_plane,
2615 						       VC4_NUM_OVERLAY_PLANES + 1,
2616 						       1,
2617 						       VC4_NUM_OVERLAY_PLANES + 1);
2618 		}
2619 	}
2620 
2621 	return 0;
2622 }
2623