xref: /linux/drivers/gpu/drm/vc4/vc4_plane.c (revision 5077f45ecdd6098996c54082c9a4539d5480f8b1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Broadcom
4  */
5 
6 /**
7  * DOC: VC4 plane module
8  *
9  * Each DRM plane is a layer of pixels being scanned out by the HVS.
10  *
11  * At atomic modeset check time, we compute the HVS display element
12  * state that would be necessary for displaying the plane (giving us a
13  * chance to figure out if a plane configuration is invalid), then at
14  * atomic flush time the CRTC will ask us to write our element state
15  * into the region of the HVS that it has allocated for us.
16  */
17 
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_blend.h>
22 #include <drm/drm_drv.h>
23 #include <drm/drm_fb_dma_helper.h>
24 #include <drm/drm_fourcc.h>
25 #include <drm/drm_framebuffer.h>
26 #include <drm/drm_gem_atomic_helper.h>
27 #include <drm/drm_print.h>
28 
29 #include "vc4_drv.h"
30 #include "vc4_regs.h"
31 
32 static const struct hvs_format {
33 	u32 drm; /* DRM_FORMAT_* */
34 	u32 hvs; /* HVS_FORMAT_* */
35 	u32 pixel_order;
36 	u32 pixel_order_hvs5;
37 	bool hvs5_only;
38 } hvs_formats[] = {
39 	{
40 		.drm = DRM_FORMAT_XRGB8888,
41 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
42 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
43 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
44 	},
45 	{
46 		.drm = DRM_FORMAT_ARGB8888,
47 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
48 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
49 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
50 	},
51 	{
52 		.drm = DRM_FORMAT_ABGR8888,
53 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
54 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
55 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
56 	},
57 	{
58 		.drm = DRM_FORMAT_XBGR8888,
59 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
60 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
61 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
62 	},
63 	{
64 		.drm = DRM_FORMAT_RGB565,
65 		.hvs = HVS_PIXEL_FORMAT_RGB565,
66 		.pixel_order = HVS_PIXEL_ORDER_XRGB,
67 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
68 	},
69 	{
70 		.drm = DRM_FORMAT_BGR565,
71 		.hvs = HVS_PIXEL_FORMAT_RGB565,
72 		.pixel_order = HVS_PIXEL_ORDER_XBGR,
73 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
74 	},
75 	{
76 		.drm = DRM_FORMAT_ARGB1555,
77 		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
78 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
79 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
80 	},
81 	{
82 		.drm = DRM_FORMAT_XRGB1555,
83 		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
84 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
85 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
86 	},
87 	{
88 		.drm = DRM_FORMAT_RGB888,
89 		.hvs = HVS_PIXEL_FORMAT_RGB888,
90 		.pixel_order = HVS_PIXEL_ORDER_XRGB,
91 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
92 	},
93 	{
94 		.drm = DRM_FORMAT_BGR888,
95 		.hvs = HVS_PIXEL_FORMAT_RGB888,
96 		.pixel_order = HVS_PIXEL_ORDER_XBGR,
97 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
98 	},
99 	{
100 		.drm = DRM_FORMAT_YUV422,
101 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
102 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
103 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
104 	},
105 	{
106 		.drm = DRM_FORMAT_YVU422,
107 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
108 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
109 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
110 	},
111 	{
112 		.drm = DRM_FORMAT_YUV444,
113 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
114 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
115 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
116 	},
117 	{
118 		.drm = DRM_FORMAT_YVU444,
119 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
120 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
121 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
122 	},
123 	{
124 		.drm = DRM_FORMAT_YUV420,
125 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
126 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
127 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
128 	},
129 	{
130 		.drm = DRM_FORMAT_YVU420,
131 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
132 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
133 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
134 	},
135 	{
136 		.drm = DRM_FORMAT_NV12,
137 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
138 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
139 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
140 	},
141 	{
142 		.drm = DRM_FORMAT_NV21,
143 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
144 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
145 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
146 	},
147 	{
148 		.drm = DRM_FORMAT_NV16,
149 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
150 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
151 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
152 	},
153 	{
154 		.drm = DRM_FORMAT_NV61,
155 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
156 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
157 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
158 	},
159 	{
160 		.drm = DRM_FORMAT_P030,
161 		.hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
162 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
163 		.hvs5_only = true,
164 	},
165 	{
166 		.drm = DRM_FORMAT_XRGB2101010,
167 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
168 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
169 		.hvs5_only = true,
170 	},
171 	{
172 		.drm = DRM_FORMAT_ARGB2101010,
173 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
174 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
175 		.hvs5_only = true,
176 	},
177 	{
178 		.drm = DRM_FORMAT_ABGR2101010,
179 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
180 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
181 		.hvs5_only = true,
182 	},
183 	{
184 		.drm = DRM_FORMAT_XBGR2101010,
185 		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
186 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
187 		.hvs5_only = true,
188 	},
189 	{
190 		.drm = DRM_FORMAT_RGB332,
191 		.hvs = HVS_PIXEL_FORMAT_RGB332,
192 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
193 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
194 	},
195 	{
196 		.drm = DRM_FORMAT_BGR233,
197 		.hvs = HVS_PIXEL_FORMAT_RGB332,
198 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
199 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
200 	},
201 	{
202 		.drm = DRM_FORMAT_XRGB4444,
203 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
204 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
205 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
206 	},
207 	{
208 		.drm = DRM_FORMAT_ARGB4444,
209 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
210 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
211 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
212 	},
213 	{
214 		.drm = DRM_FORMAT_XBGR4444,
215 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
216 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
217 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
218 	},
219 	{
220 		.drm = DRM_FORMAT_ABGR4444,
221 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
222 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
223 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
224 	},
225 	{
226 		.drm = DRM_FORMAT_BGRX4444,
227 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
228 		.pixel_order = HVS_PIXEL_ORDER_RGBA,
229 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
230 	},
231 	{
232 		.drm = DRM_FORMAT_BGRA4444,
233 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
234 		.pixel_order = HVS_PIXEL_ORDER_RGBA,
235 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
236 	},
237 	{
238 		.drm = DRM_FORMAT_RGBX4444,
239 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
240 		.pixel_order = HVS_PIXEL_ORDER_BGRA,
241 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
242 	},
243 	{
244 		.drm = DRM_FORMAT_RGBA4444,
245 		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
246 		.pixel_order = HVS_PIXEL_ORDER_BGRA,
247 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
248 	},
249 };
250 
251 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
252 {
253 	unsigned i;
254 
255 	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
256 		if (hvs_formats[i].drm == drm_format)
257 			return &hvs_formats[i];
258 	}
259 
260 	return NULL;
261 }
262 
263 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
264 {
265 	if (dst == src >> 16)
266 		return VC4_SCALING_NONE;
267 	if (3 * dst >= 2 * (src >> 16))
268 		return VC4_SCALING_PPF;
269 	else
270 		return VC4_SCALING_TPZ;
271 }
272 
273 static bool plane_enabled(struct drm_plane_state *state)
274 {
275 	return state->fb && !WARN_ON(!state->crtc);
276 }
277 
278 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
279 {
280 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
281 	struct vc4_hvs *hvs = vc4->hvs;
282 	struct vc4_plane_state *vc4_state;
283 	unsigned int i;
284 
285 	if (WARN_ON(!plane->state))
286 		return NULL;
287 
288 	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
289 	if (!vc4_state)
290 		return NULL;
291 
292 	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
293 
294 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
295 		if (vc4_state->upm_handle[i])
296 			refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
297 	}
298 
299 	vc4_state->dlist_initialized = 0;
300 
301 	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
302 
303 	if (vc4_state->dlist) {
304 		vc4_state->dlist = kmemdup(vc4_state->dlist,
305 					   vc4_state->dlist_count * 4,
306 					   GFP_KERNEL);
307 		if (!vc4_state->dlist) {
308 			kfree(vc4_state);
309 			return NULL;
310 		}
311 		vc4_state->dlist_size = vc4_state->dlist_count;
312 	}
313 
314 	return &vc4_state->base;
315 }
316 
317 static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
318 {
319 	struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
320 	unsigned long irqflags;
321 
322 	spin_lock_irqsave(&hvs->mm_lock, irqflags);
323 	drm_mm_remove_node(&refcount->upm);
324 	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
325 	refcount->upm.start = 0;
326 	refcount->upm.size = 0;
327 	refcount->size = 0;
328 
329 	ida_free(&hvs->upm_handles, upm_handle);
330 }
331 
332 static void vc4_plane_destroy_state(struct drm_plane *plane,
333 				    struct drm_plane_state *state)
334 {
335 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
336 	struct vc4_hvs *hvs = vc4->hvs;
337 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
338 	unsigned int i;
339 
340 	if (drm_mm_node_allocated(&vc4_state->lbm)) {
341 		unsigned long irqflags;
342 
343 		spin_lock_irqsave(&hvs->mm_lock, irqflags);
344 		drm_mm_remove_node(&vc4_state->lbm);
345 		spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
346 	}
347 
348 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
349 		struct vc4_upm_refcounts *refcount;
350 
351 		if (!vc4_state->upm_handle[i])
352 			continue;
353 
354 		refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];
355 
356 		if (refcount_dec_and_test(&refcount->refcount))
357 			vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
358 	}
359 
360 	kfree(vc4_state->dlist);
361 	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
362 	kfree(state);
363 }
364 
365 /* Called during init to allocate the plane's atomic state. */
366 static void vc4_plane_reset(struct drm_plane *plane)
367 {
368 	struct vc4_plane_state *vc4_state;
369 
370 	if (plane->state)
371 		__drm_atomic_helper_plane_destroy_state(plane->state);
372 
373 	kfree(plane->state);
374 
375 	vc4_state = kzalloc_obj(*vc4_state);
376 	if (!vc4_state)
377 		return;
378 
379 	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
380 }
381 
382 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
383 {
384 	if (vc4_state->dlist_count == vc4_state->dlist_size) {
385 		u32 new_size = max(4u, vc4_state->dlist_count * 2);
386 		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
387 
388 		if (!new_dlist)
389 			return;
390 		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
391 
392 		kfree(vc4_state->dlist);
393 		vc4_state->dlist = new_dlist;
394 		vc4_state->dlist_size = new_size;
395 	}
396 
397 	vc4_state->dlist_count++;
398 }
399 
400 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
401 {
402 	unsigned int idx = vc4_state->dlist_count;
403 
404 	vc4_dlist_counter_increment(vc4_state);
405 	vc4_state->dlist[idx] = val;
406 }
407 
408 /* Returns the scl0/scl1 field based on whether the dimensions need to
409  * be up/down/non-scaled.
410  *
411  * This is a replication of a table from the spec.
412  */
413 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
414 {
415 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
416 
417 	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
418 	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
419 		return SCALER_CTL0_SCL_H_PPF_V_PPF;
420 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
421 		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
422 	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
423 		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
424 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
425 		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
426 	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
427 		return SCALER_CTL0_SCL_H_PPF_V_NONE;
428 	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
429 		return SCALER_CTL0_SCL_H_NONE_V_PPF;
430 	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
431 		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
432 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
433 		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
434 	default:
435 	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
436 		/* The unity case is independently handled by
437 		 * SCALER_CTL0_UNITY.
438 		 */
439 		return 0;
440 	}
441 }
442 
443 static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
444 {
445 	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
446 	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
447 	struct drm_crtc_state *crtc_state;
448 
449 	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
450 						   pstate->crtc);
451 
452 	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
453 	if (!left && !right && !top && !bottom)
454 		return 0;
455 
456 	if (left + right >= crtc_state->mode.hdisplay ||
457 	    top + bottom >= crtc_state->mode.vdisplay)
458 		return -EINVAL;
459 
460 	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
461 	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
462 					       adjhdisplay,
463 					       crtc_state->mode.hdisplay);
464 	vc4_pstate->crtc_x += left;
465 	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
466 		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;
467 
468 	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
469 	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
470 					       adjvdisplay,
471 					       crtc_state->mode.vdisplay);
472 	vc4_pstate->crtc_y += top;
473 	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
474 		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;
475 
476 	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
477 					       adjhdisplay,
478 					       crtc_state->mode.hdisplay);
479 	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
480 					       adjvdisplay,
481 					       crtc_state->mode.vdisplay);
482 
483 	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
484 		return -EINVAL;
485 
486 	return 0;
487 }
488 
489 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
490 {
491 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
492 	struct drm_framebuffer *fb = state->fb;
493 	int num_planes = fb->format->num_planes;
494 	struct drm_crtc_state *crtc_state;
495 	u32 h_subsample = fb->format->hsub;
496 	u32 v_subsample = fb->format->vsub;
497 	int ret;
498 
499 	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
500 	if (!crtc_state) {
501 		DRM_DEBUG_KMS("Invalid crtc state\n");
502 		return -EINVAL;
503 	}
504 
505 	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
506 						  INT_MAX, true, true);
507 	if (ret)
508 		return ret;
509 
510 	vc4_state->src_x = state->src.x1;
511 	vc4_state->src_y = state->src.y1;
512 	vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
513 	vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;
514 
515 	vc4_state->crtc_x = state->dst.x1;
516 	vc4_state->crtc_y = state->dst.y1;
517 	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
518 	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
519 
520 	ret = vc4_plane_margins_adj(state);
521 	if (ret)
522 		return ret;
523 
524 	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
525 						       vc4_state->crtc_w);
526 	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
527 						       vc4_state->crtc_h);
528 
529 	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
530 			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);
531 
532 	if (num_planes > 1) {
533 		vc4_state->is_yuv = true;
534 
535 		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
536 		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
537 
538 		vc4_state->x_scaling[1] =
539 			vc4_get_scaling_mode(vc4_state->src_w[1],
540 					     vc4_state->crtc_w);
541 		vc4_state->y_scaling[1] =
542 			vc4_get_scaling_mode(vc4_state->src_h[1],
543 					     vc4_state->crtc_h);
544 
545 		/* YUV conversion requires that horizontal scaling be enabled
546 		 * on the UV plane even if vc4_get_scaling_mode() returned
547 		 * VC4_SCALING_NONE (which can happen when the down-scaling
548 		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
549 		 * case.
550 		 */
551 		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
552 			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
553 
554 		/* Similarly UV needs vertical scaling to be enabled.
555 		 * Without this a 1:1 scaled YUV422 plane isn't rendered.
556 		 */
557 		if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
558 			vc4_state->y_scaling[1] = VC4_SCALING_PPF;
559 	} else {
560 		vc4_state->is_yuv = false;
561 		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
562 		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
563 	}
564 
565 	return 0;
566 }
567 
568 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
569 {
570 	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
571 	u32 scale, recip;
572 
573 	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);
574 
575 	scale = src / dst;
576 
577 	/* The specs note that while the reciprocal would be defined
578 	 * as (1<<32)/scale, ~0 is close enough.
579 	 */
580 	recip = ~0 / scale;
581 
582 	vc4_dlist_write(vc4_state,
583 			/*
584 			 * The BCM2712 is lacking BIT(31) compared to
585 			 * the previous generations, but we don't use
586 			 * it.
587 			 */
588 			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
589 			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
590 	vc4_dlist_write(vc4_state,
591 			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
592 }
593 
/* Number of phase magnitude bits used by vc4_write_ppf(). */
#define PHASE_BITS 6

/*
 * Append one PPF scaler word to the display list.
 *
 * @src and @dst are the source and destination sizes along the axis being
 * scaled, @xy is the source start coordinate on that axis (its fractional
 * bits feed the initial phase) and @channel selects between the two phase
 * computations below: zero for the first channel, non-zero for the second.
 */
static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
			  u32 xy, int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale = src / dst;
	s32 offset, offset2;
	s32 phase;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/*
	 * Start the phase at 1/2 pixel from the 1st pixel at src_x.
	 * 1/4 pixel for YUV.
	 */
	if (channel) {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
		/* Subtract a quarter of the phase range. */
		offset += -(1 << PHASE_BITS >> 2);
	} else {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0xffff) >> (16 - PHASE_BITS);
		/* Subtract half of the phase range. */
		offset += -(1 << PHASE_BITS >> 1);

		/*
		 * This is a kludge to make sure the scaling factors are
		 * consistent with YUV's luma scaling. We lose 1-bit precision
		 * because of this.
		 */
		scale &= ~1;
	}

	/*
	 * There may also be a small error introduced by the precision of
	 * scale. Add half of that as a compromise.
	 */
	offset2 = src - dst * scale;
	offset2 >>= 16 - PHASE_BITS;
	phase = offset + (offset2 >> 1);

	/*
	 * Ensure positive values don't touch the sign bit, then truncate
	 * negative values to the width of the phase field.
	 */
	if (phase >= 1 << PHASE_BITS)
		phase = (1 << PHASE_BITS) - 1;

	phase &= SCALER_PPF_IPHASE_MASK;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			/*
			 * The register layout documentation is slightly
			 * different to setup the phase in the BCM2712,
			 * but they seem equivalent.
			 */
			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
}
658 
659 static u32 __vc4_lbm_size(struct drm_plane_state *state)
660 {
661 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
662 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
663 	u32 pix_per_line;
664 	u32 lbm;
665 
666 	/* LBM is not needed when there's no vertical scaling. */
667 	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
668 	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
669 		return 0;
670 
671 	/*
672 	 * This can be further optimized in the RGB/YUV444 case if the PPF
673 	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
674 	 *
675 	 * It's not an issue though, since in that case since src_w[0] is going
676 	 * to be greater than or equal to crtc_w.
677 	 */
678 	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
679 		pix_per_line = vc4_state->crtc_w;
680 	else
681 		pix_per_line = vc4_state->src_w[0] >> 16;
682 
683 	if (!vc4_state->is_yuv) {
684 		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
685 			lbm = pix_per_line * 8;
686 		else {
687 			/* In special cases, this multiplier might be 12. */
688 			lbm = pix_per_line * 16;
689 		}
690 	} else {
691 		/* There are cases for this going down to a multiplier
692 		 * of 2, but according to the firmware source, the
693 		 * table in the docs is somewhat wrong.
694 		 */
695 		lbm = pix_per_line * 16;
696 	}
697 
698 	/* Align it to 64 or 128 (hvs5) bytes */
699 	lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);
700 
701 	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
702 	lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;
703 
704 	return lbm;
705 }
706 
707 static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
708 						unsigned int channel)
709 {
710 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
711 
712 	switch (vc4_state->y_scaling[channel]) {
713 	case VC4_SCALING_PPF:
714 		return 4;
715 
716 	case VC4_SCALING_TPZ:
717 		return 2;
718 
719 	default:
720 		return 0;
721 	}
722 }
723 
724 static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
725 				       unsigned int channel)
726 {
727 	const struct drm_format_info *info = state->fb->format;
728 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
729 
730 	if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
731 		return 0;
732 
733 	if (info->is_yuv)
734 		return channel ? 2 : 1;
735 
736 	if (info->has_alpha)
737 		return 4;
738 
739 	return 3;
740 }
741 
742 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
743 					 unsigned int channel)
744 {
745 	const struct drm_format_info *info = state->fb->format;
746 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
747 	unsigned int channels_scaled = 0;
748 	unsigned int components, words, wpc;
749 	unsigned int width, lines;
750 	unsigned int i;
751 
752 	/* LBM is meant to use the smaller of source or dest width, but there
753 	 * is a issue with UV scaling that the size required for the second
754 	 * channel is based on the source width only.
755 	 */
756 	if (info->hsub > 1 && channel == 1)
757 		width = state->src_w >> 16;
758 	else
759 		width = min(state->src_w >> 16, state->crtc_w);
760 	width = round_up(width / info->hsub, 4);
761 
762 	wpc = vc4_lbm_words_per_component(state, channel);
763 	if (!wpc)
764 		return 0;
765 
766 	components = vc4_lbm_components(state, channel);
767 	if (!components)
768 		return 0;
769 
770 	if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha)
771 		components -= 1;
772 
773 	words = width * wpc * components;
774 
775 	lines = DIV_ROUND_UP(words, 128 / info->hsub);
776 
777 	for (i = 0; i < 2; i++)
778 		if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
779 			channels_scaled++;
780 
781 	if (channels_scaled == 1)
782 		lines = lines / 2;
783 
784 	return lines;
785 }
786 
787 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
788 {
789 	const struct drm_format_info *info = state->fb->format;
790 
791 	if (info->hsub > 1)
792 		return max(vc4_lbm_channel_size(state, 0),
793 			   vc4_lbm_channel_size(state, 1));
794 	else
795 		return vc4_lbm_channel_size(state, 0);
796 }
797 
798 static u32 vc4_lbm_size(struct drm_plane_state *state)
799 {
800 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
801 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
802 
803 	/* LBM is not needed when there's no vertical scaling. */
804 	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
805 	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
806 		return 0;
807 
808 	if (vc4->gen >= VC4_GEN_6_C)
809 		return __vc6_lbm_size(state);
810 	else
811 		return __vc4_lbm_size(state);
812 }
813 
814 static size_t vc6_upm_size(const struct drm_plane_state *state,
815 			   unsigned int plane)
816 {
817 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
818 	unsigned int stride = state->fb->pitches[plane];
819 
820 	/*
821 	 * TODO: This only works for raster formats, and is sub-optimal
822 	 * for buffers with a stride aligned on 32 bytes.
823 	 */
824 	unsigned int words_per_line = (stride + 62) / 32;
825 	unsigned int fetch_region_size = words_per_line * 32;
826 	unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
827 	unsigned int buffer_size = fetch_region_size * buffer_lines;
828 
829 	return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
830 }
831 
832 static void vc4_write_scaling_parameters(struct drm_plane_state *state,
833 					 int channel)
834 {
835 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
836 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
837 
838 	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);
839 
840 	/* Ch0 H-PPF Word 0: Scaling Parameters */
841 	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
842 		vc4_write_ppf(vc4_state, vc4_state->src_w[channel],
843 			      vc4_state->crtc_w, vc4_state->src_x, channel);
844 	}
845 
846 	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
847 	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
848 		vc4_write_ppf(vc4_state, vc4_state->src_h[channel],
849 			      vc4_state->crtc_h, vc4_state->src_y, channel);
850 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
851 	}
852 
853 	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
854 	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
855 		vc4_write_tpz(vc4_state, vc4_state->src_w[channel],
856 			      vc4_state->crtc_w);
857 	}
858 
859 	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
860 	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
861 		vc4_write_tpz(vc4_state, vc4_state->src_h[channel],
862 			      vc4_state->crtc_h);
863 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
864 	}
865 }
866 
867 static void vc4_plane_calc_load(struct drm_plane_state *state)
868 {
869 	unsigned int hvs_load_shift, vrefresh, i;
870 	struct drm_framebuffer *fb = state->fb;
871 	struct vc4_plane_state *vc4_state;
872 	struct drm_crtc_state *crtc_state;
873 	unsigned int vscale_factor;
874 
875 	vc4_state = to_vc4_plane_state(state);
876 	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
877 	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
878 
879 	/* The HVS is able to process 2 pixels/cycle when scaling the source,
880 	 * 4 pixels/cycle otherwise.
881 	 * Alpha blending step seems to be pipelined and it's always operating
882 	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
883 	 * scaler block.
884 	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
885 	 */
886 	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
887 	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
888 	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
889 	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
890 		hvs_load_shift = 1;
891 	else
892 		hvs_load_shift = 2;
893 
894 	vc4_state->membus_load = 0;
895 	vc4_state->hvs_load = 0;
896 	for (i = 0; i < fb->format->num_planes; i++) {
897 		/* Even if the bandwidth/plane required for a single frame is
898 		 *
899 		 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
900 		 *  cpp * vrefresh
901 		 *
902 		 * when downscaling, we have to read more pixels per line in
903 		 * the time frame reserved for a single line, so the bandwidth
904 		 * demand can be punctually higher. To account for that, we
905 		 * calculate the down-scaling factor and multiply the plane
906 		 * load by this number. We're likely over-estimating the read
907 		 * demand, but that's better than under-estimating it.
908 		 */
909 		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
910 					     vc4_state->crtc_h);
911 		vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
912 					  (vc4_state->src_h[i] >> 16) *
913 					  vscale_factor * fb->format->cpp[i];
914 		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
915 	}
916 
917 	vc4_state->hvs_load *= vrefresh;
918 	vc4_state->hvs_load >>= hvs_load_shift;
919 	vc4_state->membus_load *= vrefresh;
920 }
921 
922 static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
923 {
924 	struct drm_device *drm = state->plane->dev;
925 	struct vc4_dev *vc4 = to_vc4_dev(drm);
926 	struct drm_plane *plane = state->plane;
927 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
928 	unsigned long irqflags;
929 	u32 lbm_size;
930 
931 	lbm_size = vc4_lbm_size(state);
932 	if (!lbm_size)
933 		return 0;
934 
935 	/*
936 	 * NOTE: BCM2712 doesn't need to be aligned, since the size
937 	 * returned by vc4_lbm_size() is in words already.
938 	 */
939 	if (vc4->gen == VC4_GEN_5)
940 		lbm_size = ALIGN(lbm_size, 64);
941 	else if (vc4->gen == VC4_GEN_4)
942 		lbm_size = ALIGN(lbm_size, 32);
943 
944 	drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
945 		       plane->base.id, plane->name, lbm_size);
946 
947 	if (WARN_ON(!vc4_state->lbm_offset))
948 		return -EINVAL;
949 
950 	/* Allocate the LBM memory that the HVS will use for temporary
951 	 * storage due to our scaling/format conversion.
952 	 */
953 	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
954 		int ret;
955 
956 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
957 		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
958 						 &vc4_state->lbm,
959 						 lbm_size, 1,
960 						 0, 0);
961 		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
962 
963 		if (ret) {
964 			drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
965 			return ret;
966 		}
967 	} else {
968 		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
969 	}
970 
971 	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
972 
973 	return 0;
974 }
975 
976 static int vc6_plane_allocate_upm(struct drm_plane_state *state)
977 {
978 	const struct drm_format_info *info = state->fb->format;
979 	struct drm_device *drm = state->plane->dev;
980 	struct vc4_dev *vc4 = to_vc4_dev(drm);
981 	struct vc4_hvs *hvs = vc4->hvs;
982 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
983 	unsigned int i;
984 	int ret;
985 
986 	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
987 
988 	vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;
989 
990 	for (i = 0; i < info->num_planes; i++) {
991 		struct vc4_upm_refcounts *refcount;
992 		int upm_handle;
993 		unsigned long irqflags;
994 		size_t upm_size;
995 
996 		upm_size = vc6_upm_size(state, i);
997 		if (!upm_size)
998 			return -EINVAL;
999 		upm_handle = vc4_state->upm_handle[i];
1000 
1001 		if (upm_handle &&
1002 		    hvs->upm_refcounts[upm_handle].size == upm_size) {
1003 			/* Allocation is the same size as the previous user of
1004 			 * the plane. Keep the allocation.
1005 			 */
1006 			vc4_state->upm_handle[i] = upm_handle;
1007 		} else {
1008 			if (upm_handle &&
1009 			    refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
1010 				vc4_plane_release_upm_ida(hvs, upm_handle);
1011 				vc4_state->upm_handle[i] = 0;
1012 			}
1013 
1014 			upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
1015 						     VC4_NUM_UPM_HANDLES,
1016 						     GFP_KERNEL);
1017 			if (upm_handle < 0) {
1018 				drm_dbg(drm, "Out of upm_handles\n");
1019 				return upm_handle;
1020 			}
1021 			vc4_state->upm_handle[i] = upm_handle;
1022 
1023 			refcount = &hvs->upm_refcounts[upm_handle];
1024 			refcount_set(&refcount->refcount, 1);
1025 			refcount->size = upm_size;
1026 
1027 			spin_lock_irqsave(&hvs->mm_lock, irqflags);
1028 			ret = drm_mm_insert_node_generic(&hvs->upm_mm,
1029 							 &refcount->upm,
1030 							 upm_size, HVS_UBM_WORD_SIZE,
1031 							 0, 0);
1032 			spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
1033 			if (ret) {
1034 				drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
1035 				refcount_set(&refcount->refcount, 0);
1036 				ida_free(&hvs->upm_handles, upm_handle);
1037 				vc4_state->upm_handle[i] = 0;
1038 				return ret;
1039 			}
1040 		}
1041 
1042 		refcount = &hvs->upm_refcounts[upm_handle];
1043 		vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
1044 			VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
1045 				      SCALER6_PTR0_UPM_BASE) |
1046 			VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
1047 				      SCALER6_PTR0_UPM_HANDLE) |
1048 			VC4_SET_FIELD(vc4_state->upm_buffer_lines,
1049 				      SCALER6_PTR0_UPM_BUFF_SIZE);
1050 	}
1051 
1052 	return 0;
1053 }
1054 
1055 static void vc6_plane_free_upm(struct drm_plane_state *state)
1056 {
1057 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1058 	struct drm_device *drm = state->plane->dev;
1059 	struct vc4_dev *vc4 = to_vc4_dev(drm);
1060 	struct vc4_hvs *hvs = vc4->hvs;
1061 	unsigned int i;
1062 
1063 	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
1064 
1065 	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
1066 		unsigned int upm_handle;
1067 
1068 		upm_handle = vc4_state->upm_handle[i];
1069 		if (!upm_handle)
1070 			continue;
1071 
1072 		if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
1073 			vc4_plane_release_upm_ida(hvs, upm_handle);
1074 		vc4_state->upm_handle[i] = 0;
1075 	}
1076 }
1077 
/*
 * The colorspace conversion matrices are held in 3 entries in the dlist.
 * Create an array of them, with entries for each full and limited mode, and
 * each supported colorspace.
 *
 * The table is laid out as colorspace_coeffs[range][encoding][word]:
 *   - the first index selects the quantization range (0 = limited, 1 = full),
 *   - the second index selects the colour encoding, in the order
 *     BT601, BT709, BT2020,
 *   - the third index selects one of the three CSC words.
 */
static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
	{
		/* Limited range */
		{
			/* BT601 */
			SCALER_CSC0_ITR_R_601_5,
			SCALER_CSC1_ITR_R_601_5,
			SCALER_CSC2_ITR_R_601_5,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3,
			SCALER_CSC1_ITR_R_709_3,
			SCALER_CSC2_ITR_R_709_3,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020,
			SCALER_CSC1_ITR_R_2020,
			SCALER_CSC2_ITR_R_2020,
		}
	}, {
		/* Full range */
		{
			/* JFIF (sits in the same encoding slot as BT601 above) */
			SCALER_CSC0_JPEG_JFIF,
			SCALER_CSC1_JPEG_JFIF,
			SCALER_CSC2_JPEG_JFIF,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3_FR,
			SCALER_CSC1_ITR_R_709_3_FR,
			SCALER_CSC2_ITR_R_709_3_FR,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020_FR,
			SCALER_CSC1_ITR_R_2020_FR,
			SCALER_CSC2_ITR_R_2020_FR,
		}
	}
};
1122 
1123 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
1124 {
1125 	struct drm_device *dev = state->state->dev;
1126 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1127 
1128 	WARN_ON_ONCE(vc4->gen != VC4_GEN_4);
1129 
1130 	if (!state->fb->format->has_alpha)
1131 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1132 				     SCALER_POS2_ALPHA_MODE);
1133 
1134 	switch (state->pixel_blend_mode) {
1135 	case DRM_MODE_BLEND_PIXEL_NONE:
1136 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1137 				     SCALER_POS2_ALPHA_MODE);
1138 	default:
1139 	case DRM_MODE_BLEND_PREMULTI:
1140 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1141 				     SCALER_POS2_ALPHA_MODE) |
1142 			SCALER_POS2_ALPHA_PREMULT;
1143 	case DRM_MODE_BLEND_COVERAGE:
1144 		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1145 				     SCALER_POS2_ALPHA_MODE);
1146 	}
1147 }
1148 
1149 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
1150 {
1151 	struct drm_device *dev = state->state->dev;
1152 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1153 
1154 	WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C &&
1155 		     vc4->gen != VC4_GEN_6_D);
1156 
1157 	switch (vc4->gen) {
1158 	default:
1159 	case VC4_GEN_5:
1160 	case VC4_GEN_6_C:
1161 		if (!state->fb->format->has_alpha)
1162 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1163 					     SCALER5_CTL2_ALPHA_MODE);
1164 
1165 		switch (state->pixel_blend_mode) {
1166 		case DRM_MODE_BLEND_PIXEL_NONE:
1167 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1168 					     SCALER5_CTL2_ALPHA_MODE);
1169 		default:
1170 		case DRM_MODE_BLEND_PREMULTI:
1171 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1172 					     SCALER5_CTL2_ALPHA_MODE) |
1173 				SCALER5_CTL2_ALPHA_PREMULT;
1174 		case DRM_MODE_BLEND_COVERAGE:
1175 			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1176 					     SCALER5_CTL2_ALPHA_MODE);
1177 		}
1178 	case VC4_GEN_6_D:
1179 		/* 2712-D configures fixed alpha mode in CTL0 */
1180 		return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ?
1181 			SCALER5_CTL2_ALPHA_PREMULT : 0;
1182 	}
1183 }
1184 
1185 static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state)
1186 {
1187 	struct drm_device *dev = state->state->dev;
1188 	struct vc4_dev *vc4 = to_vc4_dev(dev);
1189 
1190 	WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D);
1191 
1192 	if (vc4->gen == VC4_GEN_6_D &&
1193 	    (!state->fb->format->has_alpha ||
1194 	     state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE))
1195 		return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED,
1196 				     SCALER6_CTL0_ALPHA_MASK);
1197 
1198 	return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK);
1199 }
1200 
1201 /* Writes out a full display list for an active plane to the plane's
1202  * private dlist state.
1203  */
1204 static int vc4_plane_mode_set(struct drm_plane *plane,
1205 			      struct drm_plane_state *state)
1206 {
1207 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
1208 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1209 	struct drm_framebuffer *fb = state->fb;
1210 	u32 ctl0_offset = vc4_state->dlist_count;
1211 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1212 	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1213 	int num_planes = fb->format->num_planes;
1214 	u32 h_subsample = fb->format->hsub;
1215 	u32 v_subsample = fb->format->vsub;
1216 	bool mix_plane_alpha;
1217 	bool covers_screen;
1218 	u32 scl0, scl1, pitch0;
1219 	u32 tiling, src_x, src_y;
1220 	u32 width, height;
1221 	u32 hvs_format = format->hvs;
1222 	unsigned int rotation;
1223 	u32 offsets[3] = { 0 };
1224 	int ret, i;
1225 
1226 	if (vc4_state->dlist_initialized)
1227 		return 0;
1228 
1229 	ret = vc4_plane_setup_clipping_and_scaling(state);
1230 	if (ret)
1231 		return ret;
1232 
1233 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
1234 	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
1235 		/* 0 source size probably means the plane is offscreen */
1236 		vc4_state->dlist_initialized = 1;
1237 		return 0;
1238 	}
1239 
1240 	width = vc4_state->src_w[0] >> 16;
1241 	height = vc4_state->src_h[0] >> 16;
1242 
1243 	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1244 	 * and 4:4:4, scl1 should be set to scl0 so both channels of
1245 	 * the scaler do the same thing.  For YUV, the Y plane needs
1246 	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1247 	 * the scl fields here.
1248 	 */
1249 	if (num_planes == 1) {
1250 		scl0 = vc4_get_scl_field(state, 0);
1251 		scl1 = scl0;
1252 	} else {
1253 		scl0 = vc4_get_scl_field(state, 1);
1254 		scl1 = vc4_get_scl_field(state, 0);
1255 	}
1256 
1257 	rotation = drm_rotation_simplify(state->rotation,
1258 					 DRM_MODE_ROTATE_0 |
1259 					 DRM_MODE_REFLECT_X |
1260 					 DRM_MODE_REFLECT_Y);
1261 
1262 	/* We must point to the last line when Y reflection is enabled. */
1263 	src_y = vc4_state->src_y >> 16;
1264 	if (rotation & DRM_MODE_REFLECT_Y)
1265 		src_y += height - 1;
1266 
1267 	src_x = vc4_state->src_x >> 16;
1268 
1269 	switch (base_format_mod) {
1270 	case DRM_FORMAT_MOD_LINEAR:
1271 		tiling = SCALER_CTL0_TILING_LINEAR;
1272 		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
1273 
1274 		/* Adjust the base pointer to the first pixel to be scanned
1275 		 * out.
1276 		 */
1277 		for (i = 0; i < num_planes; i++) {
1278 			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1279 			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1280 		}
1281 
1282 		break;
1283 
1284 	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
1285 		u32 tile_size_shift = 12; /* T tiles are 4kb */
1286 		/* Whole-tile offsets, mostly for setting the pitch. */
1287 		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
1288 		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
1289 		u32 tile_w_mask = (1 << tile_w_shift) - 1;
1290 		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
1291 		 * the height (in pixels) of a 4k tile.
1292 		 */
1293 		u32 tile_h_mask = (2 << tile_h_shift) - 1;
1294 		/* For T-tiled, the FB pitch is "how many bytes from one row to
1295 		 * the next, such that
1296 		 *
1297 		 *	pitch * tile_h == tile_size * tiles_per_row
1298 		 */
1299 		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
1300 		u32 tiles_l = src_x >> tile_w_shift;
1301 		u32 tiles_r = tiles_w - tiles_l;
1302 		u32 tiles_t = src_y >> tile_h_shift;
1303 		/* Intra-tile offsets, which modify the base address (the
1304 		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
1305 		 * base address).
1306 		 */
1307 		u32 tile_y = (src_y >> 4) & 1;
1308 		u32 subtile_y = (src_y >> 2) & 3;
1309 		u32 utile_y = src_y & 3;
1310 		u32 x_off = src_x & tile_w_mask;
1311 		u32 y_off = src_y & tile_h_mask;
1312 
1313 		/* When Y reflection is requested we must set the
1314 		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
1315 		 * after the initial one should be fetched in descending order,
1316 		 * which makes sense since we start from the last line and go
1317 		 * backward.
1318 		 * Don't know why we need y_off = max_y_off - y_off, but it's
1319 		 * definitely required (I guess it's also related to the "going
1320 		 * backward" situation).
1321 		 */
1322 		if (rotation & DRM_MODE_REFLECT_Y) {
1323 			y_off = tile_h_mask - y_off;
1324 			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
1325 		} else {
1326 			pitch0 = 0;
1327 		}
1328 
1329 		tiling = SCALER_CTL0_TILING_256B_OR_T;
1330 		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
1331 			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
1332 			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
1333 			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
1334 		offsets[0] += tiles_t * (tiles_w << tile_size_shift);
1335 		offsets[0] += subtile_y << 8;
1336 		offsets[0] += utile_y << 4;
1337 
1338 		/* Rows of tiles alternate left-to-right and right-to-left. */
1339 		if (tiles_t & 1) {
1340 			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
1341 			offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
1342 			offsets[0] -= (1 + !tile_y) << 10;
1343 		} else {
1344 			offsets[0] += tiles_l << tile_size_shift;
1345 			offsets[0] += tile_y << 10;
1346 		}
1347 
1348 		break;
1349 	}
1350 
1351 	case DRM_FORMAT_MOD_BROADCOM_SAND64:
1352 	case DRM_FORMAT_MOD_BROADCOM_SAND128:
1353 	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1354 		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1355 
1356 		if (param > SCALER_TILE_HEIGHT_MASK) {
1357 			DRM_DEBUG_KMS("SAND height too large (%d)\n",
1358 				      param);
1359 			return -EINVAL;
1360 		}
1361 
1362 		if (fb->format->format == DRM_FORMAT_P030) {
1363 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1364 			tiling = SCALER_CTL0_TILING_128B;
1365 		} else {
1366 			hvs_format = HVS_PIXEL_FORMAT_H264;
1367 
1368 			switch (base_format_mod) {
1369 			case DRM_FORMAT_MOD_BROADCOM_SAND64:
1370 				tiling = SCALER_CTL0_TILING_64B;
1371 				break;
1372 			case DRM_FORMAT_MOD_BROADCOM_SAND128:
1373 				tiling = SCALER_CTL0_TILING_128B;
1374 				break;
1375 			case DRM_FORMAT_MOD_BROADCOM_SAND256:
1376 				tiling = SCALER_CTL0_TILING_256B_OR_T;
1377 				break;
1378 			default:
1379 				return -EINVAL;
1380 			}
1381 		}
1382 
1383 		/* Adjust the base pointer to the first pixel to be scanned
1384 		 * out.
1385 		 *
1386 		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1387 		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1388 		 * word that should be taken as the first pixel.
1389 		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1390 		 * element within the 128bit word, eg for pixel 3 the value
1391 		 * should be 6.
1392 		 */
1393 		for (i = 0; i < num_planes; i++) {
1394 			u32 tile_w, tile, x_off, pix_per_tile;
1395 
1396 			if (fb->format->format == DRM_FORMAT_P030) {
1397 				/*
1398 				 * Spec says: bits [31:4] of the given address
1399 				 * should point to the 128-bit word containing
1400 				 * the desired starting pixel, and bits[3:0]
1401 				 * should be between 0 and 11, indicating which
1402 				 * of the 12-pixels in that 128-bit word is the
1403 				 * first pixel to be used
1404 				 */
1405 				u32 remaining_pixels = src_x % 96;
1406 				u32 aligned = remaining_pixels / 12;
1407 				u32 last_bits = remaining_pixels % 12;
1408 
1409 				x_off = aligned * 16 + last_bits;
1410 				tile_w = 128;
1411 				pix_per_tile = 96;
1412 			} else {
1413 				switch (base_format_mod) {
1414 				case DRM_FORMAT_MOD_BROADCOM_SAND64:
1415 					tile_w = 64;
1416 					break;
1417 				case DRM_FORMAT_MOD_BROADCOM_SAND128:
1418 					tile_w = 128;
1419 					break;
1420 				case DRM_FORMAT_MOD_BROADCOM_SAND256:
1421 					tile_w = 256;
1422 					break;
1423 				default:
1424 					return -EINVAL;
1425 				}
1426 				pix_per_tile = tile_w / fb->format->cpp[0];
1427 				x_off = (src_x % pix_per_tile) /
1428 					(i ? h_subsample : 1) *
1429 					fb->format->cpp[i];
1430 			}
1431 
1432 			tile = src_x / pix_per_tile;
1433 
1434 			offsets[i] += param * tile_w * tile;
1435 			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1436 			offsets[i] += x_off & ~(i ? 1 : 0);
1437 		}
1438 
1439 		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
1440 		break;
1441 	}
1442 
1443 	default:
1444 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1445 			      (long long)fb->modifier);
1446 		return -EINVAL;
1447 	}
1448 
1449 	/* fetch an extra pixel if we don't actually line up with the left edge. */
1450 	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1451 		width++;
1452 
1453 	/* same for the right side */
1454 	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1455 	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1456 		width++;
1457 
1458 	/* now for the top */
1459 	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1460 		height++;
1461 
1462 	/* and the bottom */
1463 	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1464 	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1465 		height++;
1466 
1467 	/* For YUV444 the hardware wants double the width, otherwise it doesn't
1468 	 * fetch full width of chroma
1469 	 */
1470 	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1471 		width <<= 1;
1472 
1473 	/* Don't waste cycles mixing with plane alpha if the set alpha
1474 	 * is opaque or there is no per-pixel alpha information.
1475 	 * In any case we use the alpha property value as the fixed alpha.
1476 	 */
1477 	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1478 			  fb->format->has_alpha;
1479 
1480 	if (vc4->gen == VC4_GEN_4) {
1481 	/* Control word */
1482 		vc4_dlist_write(vc4_state,
1483 				SCALER_CTL0_VALID |
1484 				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
1485 				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
1486 				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
1487 				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
1488 				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1489 				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1490 				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
1491 				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1492 				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
1493 
1494 		/* Position Word 0: Image Positions and Alpha Value */
1495 		vc4_state->pos0_offset = vc4_state->dlist_count;
1496 		vc4_dlist_write(vc4_state,
1497 				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
1498 				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
1499 				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
1500 
1501 		/* Position Word 1: Scaled Image Dimensions. */
1502 		if (!vc4_state->is_unity) {
1503 			vc4_dlist_write(vc4_state,
1504 					VC4_SET_FIELD(vc4_state->crtc_w,
1505 						      SCALER_POS1_SCL_WIDTH) |
1506 					VC4_SET_FIELD(vc4_state->crtc_h,
1507 						      SCALER_POS1_SCL_HEIGHT));
1508 		}
1509 
1510 		/* Position Word 2: Source Image Size, Alpha */
1511 		vc4_state->pos2_offset = vc4_state->dlist_count;
1512 		vc4_dlist_write(vc4_state,
1513 				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
1514 				vc4_hvs4_get_alpha_blend_mode(state) |
1515 				VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
1516 				VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));
1517 
1518 		/* Position Word 3: Context.  Written by the HVS. */
1519 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1520 
1521 	} else {
1522 		/* Control word */
1523 		vc4_dlist_write(vc4_state,
1524 				SCALER_CTL0_VALID |
1525 				(format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
1526 				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
1527 				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
1528 				(vc4_state->is_unity ?
1529 						SCALER5_CTL0_UNITY : 0) |
1530 				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
1531 				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
1532 				SCALER5_CTL0_ALPHA_EXPAND |
1533 				SCALER5_CTL0_RGB_EXPAND);
1534 
1535 		/* Position Word 0: Image Positions and Alpha Value */
1536 		vc4_state->pos0_offset = vc4_state->dlist_count;
1537 		vc4_dlist_write(vc4_state,
1538 				(rotation & DRM_MODE_REFLECT_Y ?
1539 						SCALER5_POS0_VFLIP : 0) |
1540 				VC4_SET_FIELD(vc4_state->crtc_x,
1541 					      SCALER_POS0_START_X) |
1542 				(rotation & DRM_MODE_REFLECT_X ?
1543 					      SCALER5_POS0_HFLIP : 0) |
1544 				VC4_SET_FIELD(vc4_state->crtc_y,
1545 					      SCALER5_POS0_START_Y)
1546 			       );
1547 
1548 		/* Control Word 2 */
1549 		vc4_dlist_write(vc4_state,
1550 				VC4_SET_FIELD(state->alpha >> 4,
1551 					      SCALER5_CTL2_ALPHA) |
1552 				vc4_hvs5_get_alpha_blend_mode(state) |
1553 				(mix_plane_alpha ?
1554 					SCALER5_CTL2_ALPHA_MIX : 0)
1555 			       );
1556 
1557 		/* Position Word 1: Scaled Image Dimensions. */
1558 		if (!vc4_state->is_unity) {
1559 			vc4_dlist_write(vc4_state,
1560 					VC4_SET_FIELD(vc4_state->crtc_w,
1561 						      SCALER5_POS1_SCL_WIDTH) |
1562 					VC4_SET_FIELD(vc4_state->crtc_h,
1563 						      SCALER5_POS1_SCL_HEIGHT));
1564 		}
1565 
1566 		/* Position Word 2: Source Image Size */
1567 		vc4_state->pos2_offset = vc4_state->dlist_count;
1568 		vc4_dlist_write(vc4_state,
1569 				VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
1570 				VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));
1571 
1572 		/* Position Word 3: Context.  Written by the HVS. */
1573 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1574 	}
1575 
1576 
1577 	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
1578 	 *
1579 	 * The pointers may be any byte address.
1580 	 */
1581 	vc4_state->ptr0_offset[0] = vc4_state->dlist_count;
1582 
1583 	for (i = 0; i < num_planes; i++) {
1584 		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1585 
1586 		vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]);
1587 	}
1588 
1589 	/* Pointer Context Word 0/1/2: Written by the HVS */
1590 	for (i = 0; i < num_planes; i++)
1591 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1592 
1593 	/* Pitch word 0 */
1594 	vc4_dlist_write(vc4_state, pitch0);
1595 
1596 	/* Pitch word 1/2 */
1597 	for (i = 1; i < num_planes; i++) {
1598 		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
1599 		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
1600 			vc4_dlist_write(vc4_state,
1601 					VC4_SET_FIELD(fb->pitches[i],
1602 						      SCALER_SRC_PITCH));
1603 		} else {
1604 			vc4_dlist_write(vc4_state, pitch0);
1605 		}
1606 	}
1607 
1608 	/* Colorspace conversion words */
1609 	if (vc4_state->is_yuv) {
1610 		enum drm_color_encoding color_encoding = state->color_encoding;
1611 		enum drm_color_range color_range = state->color_range;
1612 		const u32 *ccm;
1613 
1614 		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
1615 			color_encoding = DRM_COLOR_YCBCR_BT601;
1616 		if (color_range >= DRM_COLOR_RANGE_MAX)
1617 			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1618 
1619 		ccm = colorspace_coeffs[color_range][color_encoding];
1620 
1621 		vc4_dlist_write(vc4_state, ccm[0]);
1622 		vc4_dlist_write(vc4_state, ccm[1]);
1623 		vc4_dlist_write(vc4_state, ccm[2]);
1624 	}
1625 
1626 	vc4_state->lbm_offset = 0;
1627 
1628 	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
1629 	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
1630 	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1631 	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1632 		/* Reserve a slot for the LBM Base Address. The real value will
1633 		 * be set when calling vc4_plane_allocate_lbm().
1634 		 */
1635 		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
1636 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
1637 			vc4_state->lbm_offset = vc4_state->dlist_count;
1638 			vc4_dlist_counter_increment(vc4_state);
1639 		}
1640 
1641 		if (num_planes > 1) {
1642 			/* Emit Cb/Cr as channel 0 and Y as channel
1643 			 * 1. This matches how we set up scl0/scl1
1644 			 * above.
1645 			 */
1646 			vc4_write_scaling_parameters(state, 1);
1647 		}
1648 		vc4_write_scaling_parameters(state, 0);
1649 
1650 		/* If any PPF setup was done, then all the kernel
1651 		 * pointers get uploaded.
1652 		 */
1653 		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
1654 		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
1655 		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
1656 		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1657 			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
1658 						   SCALER_PPF_KERNEL_OFFSET);
1659 
1660 			/* HPPF plane 0 */
1661 			vc4_dlist_write(vc4_state, kernel);
1662 			/* VPPF plane 0 */
1663 			vc4_dlist_write(vc4_state, kernel);
1664 			/* HPPF plane 1 */
1665 			vc4_dlist_write(vc4_state, kernel);
1666 			/* VPPF plane 1 */
1667 			vc4_dlist_write(vc4_state, kernel);
1668 		}
1669 	}
1670 
1671 	vc4_state->dlist[ctl0_offset] |=
1672 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
1673 
1674 	/* crtc_* are already clipped coordinates. */
1675 	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1676 			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1677 			vc4_state->crtc_h == state->crtc->mode.vdisplay;
1678 	/* Background fill might be necessary when the plane has per-pixel
1679 	 * alpha content or a non-opaque plane alpha and could blend from the
1680 	 * background or does not cover the entire screen.
1681 	 */
1682 	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1683 				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1684 
1685 	/* Flag the dlist as initialized to avoid checking it twice in case
1686 	 * the async update check already called vc4_plane_mode_set() and
1687 	 * decided to fallback to sync update because async update was not
1688 	 * possible.
1689 	 */
1690 	vc4_state->dlist_initialized = 1;
1691 
1692 	vc4_plane_calc_load(state);
1693 
1694 	return 0;
1695 }
1696 
1697 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
1698 {
1699 	struct drm_plane_state *state = &vc4_state->base;
1700 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
1701 	u32 ret = 0;
1702 
1703 	if (vc4_state->is_yuv) {
1704 		enum drm_color_encoding color_encoding = state->color_encoding;
1705 		enum drm_color_range color_range = state->color_range;
1706 
1707 		/* CSC pre-loaded with:
1708 		 * 0 = BT601 limited range
1709 		 * 1 = BT709 limited range
1710 		 * 2 = BT2020 limited range
1711 		 * 3 = BT601 full range
1712 		 * 4 = BT709 full range
1713 		 * 5 = BT2020 full range
1714 		 */
1715 		if (color_encoding > DRM_COLOR_YCBCR_BT2020)
1716 			color_encoding = DRM_COLOR_YCBCR_BT601;
1717 		if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
1718 			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1719 
1720 		if (vc4->gen == VC4_GEN_6_C) {
1721 			ret |= SCALER6C_CTL2_CSC_ENABLE;
1722 			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1723 					     SCALER6C_CTL2_BRCM_CFC_CONTROL);
1724 		} else {
1725 			ret |= SCALER6D_CTL2_CSC_ENABLE;
1726 			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1727 					     SCALER6D_CTL2_BRCM_CFC_CONTROL);
1728 		}
1729 	}
1730 
1731 	return ret;
1732 }
1733 
1734 static int vc6_plane_mode_set(struct drm_plane *plane,
1735 			      struct drm_plane_state *state)
1736 {
1737 	struct drm_device *drm = plane->dev;
1738 	struct vc4_dev *vc4 = to_vc4_dev(drm);
1739 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1740 	struct drm_framebuffer *fb = state->fb;
1741 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1742 	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1743 	int num_planes = fb->format->num_planes;
1744 	u32 h_subsample = fb->format->hsub;
1745 	u32 v_subsample = fb->format->vsub;
1746 	bool mix_plane_alpha;
1747 	bool covers_screen;
1748 	u32 scl0, scl1, pitch0;
1749 	u32 tiling, src_x, src_y;
1750 	u32 width, height;
1751 	u32 hvs_format = format->hvs;
1752 	u32 offsets[3] = { 0 };
1753 	unsigned int rotation;
1754 	int ret, i;
1755 
1756 	if (vc4_state->dlist_initialized)
1757 		return 0;
1758 
1759 	ret = vc4_plane_setup_clipping_and_scaling(state);
1760 	if (ret)
1761 		return ret;
1762 
1763 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
1764 	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
1765 		/* 0 source size probably means the plane is offscreen.
1766 		 * 0 destination size is a redundant plane.
1767 		 */
1768 		vc4_state->dlist_initialized = 1;
1769 		return 0;
1770 	}
1771 
1772 	width = vc4_state->src_w[0] >> 16;
1773 	height = vc4_state->src_h[0] >> 16;
1774 
1775 	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
1776 	 * and 4:4:4, scl1 should be set to scl0 so both channels of
1777 	 * the scaler do the same thing.  For YUV, the Y plane needs
1778 	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1779 	 * the scl fields here.
1780 	 */
1781 	if (num_planes == 1) {
1782 		scl0 = vc4_get_scl_field(state, 0);
1783 		scl1 = scl0;
1784 	} else {
1785 		scl0 = vc4_get_scl_field(state, 1);
1786 		scl1 = vc4_get_scl_field(state, 0);
1787 	}
1788 
1789 	rotation = drm_rotation_simplify(state->rotation,
1790 					 DRM_MODE_ROTATE_0 |
1791 					 DRM_MODE_REFLECT_X |
1792 					 DRM_MODE_REFLECT_Y);
1793 
1794 	/* We must point to the last line when Y reflection is enabled. */
1795 	src_y = vc4_state->src_y >> 16;
1796 	if (rotation & DRM_MODE_REFLECT_Y)
1797 		src_y += height - 1;
1798 
1799 	src_x = vc4_state->src_x >> 16;
1800 
1801 	switch (base_format_mod) {
1802 	case DRM_FORMAT_MOD_LINEAR:
1803 		tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
1804 
1805 		/* Adjust the base pointer to the first pixel to be scanned
1806 		 * out.
1807 		 */
1808 		for (i = 0; i < num_planes; i++) {
1809 			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1810 			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1811 		}
1812 
1813 		break;
1814 
1815 	case DRM_FORMAT_MOD_BROADCOM_SAND128:
1816 	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1817 		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1818 		u32 components_per_word;
1819 		u32 starting_offset;
1820 		u32 fetch_count;
1821 
1822 		if (param > SCALER_TILE_HEIGHT_MASK) {
1823 			DRM_DEBUG_KMS("SAND height too large (%d)\n",
1824 				      param);
1825 			return -EINVAL;
1826 		}
1827 
1828 		if (fb->format->format == DRM_FORMAT_P030) {
1829 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1830 			tiling = SCALER6_CTL0_ADDR_MODE_128B;
1831 		} else {
1832 			hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
1833 
1834 			switch (base_format_mod) {
1835 			case DRM_FORMAT_MOD_BROADCOM_SAND128:
1836 				tiling = SCALER6_CTL0_ADDR_MODE_128B;
1837 				break;
1838 			case DRM_FORMAT_MOD_BROADCOM_SAND256:
1839 				tiling = SCALER6_CTL0_ADDR_MODE_256B;
1840 				break;
1841 			default:
1842 				return -EINVAL;
1843 			}
1844 		}
1845 
1846 		/* Adjust the base pointer to the first pixel to be scanned
1847 		 * out.
1848 		 *
1849 		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1850 		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1851 		 * word that should be taken as the first pixel.
1852 		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1853 		 * element within the 128bit word, eg for pixel 3 the value
1854 		 * should be 6.
1855 		 */
1856 		for (i = 0; i < num_planes; i++) {
1857 			u32 tile_w, tile, x_off, pix_per_tile;
1858 
1859 			if (fb->format->format == DRM_FORMAT_P030) {
1860 				/*
1861 				 * Spec says: bits [31:4] of the given address
1862 				 * should point to the 128-bit word containing
1863 				 * the desired starting pixel, and bits[3:0]
1864 				 * should be between 0 and 11, indicating which
1865 				 * of the 12-pixels in that 128-bit word is the
1866 				 * first pixel to be used
1867 				 */
1868 				u32 remaining_pixels = src_x % 96;
1869 				u32 aligned = remaining_pixels / 12;
1870 				u32 last_bits = remaining_pixels % 12;
1871 
1872 				x_off = aligned * 16 + last_bits;
1873 				tile_w = 128;
1874 				pix_per_tile = 96;
1875 			} else {
1876 				switch (base_format_mod) {
1877 				case DRM_FORMAT_MOD_BROADCOM_SAND128:
1878 					tile_w = 128;
1879 					break;
1880 				case DRM_FORMAT_MOD_BROADCOM_SAND256:
1881 					tile_w = 256;
1882 					break;
1883 				default:
1884 					return -EINVAL;
1885 				}
1886 				pix_per_tile = tile_w / fb->format->cpp[0];
1887 				x_off = (src_x % pix_per_tile) /
1888 					(i ? h_subsample : 1) *
1889 					fb->format->cpp[i];
1890 			}
1891 
1892 			tile = src_x / pix_per_tile;
1893 
1894 			offsets[i] += param * tile_w * tile;
1895 			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1896 			offsets[i] += x_off & ~(i ? 1 : 0);
1897 		}
1898 
1899 		components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
1900 		starting_offset = src_x % components_per_word;
1901 		fetch_count = (width + starting_offset + components_per_word - 1) /
1902 			components_per_word;
1903 
1904 		pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
1905 			 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
1906 		break;
1907 	}
1908 
1909 	default:
1910 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1911 			      (long long)fb->modifier);
1912 		return -EINVAL;
1913 	}
1914 
1915 	/* fetch an extra pixel if we don't actually line up with the left edge. */
1916 	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1917 		width++;
1918 
1919 	/* same for the right side */
1920 	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1921 	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1922 		width++;
1923 
1924 	/* now for the top */
1925 	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1926 		height++;
1927 
1928 	/* and the bottom */
1929 	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1930 	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1931 		height++;
1932 
1933 	/* for YUV444 hardware wants double the width, otherwise it doesn't
1934 	 * fetch full width of chroma
1935 	 */
1936 	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1937 		width <<= 1;
1938 
1939 	/* Don't waste cycles mixing with plane alpha if the set alpha
1940 	 * is opaque or there is no per-pixel alpha information.
1941 	 * In any case we use the alpha property value as the fixed alpha.
1942 	 */
1943 	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1944 			  fb->format->has_alpha;
1945 
1946 	/* Control Word 0: Scaling Configuration & Element Validity*/
1947 	vc4_dlist_write(vc4_state,
1948 			SCALER6_CTL0_VALID |
1949 			VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
1950 			vc4_hvs6_get_alpha_mask_mode(state) |
1951 			(vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
1952 			VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
1953 			VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
1954 			VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
1955 			VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
1956 
1957 	/* Position Word 0: Image Position */
1958 	vc4_state->pos0_offset = vc4_state->dlist_count;
1959 	vc4_dlist_write(vc4_state,
1960 			VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
1961 			(rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
1962 			VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
1963 
1964 	/* Control Word 2: Alpha Value & CSC */
1965 	vc4_dlist_write(vc4_state,
1966 			vc6_plane_get_csc_mode(vc4_state) |
1967 			vc4_hvs5_get_alpha_blend_mode(state) |
1968 			(mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
1969 			VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
1970 
1971 	/* Position Word 1: Scaled Image Dimensions */
1972 	if (!vc4_state->is_unity)
1973 		vc4_dlist_write(vc4_state,
1974 				VC4_SET_FIELD(vc4_state->crtc_h - 1,
1975 					      SCALER6_POS1_SCL_LINES) |
1976 				VC4_SET_FIELD(vc4_state->crtc_w - 1,
1977 					      SCALER6_POS1_SCL_WIDTH));
1978 
1979 	/* Position Word 2: Source Image Size */
1980 	vc4_state->pos2_offset = vc4_state->dlist_count;
1981 	vc4_dlist_write(vc4_state,
1982 			VC4_SET_FIELD(height - 1,
1983 				      SCALER6_POS2_SRC_LINES) |
1984 			VC4_SET_FIELD(width - 1,
1985 				      SCALER6_POS2_SRC_WIDTH));
1986 
1987 	/* Position Word 3: Context */
1988 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1989 
1990 	/*
1991 	 * TODO: This only covers Raster Scan Order planes
1992 	 */
1993 	for (i = 0; i < num_planes; i++) {
1994 		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1995 		dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];
1996 
1997 		/* Pointer Word 0 */
1998 		vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
1999 		vc4_dlist_write(vc4_state,
2000 				(rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
2001 				/*
2002 				 * The UPM buffer will be allocated in
2003 				 * vc6_plane_allocate_upm().
2004 				 */
2005 				VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
2006 					      SCALER6_PTR0_UPPER_ADDR));
2007 
2008 		/* Pointer Word 1 */
2009 		vc4_dlist_write(vc4_state, lower_32_bits(paddr));
2010 
2011 		/* Pointer Word 2 */
2012 		if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
2013 		    base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
2014 			vc4_dlist_write(vc4_state,
2015 					VC4_SET_FIELD(fb->pitches[i],
2016 						      SCALER6_PTR2_PITCH));
2017 		} else {
2018 			vc4_dlist_write(vc4_state, pitch0);
2019 		}
2020 	}
2021 
2022 	/*
2023 	 * Palette Word 0
2024 	 * TODO: We're not using the palette mode
2025 	 */
2026 
2027 	/*
2028 	 * Trans Word 0
2029 	 * TODO: It's only relevant if we set the trans_rgb bit in the
2030 	 * control word 0, and we don't at the moment.
2031 	 */
2032 
2033 	vc4_state->lbm_offset = 0;
2034 
2035 	if (!vc4_state->is_unity || fb->format->is_yuv) {
2036 		/*
2037 		 * Reserve a slot for the LBM Base Address. The real value will
2038 		 * be set when calling vc4_plane_allocate_lbm().
2039 		 */
2040 		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2041 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2042 			vc4_state->lbm_offset = vc4_state->dlist_count;
2043 			vc4_dlist_counter_increment(vc4_state);
2044 		}
2045 
2046 		if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
2047 		    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
2048 		    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2049 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2050 			if (num_planes > 1)
2051 				/*
2052 				 * Emit Cb/Cr as channel 0 and Y as channel
2053 				 * 1. This matches how we set up scl0/scl1
2054 				 * above.
2055 				 */
2056 				vc4_write_scaling_parameters(state, 1);
2057 
2058 			vc4_write_scaling_parameters(state, 0);
2059 		}
2060 
2061 		/*
2062 		 * If any PPF setup was done, then all the kernel
2063 		 * pointers get uploaded.
2064 		 */
2065 		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
2066 		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
2067 		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
2068 		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
2069 			u32 kernel =
2070 				VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
2071 					      SCALER_PPF_KERNEL_OFFSET);
2072 
2073 			/* HPPF plane 0 */
2074 			vc4_dlist_write(vc4_state, kernel);
2075 			/* VPPF plane 0 */
2076 			vc4_dlist_write(vc4_state, kernel);
2077 			/* HPPF plane 1 */
2078 			vc4_dlist_write(vc4_state, kernel);
2079 			/* VPPF plane 1 */
2080 			vc4_dlist_write(vc4_state, kernel);
2081 		}
2082 	}
2083 
2084 	vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
2085 
2086 	vc4_state->dlist[0] |=
2087 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
2088 
2089 	/* crtc_* are already clipped coordinates. */
2090 	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
2091 			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
2092 			vc4_state->crtc_h == state->crtc->mode.vdisplay;
2093 
2094 	/*
2095 	 * Background fill might be necessary when the plane has per-pixel
2096 	 * alpha content or a non-opaque plane alpha and could blend from the
2097 	 * background or does not cover the entire screen.
2098 	 */
2099 	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
2100 				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
2101 
2102 	/*
2103 	 * Flag the dlist as initialized to avoid checking it twice in case
2104 	 * the async update check already called vc4_plane_mode_set() and
2105 	 * decided to fallback to sync update because async update was not
2106 	 * possible.
2107 	 */
2108 	vc4_state->dlist_initialized = 1;
2109 
2110 	vc4_plane_calc_load(state);
2111 
2112 	drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
2113 		       plane->base.id, plane->name, vc4_state->dlist_count);
2114 
2115 	return 0;
2116 }
2117 
2118 /* If a modeset involves changing the setup of a plane, the atomic
2119  * infrastructure will call this to validate a proposed plane setup.
2120  * However, if a plane isn't getting updated, this (and the
2121  * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
2122  * compute the dlist here and have all active plane dlists get updated
2123  * in the CRTC's flush.
2124  */
2125 static int vc4_plane_atomic_check(struct drm_plane *plane,
2126 				  struct drm_atomic_commit *state)
2127 {
2128 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2129 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2130 										 plane);
2131 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
2132 	int ret;
2133 
2134 	vc4_state->dlist_count = 0;
2135 
2136 	if (!plane_enabled(new_plane_state)) {
2137 		struct drm_plane_state *old_plane_state =
2138 				drm_atomic_get_old_plane_state(state, plane);
2139 
2140 		if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
2141 		    plane_enabled(old_plane_state)) {
2142 			vc6_plane_free_upm(new_plane_state);
2143 		}
2144 		return 0;
2145 	}
2146 
2147 	if (vc4->gen >= VC4_GEN_6_C)
2148 		ret = vc6_plane_mode_set(plane, new_plane_state);
2149 	else
2150 		ret = vc4_plane_mode_set(plane, new_plane_state);
2151 	if (ret)
2152 		return ret;
2153 
2154 	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
2155 	    !vc4_state->crtc_w || !vc4_state->crtc_h)
2156 		return 0;
2157 
2158 	ret = vc4_plane_allocate_lbm(new_plane_state);
2159 	if (ret)
2160 		return ret;
2161 
2162 	if (vc4->gen >= VC4_GEN_6_C) {
2163 		ret = vc6_plane_allocate_upm(new_plane_state);
2164 		if (ret)
2165 			return ret;
2166 	}
2167 
2168 	return 0;
2169 }
2170 
static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_commit *state)
{
	/*
	 * Intentionally empty. At this point we don't know where in the
	 * CRTC's dlist this plane will be stored, so the plane dlist is
	 * uploaded to the hardware by vc4_plane_write_dlist() at CRTC
	 * atomic_flush time instead.
	 */
}
2180 
2181 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
2182 {
2183 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2184 	int i;
2185 	int idx;
2186 
2187 	if (!drm_dev_enter(plane->dev, &idx))
2188 		goto out;
2189 
2190 	vc4_state->hw_dlist = dlist;
2191 
2192 	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
2193 	for (i = 0; i < vc4_state->dlist_count; i++)
2194 		writel(vc4_state->dlist[i], &dlist[i]);
2195 
2196 	drm_dev_exit(idx);
2197 
2198 out:
2199 	return vc4_state->dlist_count;
2200 }
2201 
2202 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
2203 {
2204 	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
2205 
2206 	return vc4_state->dlist_count;
2207 }
2208 
2209 /* Updates the plane to immediately (well, once the FIFO needs
2210  * refilling) scan out from at a new framebuffer.
2211  */
2212 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
2213 {
2214 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2215 	struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
2216 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2217 	dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
2218 	int idx;
2219 
2220 	if (!drm_dev_enter(plane->dev, &idx))
2221 		return;
2222 
2223 	/* We're skipping the address adjustment for negative origin,
2224 	 * because this is only called on the primary plane.
2225 	 */
2226 	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
2227 
2228 	if (vc4->gen == VC4_GEN_6_C) {
2229 		u32 value;
2230 
2231 		value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
2232 					~SCALER6_PTR0_UPPER_ADDR_MASK;
2233 		value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
2234 				       SCALER6_PTR0_UPPER_ADDR);
2235 
2236 		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2237 		vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;
2238 
2239 		value = lower_32_bits(dma_addr);
2240 		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
2241 		vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
2242 	} else {
2243 		u32 addr;
2244 
2245 		addr = (u32)dma_addr;
2246 
2247 		/* Write the new address into the hardware immediately.  The
2248 		 * scanout will start from this address as soon as the FIFO
2249 		 * needs to refill with pixels.
2250 		 */
2251 		writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2252 
2253 		/* Also update the CPU-side dlist copy, so that any later
2254 		 * atomic updates that don't do a new modeset on our plane
2255 		 * also use our updated address.
2256 		 */
2257 		vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
2258 	}
2259 
2260 	drm_dev_exit(idx);
2261 }
2262 
2263 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
2264 					  struct drm_atomic_commit *state)
2265 {
2266 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2267 										 plane);
2268 	struct vc4_plane_state *vc4_state, *new_vc4_state;
2269 	int idx;
2270 
2271 	if (!drm_dev_enter(plane->dev, &idx))
2272 		return;
2273 
2274 	swap(plane->state->fb, new_plane_state->fb);
2275 	plane->state->crtc_x = new_plane_state->crtc_x;
2276 	plane->state->crtc_y = new_plane_state->crtc_y;
2277 	plane->state->crtc_w = new_plane_state->crtc_w;
2278 	plane->state->crtc_h = new_plane_state->crtc_h;
2279 	plane->state->src_x = new_plane_state->src_x;
2280 	plane->state->src_y = new_plane_state->src_y;
2281 	plane->state->src_w = new_plane_state->src_w;
2282 	plane->state->src_h = new_plane_state->src_h;
2283 	plane->state->alpha = new_plane_state->alpha;
2284 	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
2285 	plane->state->rotation = new_plane_state->rotation;
2286 	plane->state->zpos = new_plane_state->zpos;
2287 	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
2288 	plane->state->color_encoding = new_plane_state->color_encoding;
2289 	plane->state->color_range = new_plane_state->color_range;
2290 	plane->state->src = new_plane_state->src;
2291 	plane->state->dst = new_plane_state->dst;
2292 	plane->state->visible = new_plane_state->visible;
2293 
2294 	new_vc4_state = to_vc4_plane_state(new_plane_state);
2295 	vc4_state = to_vc4_plane_state(plane->state);
2296 
2297 	vc4_state->crtc_x = new_vc4_state->crtc_x;
2298 	vc4_state->crtc_y = new_vc4_state->crtc_y;
2299 	vc4_state->crtc_h = new_vc4_state->crtc_h;
2300 	vc4_state->crtc_w = new_vc4_state->crtc_w;
2301 	vc4_state->src_x = new_vc4_state->src_x;
2302 	vc4_state->src_y = new_vc4_state->src_y;
2303 	memcpy(vc4_state->src_w, new_vc4_state->src_w,
2304 	       sizeof(vc4_state->src_w));
2305 	memcpy(vc4_state->src_h, new_vc4_state->src_h,
2306 	       sizeof(vc4_state->src_h));
2307 	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
2308 	       sizeof(vc4_state->x_scaling));
2309 	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
2310 	       sizeof(vc4_state->y_scaling));
2311 	vc4_state->is_unity = new_vc4_state->is_unity;
2312 	vc4_state->is_yuv = new_vc4_state->is_yuv;
2313 	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
2314 
2315 	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
2316 	vc4_state->dlist[vc4_state->pos0_offset] =
2317 		new_vc4_state->dlist[vc4_state->pos0_offset];
2318 	vc4_state->dlist[vc4_state->pos2_offset] =
2319 		new_vc4_state->dlist[vc4_state->pos2_offset];
2320 	vc4_state->dlist[vc4_state->ptr0_offset[0]] =
2321 		new_vc4_state->dlist[vc4_state->ptr0_offset[0]];
2322 
2323 	/* Note that we can't just call vc4_plane_write_dlist()
2324 	 * because that would smash the context data that the HVS is
2325 	 * currently using.
2326 	 */
2327 	writel(vc4_state->dlist[vc4_state->pos0_offset],
2328 	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
2329 	writel(vc4_state->dlist[vc4_state->pos2_offset],
2330 	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
2331 	writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
2332 	       &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2333 
2334 	drm_dev_exit(idx);
2335 }
2336 
2337 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
2338 					struct drm_atomic_commit *state, bool flip)
2339 {
2340 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2341 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2342 										 plane);
2343 	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
2344 	int ret;
2345 	u32 i;
2346 
2347 	if (vc4->gen <= VC4_GEN_5)
2348 		ret = vc4_plane_mode_set(plane, new_plane_state);
2349 	else
2350 		ret = vc6_plane_mode_set(plane, new_plane_state);
2351 	if (ret)
2352 		return ret;
2353 
2354 	old_vc4_state = to_vc4_plane_state(plane->state);
2355 	new_vc4_state = to_vc4_plane_state(new_plane_state);
2356 
2357 	if (!new_vc4_state->hw_dlist)
2358 		return -EINVAL;
2359 
2360 	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
2361 	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
2362 	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
2363 	    old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
2364 	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
2365 		return -EINVAL;
2366 
2367 	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
2368 	 * if anything else has changed, fallback to a sync update.
2369 	 */
2370 	for (i = 0; i < new_vc4_state->dlist_count; i++) {
2371 		if (i == new_vc4_state->pos0_offset ||
2372 		    i == new_vc4_state->pos2_offset ||
2373 		    i == new_vc4_state->ptr0_offset[0] ||
2374 		    (new_vc4_state->lbm_offset &&
2375 		     i == new_vc4_state->lbm_offset))
2376 			continue;
2377 
2378 		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
2379 			return -EINVAL;
2380 	}
2381 
2382 	return 0;
2383 }
2384 
2385 static int vc4_prepare_fb(struct drm_plane *plane,
2386 			  struct drm_plane_state *state)
2387 {
2388 	struct vc4_bo *bo;
2389 	int ret;
2390 
2391 	if (!state->fb)
2392 		return 0;
2393 
2394 	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2395 
2396 	ret = drm_gem_plane_helper_prepare_fb(plane, state);
2397 	if (ret)
2398 		return ret;
2399 
2400 	return vc4_bo_inc_usecnt(bo);
2401 }
2402 
2403 static void vc4_cleanup_fb(struct drm_plane *plane,
2404 			   struct drm_plane_state *state)
2405 {
2406 	struct vc4_bo *bo;
2407 
2408 	if (!state->fb)
2409 		return;
2410 
2411 	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2412 	vc4_bo_dec_usecnt(bo);
2413 }
2414 
2415 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
2416 	.atomic_check = vc4_plane_atomic_check,
2417 	.atomic_update = vc4_plane_atomic_update,
2418 	.prepare_fb = vc4_prepare_fb,
2419 	.cleanup_fb = vc4_cleanup_fb,
2420 	.atomic_async_check = vc4_plane_atomic_async_check,
2421 	.atomic_async_update = vc4_plane_atomic_async_update,
2422 };
2423 
2424 static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
2425 	.atomic_check = vc4_plane_atomic_check,
2426 	.atomic_update = vc4_plane_atomic_update,
2427 	.atomic_async_check = vc4_plane_atomic_async_check,
2428 	.atomic_async_update = vc4_plane_atomic_async_update,
2429 };
2430 
2431 static bool vc4_format_mod_supported(struct drm_plane *plane,
2432 				     uint32_t format,
2433 				     uint64_t modifier)
2434 {
2435 	/* Support T_TILING for RGB formats only. */
2436 	switch (format) {
2437 	case DRM_FORMAT_XRGB8888:
2438 	case DRM_FORMAT_ARGB8888:
2439 	case DRM_FORMAT_ABGR8888:
2440 	case DRM_FORMAT_XBGR8888:
2441 	case DRM_FORMAT_RGB565:
2442 	case DRM_FORMAT_BGR565:
2443 	case DRM_FORMAT_ARGB1555:
2444 	case DRM_FORMAT_XRGB1555:
2445 		switch (fourcc_mod_broadcom_mod(modifier)) {
2446 		case DRM_FORMAT_MOD_LINEAR:
2447 		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
2448 			return true;
2449 		default:
2450 			return false;
2451 		}
2452 	case DRM_FORMAT_NV12:
2453 	case DRM_FORMAT_NV21:
2454 		switch (fourcc_mod_broadcom_mod(modifier)) {
2455 		case DRM_FORMAT_MOD_LINEAR:
2456 		case DRM_FORMAT_MOD_BROADCOM_SAND64:
2457 		case DRM_FORMAT_MOD_BROADCOM_SAND128:
2458 		case DRM_FORMAT_MOD_BROADCOM_SAND256:
2459 			return true;
2460 		default:
2461 			return false;
2462 		}
2463 	case DRM_FORMAT_P030:
2464 		switch (fourcc_mod_broadcom_mod(modifier)) {
2465 		case DRM_FORMAT_MOD_BROADCOM_SAND128:
2466 			return true;
2467 		default:
2468 			return false;
2469 		}
2470 	case DRM_FORMAT_RGBX1010102:
2471 	case DRM_FORMAT_BGRX1010102:
2472 	case DRM_FORMAT_RGBA1010102:
2473 	case DRM_FORMAT_BGRA1010102:
2474 	case DRM_FORMAT_XRGB4444:
2475 	case DRM_FORMAT_ARGB4444:
2476 	case DRM_FORMAT_XBGR4444:
2477 	case DRM_FORMAT_ABGR4444:
2478 	case DRM_FORMAT_RGBX4444:
2479 	case DRM_FORMAT_RGBA4444:
2480 	case DRM_FORMAT_BGRX4444:
2481 	case DRM_FORMAT_BGRA4444:
2482 	case DRM_FORMAT_RGB332:
2483 	case DRM_FORMAT_BGR233:
2484 	case DRM_FORMAT_YUV422:
2485 	case DRM_FORMAT_YVU422:
2486 	case DRM_FORMAT_YUV420:
2487 	case DRM_FORMAT_YVU420:
2488 	case DRM_FORMAT_NV16:
2489 	case DRM_FORMAT_NV61:
2490 	default:
2491 		return (modifier == DRM_FORMAT_MOD_LINEAR);
2492 	}
2493 }
2494 
2495 static const struct drm_plane_funcs vc4_plane_funcs = {
2496 	.update_plane = drm_atomic_helper_update_plane,
2497 	.disable_plane = drm_atomic_helper_disable_plane,
2498 	.reset = vc4_plane_reset,
2499 	.atomic_duplicate_state = vc4_plane_duplicate_state,
2500 	.atomic_destroy_state = vc4_plane_destroy_state,
2501 	.format_mod_supported = vc4_format_mod_supported,
2502 };
2503 
2504 struct drm_plane *vc4_plane_init(struct drm_device *dev,
2505 				 enum drm_plane_type type,
2506 				 uint32_t possible_crtcs)
2507 {
2508 	struct vc4_dev *vc4 = to_vc4_dev(dev);
2509 	struct drm_plane *plane;
2510 	struct vc4_plane *vc4_plane;
2511 	u32 formats[ARRAY_SIZE(hvs_formats)];
2512 	int num_formats = 0;
2513 	unsigned i;
2514 	static const uint64_t modifiers[] = {
2515 		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
2516 		DRM_FORMAT_MOD_BROADCOM_SAND128,
2517 		DRM_FORMAT_MOD_BROADCOM_SAND64,
2518 		DRM_FORMAT_MOD_BROADCOM_SAND256,
2519 		DRM_FORMAT_MOD_LINEAR,
2520 		DRM_FORMAT_MOD_INVALID
2521 	};
2522 
2523 	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
2524 		if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
2525 			formats[num_formats] = hvs_formats[i].drm;
2526 			num_formats++;
2527 		}
2528 	}
2529 
2530 	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
2531 					       possible_crtcs,
2532 					       &vc4_plane_funcs,
2533 					       formats, num_formats,
2534 					       modifiers, type, NULL);
2535 	if (IS_ERR(vc4_plane))
2536 		return ERR_CAST(vc4_plane);
2537 	plane = &vc4_plane->base;
2538 
2539 	if (vc4->gen >= VC4_GEN_5)
2540 		drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
2541 	else
2542 		drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
2543 
2544 	drm_plane_create_alpha_property(plane);
2545 	drm_plane_create_blend_mode_property(plane,
2546 					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
2547 					     BIT(DRM_MODE_BLEND_PREMULTI) |
2548 					     BIT(DRM_MODE_BLEND_COVERAGE));
2549 	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
2550 					   DRM_MODE_ROTATE_0 |
2551 					   DRM_MODE_ROTATE_180 |
2552 					   DRM_MODE_REFLECT_X |
2553 					   DRM_MODE_REFLECT_Y);
2554 
2555 	drm_plane_create_color_properties(plane,
2556 					  BIT(DRM_COLOR_YCBCR_BT601) |
2557 					  BIT(DRM_COLOR_YCBCR_BT709) |
2558 					  BIT(DRM_COLOR_YCBCR_BT2020),
2559 					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
2560 					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
2561 					  DRM_COLOR_YCBCR_BT709,
2562 					  DRM_COLOR_YCBCR_LIMITED_RANGE);
2563 
2564 	if (type == DRM_PLANE_TYPE_PRIMARY)
2565 		drm_plane_create_zpos_immutable_property(plane, 0);
2566 
2567 	return plane;
2568 }
2569 
2570 #define VC4_NUM_OVERLAY_PLANES	16
2571 
2572 int vc4_plane_create_additional_planes(struct drm_device *drm)
2573 {
2574 	struct drm_plane *cursor_plane;
2575 	struct drm_crtc *crtc;
2576 	unsigned int i;
2577 
2578 	/* Set up some arbitrary number of planes.  We're not limited
2579 	 * by a set number of physical registers, just the space in
2580 	 * the HVS (16k) and how small an plane can be (28 bytes).
2581 	 * However, each plane we set up takes up some memory, and
2582 	 * increases the cost of looping over planes, which atomic
2583 	 * modesetting does quite a bit.  As a result, we pick a
2584 	 * modest number of planes to expose, that should hopefully
2585 	 * still cover any sane usecase.
2586 	 */
2587 	for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
2588 		struct drm_plane *plane =
2589 			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
2590 				       GENMASK(drm->mode_config.num_crtc - 1, 0));
2591 
2592 		if (IS_ERR(plane))
2593 			continue;
2594 
2595 		/* Create zpos property. Max of all the overlays + 1 primary +
2596 		 * 1 cursor plane on a crtc.
2597 		 */
2598 		drm_plane_create_zpos_property(plane, i + 1, 1,
2599 					       VC4_NUM_OVERLAY_PLANES + 1);
2600 	}
2601 
2602 	drm_for_each_crtc(crtc, drm) {
2603 		/* Set up the legacy cursor after overlay initialization,
2604 		 * since the zpos fallback is that planes are rendered by plane
2605 		 * ID order, and that then puts the cursor on top.
2606 		 */
2607 		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
2608 					      drm_crtc_mask(crtc));
2609 		if (!IS_ERR(cursor_plane)) {
2610 			crtc->cursor = cursor_plane;
2611 
2612 			drm_plane_create_zpos_property(cursor_plane,
2613 						       VC4_NUM_OVERLAY_PLANES + 1,
2614 						       1,
2615 						       VC4_NUM_OVERLAY_PLANES + 1);
2616 		}
2617 	}
2618 
2619 	return 0;
2620 }
2621