1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2015 Broadcom
4 */
5
6 /**
7 * DOC: VC4 plane module
8 *
9 * Each DRM plane is a layer of pixels being scanned out by the HVS.
10 *
11 * At atomic modeset check time, we compute the HVS display element
12 * state that would be necessary for displaying the plane (giving us a
13 * chance to figure out if a plane configuration is invalid), then at
14 * atomic flush time the CRTC will ask us to write our element state
15 * into the region of the HVS that it has allocated for us.
16 */
17
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_blend.h>
22 #include <drm/drm_drv.h>
23 #include <drm/drm_fb_dma_helper.h>
24 #include <drm/drm_fourcc.h>
25 #include <drm/drm_framebuffer.h>
26 #include <drm/drm_gem_atomic_helper.h>
27 #include <drm/drm_print.h>
28
29 #include "uapi/drm/vc4_drm.h"
30
31 #include "vc4_drv.h"
32 #include "vc4_regs.h"
33
/*
 * Translation table from DRM fourcc codes to HVS pixel-format and
 * pixel-order register encodings.  HVS generations before 5 use
 * .pixel_order; HVS5 and later use .pixel_order_hvs5.  Entries flagged
 * .hvs5_only have no encoding on the older hardware and must not be
 * exposed there.
 */
static const struct hvs_format {
	u32 drm; /* DRM_FORMAT_* */
	u32 hvs; /* HVS_FORMAT_* */
	u32 pixel_order;
	u32 pixel_order_hvs5;
	bool hvs5_only;
} hvs_formats[] = {
	{
		.drm = DRM_FORMAT_XRGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ABGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XBGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_ARGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XRGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_RGB888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
	},
	/*
	 * NOTE(review): the three-plane YUV444 formats below reuse the
	 * 422 3-plane HVS format; this mirrors how the hardware is
	 * driven for 444 content.
	 */
	{
		.drm = DRM_FORMAT_YUV422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV444,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU444,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV12,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV21,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV16,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV61,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	/* 10-bit and 30-bit formats only exist on HVS5+. */
	{
		.drm = DRM_FORMAT_P030,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_XRGB2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_ARGB2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_ABGR2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_XBGR2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_RGB332,
		.hvs = HVS_PIXEL_FORMAT_RGB332,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_BGR233,
		.hvs = HVS_PIXEL_FORMAT_RGB332,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XRGB4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XBGR4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_ABGR4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_BGRX4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_RGBA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
	},
	{
		.drm = DRM_FORMAT_BGRA4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_RGBA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
	},
	{
		.drm = DRM_FORMAT_RGBX4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_BGRA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
	},
	{
		.drm = DRM_FORMAT_RGBA4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_BGRA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
	},
};
252
/* Look up the HVS translation entry for a DRM fourcc; NULL if unsupported. */
static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
{
	const struct hvs_format *fmt = hvs_formats;
	const struct hvs_format *end = hvs_formats + ARRAY_SIZE(hvs_formats);

	for (; fmt < end; fmt++) {
		if (fmt->drm == drm_format)
			return fmt;
	}

	return NULL;
}
264
/*
 * Pick the scaler mode for one axis.  @src is the 16.16 fixed-point
 * source size, @dst the integer destination size.  An exact match needs
 * no scaling; the PPF handles moderate ratios (dst >= 2/3 of src); TPZ
 * covers heavier downscaling.
 */
static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
	u32 src_int = src >> 16;

	if (dst == src_int)
		return VC4_SCALING_NONE;

	return (3 * dst >= 2 * src_int) ? VC4_SCALING_PPF : VC4_SCALING_TPZ;
}
274
/*
 * A plane is enabled when it has a framebuffer attached; a framebuffer
 * without an associated CRTC is a driver bug, hence the WARN_ON.
 */
static bool plane_enabled(struct drm_plane_state *state)
{
	return state->fb && !WARN_ON(!state->crtc);
}
279
/*
 * Duplicate a plane state for an atomic commit.
 *
 * The dlist is deep-copied so the new state can be rewritten
 * independently, the LBM node is cleared (a fresh allocation happens at
 * check time), and any UPM regions referenced by the old state gain an
 * extra reference since both states now point at them.
 */
static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state;
	unsigned int i;

	if (WARN_ON(!plane->state))
		return NULL;

	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return NULL;

	/* The copy must not share the old state's LBM allocation. */
	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));

	/* Old and new state now both reference the same UPM regions. */
	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		if (vc4_state->upm_handle[i])
			refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
	}

	vc4_state->dlist_initialized = 0;

	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);

	if (vc4_state->dlist) {
		/* Deep-copy only the used part; capacity equals the count. */
		vc4_state->dlist = kmemdup(vc4_state->dlist,
					   vc4_state->dlist_count * 4,
					   GFP_KERNEL);
		if (!vc4_state->dlist) {
			kfree(vc4_state);
			return NULL;
		}
		vc4_state->dlist_size = vc4_state->dlist_count;
	}

	return &vc4_state->base;
}
318
/*
 * Release a UPM allocation whose last reference is gone: remove the
 * drm_mm node under the HVS memory lock, reset the bookkeeping, and
 * return the handle to the IDA so it can be reused.
 */
static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
{
	struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
	unsigned long irqflags;

	spin_lock_irqsave(&hvs->mm_lock, irqflags);
	drm_mm_remove_node(&refcount->upm);
	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	refcount->upm.start = 0;
	refcount->upm.size = 0;
	refcount->size = 0;

	ida_free(&hvs->upm_handles, upm_handle);
}
333
/*
 * Free a plane state: give back its LBM node, drop the references it
 * holds on any UPM regions (releasing them on last use), and free the
 * dlist copy owned by this state.
 */
static void vc4_plane_destroy_state(struct drm_plane *plane,
				    struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int i;

	if (drm_mm_node_allocated(&vc4_state->lbm)) {
		unsigned long irqflags;

		spin_lock_irqsave(&hvs->mm_lock, irqflags);
		drm_mm_remove_node(&vc4_state->lbm);
		spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	}

	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		struct vc4_upm_refcounts *refcount;

		if (!vc4_state->upm_handle[i])
			continue;

		refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];

		/* Last user frees the underlying UPM allocation. */
		if (refcount_dec_and_test(&refcount->refcount))
			vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
	}

	kfree(vc4_state->dlist);
	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
	kfree(state);
}
366
367 /* Called during init to allocate the plane's atomic state. */
static void vc4_plane_reset(struct drm_plane *plane)
{
	struct vc4_plane_state *vc4_state;

	/* Tear down any previous state before installing a fresh one. */
	if (plane->state)
		__drm_atomic_helper_plane_destroy_state(plane->state);

	kfree(plane->state);

	/* Zero-initialized: no dlist, no LBM node, no UPM handles yet. */
	vc4_state = kzalloc_obj(*vc4_state);
	if (!vc4_state)
		return;

	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
}
383
vc4_dlist_counter_increment(struct vc4_plane_state * vc4_state)384 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
385 {
386 if (vc4_state->dlist_count == vc4_state->dlist_size) {
387 u32 new_size = max(4u, vc4_state->dlist_count * 2);
388 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
389
390 if (!new_dlist)
391 return;
392 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
393
394 kfree(vc4_state->dlist);
395 vc4_state->dlist = new_dlist;
396 vc4_state->dlist_size = new_size;
397 }
398
399 vc4_state->dlist_count++;
400 }
401
/* Append one word to the plane's dlist, growing the buffer as needed. */
static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
{
	unsigned int idx = vc4_state->dlist_count;

	vc4_dlist_counter_increment(vc4_state);

	/*
	 * If vc4_dlist_counter_increment() could not grow the dlist
	 * (allocation failure), the count was not advanced and idx now
	 * points one past the end of the buffer.  Drop the word rather
	 * than writing out of bounds.
	 */
	if (idx < vc4_state->dlist_size)
		vc4_state->dlist[idx] = val;
}
409
/* Returns the scl0/scl1 field based on whether the dimensions need to
 * be up/down/non-scaled.
 *
 * This is a replication of a table from the spec.
 */
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Pack (x_scaling, y_scaling) into one key: x in the upper two
	 * bits, y in the lower two.
	 */
	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_PPF_V_PPF;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_PPF_V_NONE;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_NONE_V_PPF;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
	default:
	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
		/* The unity case is independently handled by
		 * SCALER_CTL0_UNITY.
		 */
		return 0;
	}
}
444
/*
 * Rescale and offset the plane's CRTC rectangle so it fits inside the
 * overscan margins configured on the CRTC.  Returns -EINVAL when the
 * margins leave no usable display area or the plane would shrink to
 * zero size.
 */
static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
{
	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
	struct drm_crtc_state *crtc_state;

	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
						   pstate->crtc);

	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
	if (!left && !right && !top && !bottom)
		return 0;

	if (left + right >= crtc_state->mode.hdisplay ||
	    top + bottom >= crtc_state->mode.vdisplay)
		return -EINVAL;

	/* Scale x proportionally into the reduced width, then clamp to
	 * the right margin.
	 */
	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_x += left;
	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;

	/* Same for y against the top/bottom margins. */
	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);
	vc4_pstate->crtc_y += top;
	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;

	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);

	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
		return -EINVAL;

	return 0;
}
490
/*
 * Derive the plane's clipped source/destination rectangles and pick a
 * scaling mode per axis and per channel (0 = RGB/luma, 1 = chroma).
 *
 * src_* values are 16.16 fixed point as produced by the atomic helper;
 * crtc_* values are integer pixels.
 */
static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	int num_planes = fb->format->num_planes;
	struct drm_crtc_state *crtc_state;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	int ret;

	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

	/* Clip against the CRTC; any scale factor from 1 up is allowed. */
	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
	if (ret)
		return ret;

	vc4_state->src_x = state->src.x1;
	vc4_state->src_y = state->src.y1;
	vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
	vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;

	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;

	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		/* The chroma plane is subsampled relative to luma. */
		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
					     vc4_state->crtc_h);

		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;

		/* Similarly UV needs vertical scaling to be enabled.
		 * Without this a 1:1 scaled YUV422 plane isn't rendered.
		 */
		if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
			vc4_state->y_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}
569
/*
 * Emit the two TPZ (trapezoidal down-scaler) dlist words for one axis.
 * @src is 16.16 fixed point and @dst an integer size, so @scale is the
 * 16.16 source step per destination pixel.
 */
static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale, recip;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	scale = src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			/*
			 * The BCM2712 is lacking BIT(31) compared to
			 * the previous generations, but we don't use
			 * it.
			 */
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}
595
596 /* phase magnitude bits */
597 #define PHASE_BITS 6
598
/*
 * Emit the PPF (polyphase filter) scaling word for one axis.  @src is
 * 16.16 fixed point and @dst an integer size; @xy is the 16.16 source
 * offset used to derive the initial phase; @channel is non-zero for
 * chroma.
 */
static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
			  u32 xy, int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale = src / dst;
	s32 offset, offset2;
	s32 phase;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/*
	 * Start the phase at 1/2 pixel from the 1st pixel at src_x.
	 * 1/4 pixel for YUV.
	 */
	if (channel) {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
		offset += -(1 << PHASE_BITS >> 2);
	} else {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0xffff) >> (16 - PHASE_BITS);
		offset += -(1 << PHASE_BITS >> 1);

		/*
		 * This is a kludge to make sure the scaling factors are
		 * consistent with YUV's luma scaling. We lose 1-bit precision
		 * because of this.
		 */
		scale &= ~1;
	}

	/*
	 * There may be a also small error introduced by precision of scale.
	 * Add half of that as a compromise
	 */
	offset2 = src - dst * scale;
	offset2 >>= 16 - PHASE_BITS;
	phase = offset + (offset2 >> 1);

	/* Ensure +ve values don't touch the sign bit, then truncate negative values */
	if (phase >= 1 << PHASE_BITS)
		phase = (1 << PHASE_BITS) - 1;

	phase &= SCALER_PPF_IPHASE_MASK;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			/*
			 * The register layout documentation is slightly
			 * different to setup the phase in the BCM2712,
			 * but they seem equivalent.
			 */
			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
}
660
/*
 * Compute the LBM (line buffer memory) requirement, in LBM words, for
 * HVS generations up to 5.
 */
static u32 __vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 pix_per_line;
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case since src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0] >> 16;

	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;

	return lbm;
}
708
vc4_lbm_words_per_component(const struct drm_plane_state * state,unsigned int channel)709 static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
710 unsigned int channel)
711 {
712 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
713
714 switch (vc4_state->y_scaling[channel]) {
715 case VC4_SCALING_PPF:
716 return 4;
717
718 case VC4_SCALING_TPZ:
719 return 2;
720
721 default:
722 return 0;
723 }
724 }
725
vc4_lbm_components(const struct drm_plane_state * state,unsigned int channel)726 static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
727 unsigned int channel)
728 {
729 const struct drm_format_info *info = state->fb->format;
730 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
731
732 if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
733 return 0;
734
735 if (info->is_yuv)
736 return channel ? 2 : 1;
737
738 if (info->has_alpha)
739 return 4;
740
741 return 3;
742 }
743
vc4_lbm_channel_size(const struct drm_plane_state * state,unsigned int channel)744 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
745 unsigned int channel)
746 {
747 const struct drm_format_info *info = state->fb->format;
748 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
749 unsigned int channels_scaled = 0;
750 unsigned int components, words, wpc;
751 unsigned int width, lines;
752 unsigned int i;
753
754 /* LBM is meant to use the smaller of source or dest width, but there
755 * is a issue with UV scaling that the size required for the second
756 * channel is based on the source width only.
757 */
758 if (info->hsub > 1 && channel == 1)
759 width = state->src_w >> 16;
760 else
761 width = min(state->src_w >> 16, state->crtc_w);
762 width = round_up(width / info->hsub, 4);
763
764 wpc = vc4_lbm_words_per_component(state, channel);
765 if (!wpc)
766 return 0;
767
768 components = vc4_lbm_components(state, channel);
769 if (!components)
770 return 0;
771
772 if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha)
773 components -= 1;
774
775 words = width * wpc * components;
776
777 lines = DIV_ROUND_UP(words, 128 / info->hsub);
778
779 for (i = 0; i < 2; i++)
780 if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
781 channels_scaled++;
782
783 if (channels_scaled == 1)
784 lines = lines / 2;
785
786 return lines;
787 }
788
__vc6_lbm_size(const struct drm_plane_state * state)789 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
790 {
791 const struct drm_format_info *info = state->fb->format;
792
793 if (info->hsub > 1)
794 return max(vc4_lbm_channel_size(state, 0),
795 vc4_lbm_channel_size(state, 1));
796 else
797 return vc4_lbm_channel_size(state, 0);
798 }
799
vc4_lbm_size(struct drm_plane_state * state)800 static u32 vc4_lbm_size(struct drm_plane_state *state)
801 {
802 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
803 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
804
805 /* LBM is not needed when there's no vertical scaling. */
806 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
807 vc4_state->y_scaling[1] == VC4_SCALING_NONE)
808 return 0;
809
810 if (vc4->gen >= VC4_GEN_6_C)
811 return __vc6_lbm_size(state);
812 else
813 return __vc4_lbm_size(state);
814 }
815
/*
 * Size, in bytes, of the UPM fetch buffer required for one framebuffer
 * plane on HVS6, rounded up to whole UPM words.
 */
static size_t vc6_upm_size(const struct drm_plane_state *state,
			   unsigned int plane)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int stride = state->fb->pitches[plane];

	/*
	 * TODO: This only works for raster formats, and is sub-optimal
	 * for buffers with a stride aligned on 32 bytes.
	 */
	/* NOTE(review): +62 rounds up with roughly one extra 32-byte word
	 * of slack beyond a plain ceiling division - presumably headroom
	 * for unaligned fetches; confirm against the HVS documentation.
	 */
	unsigned int words_per_line = (stride + 62) / 32;
	unsigned int fetch_region_size = words_per_line * 32;
	unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
	unsigned int buffer_size = fetch_region_size * buffer_lines;

	return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
}
833
/*
 * Emit the per-channel scaler parameter words (PPF and/or TPZ, with
 * their context words) into the dlist, in the order the hardware
 * expects.
 */
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state, vc4_state->src_w[channel],
			      vc4_state->crtc_w, vc4_state->src_x, channel);
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state, vc4_state->src_h[channel],
			      vc4_state->crtc_h, vc4_state->src_y, channel);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state, vc4_state->src_w[channel],
			      vc4_state->crtc_w);
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state, vc4_state->src_h[channel],
			      vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}
868
/*
 * Estimate the HVS clock load (hvs_load, clk-cycles/sec) and memory bus
 * read bandwidth (membus_load, bytes/sec) this plane configuration will
 * generate, for use by core-clock/load tracking.
 */
static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * Alpha blending step seems to be pipelined and it's always operating
	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even if the bandwidth/plane required for a single frame is
		 *
		 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
		 * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be punctually higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
					     vc4_state->crtc_h);
		vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
					  (vc4_state->src_h[i] >> 16) *
					  vscale_factor * fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}
923
/*
 * Reserve LBM for this plane state (if scaling requires it) and patch
 * the allocated start address into the dlist word reserved at
 * lbm_offset.  If the state already owns an LBM node it is kept; a
 * size mismatch at that point is a driver bug (WARN).
 */
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct drm_plane *plane = state->plane;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	/*
	 * NOTE: BCM2712 doesn't need to be aligned, since the size
	 * returned by vc4_lbm_size() is in words already.
	 */
	if (vc4->gen == VC4_GEN_5)
		lbm_size = ALIGN(lbm_size, 64);
	else if (vc4->gen == VC4_GEN_4)
		lbm_size = ALIGN(lbm_size, 32);

	drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
		       plane->base.id, plane->name, lbm_size);

	/* The dlist must already contain a placeholder word for the LBM
	 * address; offset 0 means it was never reserved.
	 */
	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
	 */
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
		int ret;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
						 lbm_size, 1,
						 0, 0);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret) {
			drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
			return ret;
		}
	} else {
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}
977
/*
 * Allocate (or reuse) a UPM fetch buffer for each framebuffer plane on
 * HVS6 and patch base/handle/size into the PTR0 dlist words.
 * Allocations are tracked by IDA handle with a refcount so duplicated
 * plane states can share them.
 */
static int vc6_plane_allocate_upm(struct drm_plane_state *state)
{
	const struct drm_format_info *info = state->fb->format;
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int i;
	int ret;

	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);

	vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;

	for (i = 0; i < info->num_planes; i++) {
		struct vc4_upm_refcounts *refcount;
		int upm_handle;
		unsigned long irqflags;
		size_t upm_size;

		upm_size = vc6_upm_size(state, i);
		if (!upm_size)
			return -EINVAL;
		upm_handle = vc4_state->upm_handle[i];

		if (upm_handle &&
		    hvs->upm_refcounts[upm_handle].size == upm_size) {
			/* Allocation is the same size as the previous user of
			 * the plane. Keep the allocation.
			 */
			vc4_state->upm_handle[i] = upm_handle;
		} else {
			/* Drop our reference on the old, differently-sized
			 * allocation before making a new one.
			 */
			if (upm_handle &&
			    refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
				vc4_plane_release_upm_ida(hvs, upm_handle);
				vc4_state->upm_handle[i] = 0;
			}

			upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
						     VC4_NUM_UPM_HANDLES,
						     GFP_KERNEL);
			if (upm_handle < 0) {
				drm_dbg(drm, "Out of upm_handles\n");
				return upm_handle;
			}
			vc4_state->upm_handle[i] = upm_handle;

			refcount = &hvs->upm_refcounts[upm_handle];
			refcount_set(&refcount->refcount, 1);
			refcount->size = upm_size;

			spin_lock_irqsave(&hvs->mm_lock, irqflags);
			ret = drm_mm_insert_node_generic(&hvs->upm_mm,
							 &refcount->upm,
							 upm_size, HVS_UBM_WORD_SIZE,
							 0, 0);
			spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
			if (ret) {
				drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
				refcount_set(&refcount->refcount, 0);
				ida_free(&hvs->upm_handles, upm_handle);
				vc4_state->upm_handle[i] = 0;
				return ret;
			}
		}

		refcount = &hvs->upm_refcounts[upm_handle];
		/* The hardware handle field is 0-based, our IDA range 1-based. */
		vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
			VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
				      SCALER6_PTR0_UPM_BASE) |
			VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
				      SCALER6_PTR0_UPM_HANDLE) |
			VC4_SET_FIELD(vc4_state->upm_buffer_lines,
				      SCALER6_PTR0_UPM_BUFF_SIZE);
	}

	return 0;
}
1056
vc6_plane_free_upm(struct drm_plane_state * state)1057 static void vc6_plane_free_upm(struct drm_plane_state *state)
1058 {
1059 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1060 struct drm_device *drm = state->plane->dev;
1061 struct vc4_dev *vc4 = to_vc4_dev(drm);
1062 struct vc4_hvs *hvs = vc4->hvs;
1063 unsigned int i;
1064
1065 WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
1066
1067 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
1068 unsigned int upm_handle;
1069
1070 upm_handle = vc4_state->upm_handle[i];
1071 if (!upm_handle)
1072 continue;
1073
1074 if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
1075 vc4_plane_release_upm_ida(hvs, upm_handle);
1076 vc4_state->upm_handle[i] = 0;
1077 }
1078 }
1079
/*
 * The colorspace conversion matrices are held in 3 entries in the dlist.
 * Create an array of them, with entries for each full and limited mode, and
 * each supported colorspace.
 *
 * Indexed as [color_range][color_encoding][dlist word], where color_range is
 * 0 for limited and 1 for full range, and color_encoding follows the
 * DRM_COLOR_YCBCR_* enum order (BT601, BT709, BT2020).
 */
static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
	{
		/* Limited range */
		{
			/* BT601 */
			SCALER_CSC0_ITR_R_601_5,
			SCALER_CSC1_ITR_R_601_5,
			SCALER_CSC2_ITR_R_601_5,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3,
			SCALER_CSC1_ITR_R_709_3,
			SCALER_CSC2_ITR_R_709_3,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020,
			SCALER_CSC1_ITR_R_2020,
			SCALER_CSC2_ITR_R_2020,
		}
	}, {
		/* Full range */
		{
			/* JFIF */
			SCALER_CSC0_JPEG_JFIF,
			SCALER_CSC1_JPEG_JFIF,
			SCALER_CSC2_JPEG_JFIF,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3_FR,
			SCALER_CSC1_ITR_R_709_3_FR,
			SCALER_CSC2_ITR_R_709_3_FR,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020_FR,
			SCALER_CSC1_ITR_R_2020_FR,
			SCALER_CSC2_ITR_R_2020_FR,
		}
	}
};
1124
vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state * state)1125 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
1126 {
1127 struct drm_device *dev = state->state->dev;
1128 struct vc4_dev *vc4 = to_vc4_dev(dev);
1129
1130 WARN_ON_ONCE(vc4->gen != VC4_GEN_4);
1131
1132 if (!state->fb->format->has_alpha)
1133 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1134 SCALER_POS2_ALPHA_MODE);
1135
1136 switch (state->pixel_blend_mode) {
1137 case DRM_MODE_BLEND_PIXEL_NONE:
1138 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1139 SCALER_POS2_ALPHA_MODE);
1140 default:
1141 case DRM_MODE_BLEND_PREMULTI:
1142 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1143 SCALER_POS2_ALPHA_MODE) |
1144 SCALER_POS2_ALPHA_PREMULT;
1145 case DRM_MODE_BLEND_COVERAGE:
1146 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1147 SCALER_POS2_ALPHA_MODE);
1148 }
1149 }
1150
vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state * state)1151 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
1152 {
1153 struct drm_device *dev = state->state->dev;
1154 struct vc4_dev *vc4 = to_vc4_dev(dev);
1155
1156 WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C &&
1157 vc4->gen != VC4_GEN_6_D);
1158
1159 switch (vc4->gen) {
1160 default:
1161 case VC4_GEN_5:
1162 case VC4_GEN_6_C:
1163 if (!state->fb->format->has_alpha)
1164 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1165 SCALER5_CTL2_ALPHA_MODE);
1166
1167 switch (state->pixel_blend_mode) {
1168 case DRM_MODE_BLEND_PIXEL_NONE:
1169 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1170 SCALER5_CTL2_ALPHA_MODE);
1171 default:
1172 case DRM_MODE_BLEND_PREMULTI:
1173 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1174 SCALER5_CTL2_ALPHA_MODE) |
1175 SCALER5_CTL2_ALPHA_PREMULT;
1176 case DRM_MODE_BLEND_COVERAGE:
1177 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1178 SCALER5_CTL2_ALPHA_MODE);
1179 }
1180 case VC4_GEN_6_D:
1181 /* 2712-D configures fixed alpha mode in CTL0 */
1182 return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ?
1183 SCALER5_CTL2_ALPHA_PREMULT : 0;
1184 }
1185 }
1186
vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state * state)1187 static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state)
1188 {
1189 struct drm_device *dev = state->state->dev;
1190 struct vc4_dev *vc4 = to_vc4_dev(dev);
1191
1192 WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D);
1193
1194 if (vc4->gen == VC4_GEN_6_D &&
1195 (!state->fb->format->has_alpha ||
1196 state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE))
1197 return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED,
1198 SCALER6_CTL0_ALPHA_MASK);
1199
1200 return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK);
1201 }
1202
/* Writes out a full display list for an active plane to the plane's
 * private dlist state.
 *
 * Covers gen4 (VC4) and gen5 (VC5/BCM2711) hardware; the layout of the
 * emitted words differs between the two generations. The dlist is built in
 * vc4_state->dlist via vc4_dlist_write(); a slot for the LBM base address is
 * only reserved here (vc4_state->lbm_offset) and filled in later by
 * vc4_plane_allocate_lbm().
 *
 * Returns 0 on success or a negative errno for invalid configurations
 * (e.g. an unsupported framebuffer modifier or an oversized SAND column).
 */
static int vc4_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	/* Index of the CTL0 word so its SIZE field can be patched at the end. */
	u32 ctl0_offset = vc4_state->dlist_count;
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
	int num_planes = fb->format->num_planes;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	bool mix_plane_alpha;
	bool covers_screen;
	u32 scl0, scl1, pitch0;
	u32 tiling, src_x, src_y;
	u32 width, height;
	u32 hvs_format = format->hvs;
	unsigned int rotation;
	/* Per-plane byte offsets added to the BO base addresses. */
	u32 offsets[3] = { 0 };
	int ret, i;

	/* Already built by a previous (e.g. async-update) check. */
	if (vc4_state->dlist_initialized)
		return 0;

	ret = vc4_plane_setup_clipping_and_scaling(state);
	if (ret)
		return ret;

	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
		/* 0 source size probably means the plane is offscreen */
		vc4_state->dlist_initialized = 1;
		return 0;
	}

	/* src_* are 16.16 fixed point; take the integer part. */
	width = vc4_state->src_w[0] >> 16;
	height = vc4_state->src_h[0] >> 16;

	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing.  For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
		scl0 = vc4_get_scl_field(state, 0);
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}

	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y >> 16;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += height - 1;

	src_x = vc4_state->src_x >> 16;

	switch (base_format_mod) {
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER_CTL0_TILING_LINEAR;
		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			/* Chroma planes (i > 0) are subsampled. */
			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
		}

		break;

	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
		u32 tile_size_shift = 12; /* T tiles are 4kb */
		/* Whole-tile offsets, mostly for setting the pitch. */
		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
		u32 tile_w_mask = (1 << tile_w_shift) - 1;
		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
		 * the height (in pixels) of a 4k tile.
		 */
		u32 tile_h_mask = (2 << tile_h_shift) - 1;
		/* For T-tiled, the FB pitch is "how many bytes from one row to
		 * the next, such that
		 *
		 *	pitch * tile_h == tile_size * tiles_per_row
		 */
		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
		u32 tiles_l = src_x >> tile_w_shift;
		u32 tiles_r = tiles_w - tiles_l;
		u32 tiles_t = src_y >> tile_h_shift;
		/* Intra-tile offsets, which modify the base address (the
		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
		 * base address).
		 */
		u32 tile_y = (src_y >> 4) & 1;
		u32 subtile_y = (src_y >> 2) & 3;
		u32 utile_y = src_y & 3;
		u32 x_off = src_x & tile_w_mask;
		u32 y_off = src_y & tile_h_mask;

		/* When Y reflection is requested we must set the
		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
		 * after the initial one should be fetched in descending order,
		 * which makes sense since we start from the last line and go
		 * backward.
		 * Don't know why we need y_off = max_y_off - y_off, but it's
		 * definitely required (I guess it's also related to the "going
		 * backward" situation).
		 */
		if (rotation & DRM_MODE_REFLECT_Y) {
			y_off = tile_h_mask - y_off;
			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
		} else {
			pitch0 = 0;
		}

		tiling = SCALER_CTL0_TILING_256B_OR_T;
		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
		offsets[0] += tiles_t * (tiles_w << tile_size_shift);
		offsets[0] += subtile_y << 8;
		offsets[0] += utile_y << 4;

		/* Rows of tiles alternate left-to-right and right-to-left. */
		if (tiles_t & 1) {
			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
			offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
			offsets[0] -= (1 + !tile_y) << 10;
		} else {
			offsets[0] += tiles_l << tile_size_shift;
			offsets[0] += tile_y << 10;
		}

		break;
	}

	case DRM_FORMAT_MOD_BROADCOM_SAND64:
	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		/* The modifier parameter carries the SAND column height. */
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n",
				      param);
			return -EINVAL;
		}

		if (fb->format->format == DRM_FORMAT_P030) {
			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
			tiling = SCALER_CTL0_TILING_128B;
		} else {
			hvs_format = HVS_PIXEL_FORMAT_H264;

			switch (base_format_mod) {
			case DRM_FORMAT_MOD_BROADCOM_SAND64:
				tiling = SCALER_CTL0_TILING_64B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND128:
				tiling = SCALER_CTL0_TILING_128B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND256:
				tiling = SCALER_CTL0_TILING_256B_OR_T;
				break;
			default:
				return -EINVAL;
			}
		}

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 *
		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
		 * word that should be taken as the first pixel.
		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
		 * element within the 128bit word, eg for pixel 3 the value
		 * should be 6.
		 */
		for (i = 0; i < num_planes; i++) {
			u32 tile_w, tile, x_off, pix_per_tile;

			if (fb->format->format == DRM_FORMAT_P030) {
				/*
				 * Spec says: bits [31:4] of the given address
				 * should point to the 128-bit word containing
				 * the desired starting pixel, and bits[3:0]
				 * should be between 0 and 11, indicating which
				 * of the 12-pixels in that 128-bit word is the
				 * first pixel to be used
				 */
				u32 remaining_pixels = src_x % 96;
				u32 aligned = remaining_pixels / 12;
				u32 last_bits = remaining_pixels % 12;

				x_off = aligned * 16 + last_bits;
				tile_w = 128;
				pix_per_tile = 96;
			} else {
				switch (base_format_mod) {
				case DRM_FORMAT_MOD_BROADCOM_SAND64:
					tile_w = 64;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND128:
					tile_w = 128;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND256:
					tile_w = 256;
					break;
				default:
					return -EINVAL;
				}
				pix_per_tile = tile_w / fb->format->cpp[0];
				x_off = (src_x % pix_per_tile) /
					(i ? h_subsample : 1) *
					fb->format->cpp[i];
			}

			tile = src_x / pix_per_tile;

			offsets[i] += param * tile_w * tile;
			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
			/* UV plane offsets must be 2-byte aligned. */
			offsets[i] += x_off & ~(i ? 1 : 0);
		}

		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* fetch an extra pixel if we don't actually line up with the left edge. */
	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
		width++;

	/* same for the right side */
	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
		width++;

	/* now for the top */
	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
		height++;

	/* and the bottom */
	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
		height++;

	/* For YUV444 the hardware wants double the width, otherwise it doesn't
	 * fetch full width of chroma
	 */
	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
		width <<= 1;

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	if (vc4->gen == VC4_GEN_4) {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				vc4_hvs4_get_alpha_blend_mode(state) |
				VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
					      SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
				vc4_hvs5_get_alpha_blend_mode(state) |
				(mix_plane_alpha ?
					SCALER5_CTL2_ALPHA_MIX : 0)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER5_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER5_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}


	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset[0] = vc4_state->dlist_count;

	for (i = 0; i < num_planes; i++) {
		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);

		vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]);
	}

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			/* SAND formats share the column-height pitch word. */
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words */
	if (vc4_state->is_yuv) {
		enum drm_color_encoding color_encoding = state->color_encoding;
		enum drm_color_range color_range = state->color_range;
		const u32 *ccm;

		/* Clamp out-of-range property values to sane defaults. */
		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
			color_encoding = DRM_COLOR_YCBCR_BT601;
		if (color_range >= DRM_COLOR_RANGE_MAX)
			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;

		ccm = colorspace_coeffs[color_range][color_encoding];

		vc4_dlist_write(vc4_state, ccm[0]);
		vc4_dlist_write(vc4_state, ccm[1]);
		vc4_dlist_write(vc4_state, ccm[2]);
	}

	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	/* Now that the dlist is complete, patch its total length into the
	 * CTL0 word emitted first.
	 */
	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	return 0;
}
1698
vc6_plane_get_csc_mode(struct vc4_plane_state * vc4_state)1699 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
1700 {
1701 struct drm_plane_state *state = &vc4_state->base;
1702 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
1703 u32 ret = 0;
1704
1705 if (vc4_state->is_yuv) {
1706 enum drm_color_encoding color_encoding = state->color_encoding;
1707 enum drm_color_range color_range = state->color_range;
1708
1709 /* CSC pre-loaded with:
1710 * 0 = BT601 limited range
1711 * 1 = BT709 limited range
1712 * 2 = BT2020 limited range
1713 * 3 = BT601 full range
1714 * 4 = BT709 full range
1715 * 5 = BT2020 full range
1716 */
1717 if (color_encoding > DRM_COLOR_YCBCR_BT2020)
1718 color_encoding = DRM_COLOR_YCBCR_BT601;
1719 if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
1720 color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1721
1722 if (vc4->gen == VC4_GEN_6_C) {
1723 ret |= SCALER6C_CTL2_CSC_ENABLE;
1724 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1725 SCALER6C_CTL2_BRCM_CFC_CONTROL);
1726 } else {
1727 ret |= SCALER6D_CTL2_CSC_ENABLE;
1728 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1729 SCALER6D_CTL2_BRCM_CFC_CONTROL);
1730 }
1731 }
1732
1733 return ret;
1734 }
1735
vc6_plane_mode_set(struct drm_plane * plane,struct drm_plane_state * state)1736 static int vc6_plane_mode_set(struct drm_plane *plane,
1737 struct drm_plane_state *state)
1738 {
1739 struct drm_device *drm = plane->dev;
1740 struct vc4_dev *vc4 = to_vc4_dev(drm);
1741 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1742 struct drm_framebuffer *fb = state->fb;
1743 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1744 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1745 int num_planes = fb->format->num_planes;
1746 u32 h_subsample = fb->format->hsub;
1747 u32 v_subsample = fb->format->vsub;
1748 bool mix_plane_alpha;
1749 bool covers_screen;
1750 u32 scl0, scl1, pitch0;
1751 u32 tiling, src_x, src_y;
1752 u32 width, height;
1753 u32 hvs_format = format->hvs;
1754 u32 offsets[3] = { 0 };
1755 unsigned int rotation;
1756 int ret, i;
1757
1758 if (vc4_state->dlist_initialized)
1759 return 0;
1760
1761 ret = vc4_plane_setup_clipping_and_scaling(state);
1762 if (ret)
1763 return ret;
1764
1765 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
1766 !vc4_state->crtc_w || !vc4_state->crtc_h) {
1767 /* 0 source size probably means the plane is offscreen.
1768 * 0 destination size is a redundant plane.
1769 */
1770 vc4_state->dlist_initialized = 1;
1771 return 0;
1772 }
1773
1774 width = vc4_state->src_w[0] >> 16;
1775 height = vc4_state->src_h[0] >> 16;
1776
1777 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
1778 * and 4:4:4, scl1 should be set to scl0 so both channels of
1779 * the scaler do the same thing. For YUV, the Y plane needs
1780 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1781 * the scl fields here.
1782 */
1783 if (num_planes == 1) {
1784 scl0 = vc4_get_scl_field(state, 0);
1785 scl1 = scl0;
1786 } else {
1787 scl0 = vc4_get_scl_field(state, 1);
1788 scl1 = vc4_get_scl_field(state, 0);
1789 }
1790
1791 rotation = drm_rotation_simplify(state->rotation,
1792 DRM_MODE_ROTATE_0 |
1793 DRM_MODE_REFLECT_X |
1794 DRM_MODE_REFLECT_Y);
1795
1796 /* We must point to the last line when Y reflection is enabled. */
1797 src_y = vc4_state->src_y >> 16;
1798 if (rotation & DRM_MODE_REFLECT_Y)
1799 src_y += height - 1;
1800
1801 src_x = vc4_state->src_x >> 16;
1802
1803 switch (base_format_mod) {
1804 case DRM_FORMAT_MOD_LINEAR:
1805 tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
1806
1807 /* Adjust the base pointer to the first pixel to be scanned
1808 * out.
1809 */
1810 for (i = 0; i < num_planes; i++) {
1811 offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1812 offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1813 }
1814
1815 break;
1816
1817 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1818 case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1819 uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1820 u32 components_per_word;
1821 u32 starting_offset;
1822 u32 fetch_count;
1823
1824 if (param > SCALER_TILE_HEIGHT_MASK) {
1825 DRM_DEBUG_KMS("SAND height too large (%d)\n",
1826 param);
1827 return -EINVAL;
1828 }
1829
1830 if (fb->format->format == DRM_FORMAT_P030) {
1831 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1832 tiling = SCALER6_CTL0_ADDR_MODE_128B;
1833 } else {
1834 hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
1835
1836 switch (base_format_mod) {
1837 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1838 tiling = SCALER6_CTL0_ADDR_MODE_128B;
1839 break;
1840 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1841 tiling = SCALER6_CTL0_ADDR_MODE_256B;
1842 break;
1843 default:
1844 return -EINVAL;
1845 }
1846 }
1847
1848 /* Adjust the base pointer to the first pixel to be scanned
1849 * out.
1850 *
1851 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1852 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1853 * word that should be taken as the first pixel.
1854 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1855 * element within the 128bit word, eg for pixel 3 the value
1856 * should be 6.
1857 */
1858 for (i = 0; i < num_planes; i++) {
1859 u32 tile_w, tile, x_off, pix_per_tile;
1860
1861 if (fb->format->format == DRM_FORMAT_P030) {
1862 /*
1863 * Spec says: bits [31:4] of the given address
1864 * should point to the 128-bit word containing
1865 * the desired starting pixel, and bits[3:0]
1866 * should be between 0 and 11, indicating which
1867 * of the 12-pixels in that 128-bit word is the
1868 * first pixel to be used
1869 */
1870 u32 remaining_pixels = src_x % 96;
1871 u32 aligned = remaining_pixels / 12;
1872 u32 last_bits = remaining_pixels % 12;
1873
1874 x_off = aligned * 16 + last_bits;
1875 tile_w = 128;
1876 pix_per_tile = 96;
1877 } else {
1878 switch (base_format_mod) {
1879 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1880 tile_w = 128;
1881 break;
1882 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1883 tile_w = 256;
1884 break;
1885 default:
1886 return -EINVAL;
1887 }
1888 pix_per_tile = tile_w / fb->format->cpp[0];
1889 x_off = (src_x % pix_per_tile) /
1890 (i ? h_subsample : 1) *
1891 fb->format->cpp[i];
1892 }
1893
1894 tile = src_x / pix_per_tile;
1895
1896 offsets[i] += param * tile_w * tile;
1897 offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1898 offsets[i] += x_off & ~(i ? 1 : 0);
1899 }
1900
1901 components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
1902 starting_offset = src_x % components_per_word;
1903 fetch_count = (width + starting_offset + components_per_word - 1) /
1904 components_per_word;
1905
1906 pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
1907 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
1908 break;
1909 }
1910
1911 default:
1912 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1913 (long long)fb->modifier);
1914 return -EINVAL;
1915 }
1916
1917 /* fetch an extra pixel if we don't actually line up with the left edge. */
1918 if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1919 width++;
1920
1921 /* same for the right side */
1922 if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1923 vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1924 width++;
1925
1926 /* now for the top */
1927 if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1928 height++;
1929
1930 /* and the bottom */
1931 if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1932 vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1933 height++;
1934
1935 /* for YUV444 hardware wants double the width, otherwise it doesn't
1936 * fetch full width of chroma
1937 */
1938 if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1939 width <<= 1;
1940
1941 /* Don't waste cycles mixing with plane alpha if the set alpha
1942 * is opaque or there is no per-pixel alpha information.
1943 * In any case we use the alpha property value as the fixed alpha.
1944 */
1945 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1946 fb->format->has_alpha;
1947
1948 /* Control Word 0: Scaling Configuration & Element Validity*/
1949 vc4_dlist_write(vc4_state,
1950 SCALER6_CTL0_VALID |
1951 VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
1952 vc4_hvs6_get_alpha_mask_mode(state) |
1953 (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
1954 VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
1955 VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
1956 VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
1957 VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
1958
1959 /* Position Word 0: Image Position */
1960 vc4_state->pos0_offset = vc4_state->dlist_count;
1961 vc4_dlist_write(vc4_state,
1962 VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
1963 (rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
1964 VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
1965
1966 /* Control Word 2: Alpha Value & CSC */
1967 vc4_dlist_write(vc4_state,
1968 vc6_plane_get_csc_mode(vc4_state) |
1969 vc4_hvs5_get_alpha_blend_mode(state) |
1970 (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
1971 VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
1972
1973 /* Position Word 1: Scaled Image Dimensions */
1974 if (!vc4_state->is_unity)
1975 vc4_dlist_write(vc4_state,
1976 VC4_SET_FIELD(vc4_state->crtc_h - 1,
1977 SCALER6_POS1_SCL_LINES) |
1978 VC4_SET_FIELD(vc4_state->crtc_w - 1,
1979 SCALER6_POS1_SCL_WIDTH));
1980
1981 /* Position Word 2: Source Image Size */
1982 vc4_state->pos2_offset = vc4_state->dlist_count;
1983 vc4_dlist_write(vc4_state,
1984 VC4_SET_FIELD(height - 1,
1985 SCALER6_POS2_SRC_LINES) |
1986 VC4_SET_FIELD(width - 1,
1987 SCALER6_POS2_SRC_WIDTH));
1988
1989 /* Position Word 3: Context */
1990 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1991
1992 /*
1993 * TODO: This only covers Raster Scan Order planes
1994 */
1995 for (i = 0; i < num_planes; i++) {
1996 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1997 dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];
1998
1999 /* Pointer Word 0 */
2000 vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
2001 vc4_dlist_write(vc4_state,
2002 (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
2003 /*
2004 * The UPM buffer will be allocated in
2005 * vc6_plane_allocate_upm().
2006 */
2007 VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
2008 SCALER6_PTR0_UPPER_ADDR));
2009
2010 /* Pointer Word 1 */
2011 vc4_dlist_write(vc4_state, lower_32_bits(paddr));
2012
2013 /* Pointer Word 2 */
2014 if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
2015 base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
2016 vc4_dlist_write(vc4_state,
2017 VC4_SET_FIELD(fb->pitches[i],
2018 SCALER6_PTR2_PITCH));
2019 } else {
2020 vc4_dlist_write(vc4_state, pitch0);
2021 }
2022 }
2023
2024 /*
2025 * Palette Word 0
2026 * TODO: We're not using the palette mode
2027 */
2028
2029 /*
2030 * Trans Word 0
2031 * TODO: It's only relevant if we set the trans_rgb bit in the
2032 * control word 0, and we don't at the moment.
2033 */
2034
2035 vc4_state->lbm_offset = 0;
2036
2037 if (!vc4_state->is_unity || fb->format->is_yuv) {
2038 /*
2039 * Reserve a slot for the LBM Base Address. The real value will
2040 * be set when calling vc4_plane_allocate_lbm().
2041 */
2042 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2043 vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2044 vc4_state->lbm_offset = vc4_state->dlist_count;
2045 vc4_dlist_counter_increment(vc4_state);
2046 }
2047
2048 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
2049 vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
2050 vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2051 vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2052 if (num_planes > 1)
2053 /*
2054 * Emit Cb/Cr as channel 0 and Y as channel
2055 * 1. This matches how we set up scl0/scl1
2056 * above.
2057 */
2058 vc4_write_scaling_parameters(state, 1);
2059
2060 vc4_write_scaling_parameters(state, 0);
2061 }
2062
2063 /*
2064 * If any PPF setup was done, then all the kernel
2065 * pointers get uploaded.
2066 */
2067 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
2068 vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
2069 vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
2070 vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
2071 u32 kernel =
2072 VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
2073 SCALER_PPF_KERNEL_OFFSET);
2074
2075 /* HPPF plane 0 */
2076 vc4_dlist_write(vc4_state, kernel);
2077 /* VPPF plane 0 */
2078 vc4_dlist_write(vc4_state, kernel);
2079 /* HPPF plane 1 */
2080 vc4_dlist_write(vc4_state, kernel);
2081 /* VPPF plane 1 */
2082 vc4_dlist_write(vc4_state, kernel);
2083 }
2084 }
2085
2086 vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
2087
2088 vc4_state->dlist[0] |=
2089 VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
2090
2091 /* crtc_* are already clipped coordinates. */
2092 covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
2093 vc4_state->crtc_w == state->crtc->mode.hdisplay &&
2094 vc4_state->crtc_h == state->crtc->mode.vdisplay;
2095
2096 /*
2097 * Background fill might be necessary when the plane has per-pixel
2098 * alpha content or a non-opaque plane alpha and could blend from the
2099 * background or does not cover the entire screen.
2100 */
2101 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
2102 state->alpha != DRM_BLEND_ALPHA_OPAQUE;
2103
2104 /*
2105 * Flag the dlist as initialized to avoid checking it twice in case
2106 * the async update check already called vc4_plane_mode_set() and
2107 * decided to fallback to sync update because async update was not
2108 * possible.
2109 */
2110 vc4_state->dlist_initialized = 1;
2111
2112 vc4_plane_calc_load(state);
2113
2114 drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
2115 plane->base.id, plane->name, vc4_state->dlist_count);
2116
2117 return 0;
2118 }
2119
2120 /* If a modeset involves changing the setup of a plane, the atomic
2121 * infrastructure will call this to validate a proposed plane setup.
2122 * However, if a plane isn't getting updated, this (and the
2123 * corresponding vc4_plane_atomic_update) won't get called. Thus, we
2124 * compute the dlist here and have all active plane dlists get updated
2125 * in the CRTC's flush.
2126 */
vc4_plane_atomic_check(struct drm_plane * plane,struct drm_atomic_state * state)2127 static int vc4_plane_atomic_check(struct drm_plane *plane,
2128 struct drm_atomic_state *state)
2129 {
2130 struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2131 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2132 plane);
2133 struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
2134 int ret;
2135
2136 vc4_state->dlist_count = 0;
2137
2138 if (!plane_enabled(new_plane_state)) {
2139 struct drm_plane_state *old_plane_state =
2140 drm_atomic_get_old_plane_state(state, plane);
2141
2142 if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
2143 plane_enabled(old_plane_state)) {
2144 vc6_plane_free_upm(new_plane_state);
2145 }
2146 return 0;
2147 }
2148
2149 if (vc4->gen >= VC4_GEN_6_C)
2150 ret = vc6_plane_mode_set(plane, new_plane_state);
2151 else
2152 ret = vc4_plane_mode_set(plane, new_plane_state);
2153 if (ret)
2154 return ret;
2155
2156 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
2157 !vc4_state->crtc_w || !vc4_state->crtc_h)
2158 return 0;
2159
2160 ret = vc4_plane_allocate_lbm(new_plane_state);
2161 if (ret)
2162 return ret;
2163
2164 if (vc4->gen >= VC4_GEN_6_C) {
2165 ret = vc6_plane_allocate_upm(new_plane_state);
2166 if (ret)
2167 return ret;
2168 }
2169
2170 return 0;
2171 }
2172
/* Intentionally empty atomic_update hook.
 *
 * No contents here. Since we don't know where in the CRTC's
 * dlist we should be stored, our dlist is uploaded to the
 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
 * time.
 */
static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
}
2182
/* Copy the plane's precomputed dlist into the HVS memory at @dlist and
 * remember that hardware location for later in-place async patching.
 *
 * Returns the number of 32-bit words written (the dlist size), even
 * when the device has already been unplugged and nothing was written.
 */
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	unsigned int word;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return vc4_state->dlist_count;

	vc4_state->hw_dlist = dlist;

	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
	for (word = 0; word < vc4_state->dlist_count; word++)
		writel(vc4_state->dlist[word], &dlist[word]);

	drm_dev_exit(idx);

	return vc4_state->dlist_count;
}
2203
/* Returns the size, in 32-bit words, of the dlist computed for this
 * plane state by the last mode set.
 */
u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	return vc4_state->dlist_count;
}
2210
2211 /* Updates the plane to immediately (well, once the FIFO needs
2212 * refilling) scan out from at a new framebuffer.
2213 */
vc4_plane_async_set_fb(struct drm_plane * plane,struct drm_framebuffer * fb)2214 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
2215 {
2216 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2217 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
2218 struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2219 dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
2220 int idx;
2221
2222 if (!drm_dev_enter(plane->dev, &idx))
2223 return;
2224
2225 /* We're skipping the address adjustment for negative origin,
2226 * because this is only called on the primary plane.
2227 */
2228 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
2229
2230 if (vc4->gen == VC4_GEN_6_C) {
2231 u32 value;
2232
2233 value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
2234 ~SCALER6_PTR0_UPPER_ADDR_MASK;
2235 value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
2236 SCALER6_PTR0_UPPER_ADDR);
2237
2238 writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2239 vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;
2240
2241 value = lower_32_bits(dma_addr);
2242 writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
2243 vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
2244 } else {
2245 u32 addr;
2246
2247 addr = (u32)dma_addr;
2248
2249 /* Write the new address into the hardware immediately. The
2250 * scanout will start from this address as soon as the FIFO
2251 * needs to refill with pixels.
2252 */
2253 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2254
2255 /* Also update the CPU-side dlist copy, so that any later
2256 * atomic updates that don't do a new modeset on our plane
2257 * also use our updated address.
2258 */
2259 vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
2260 }
2261
2262 drm_dev_exit(idx);
2263 }
2264
/* Commit an async plane update (e.g. a cursor move).
 *
 * Copies the state precomputed on the transient @state into the
 * plane's current state, then patches only the pos0, pos2 and ptr0
 * dlist words in place in the hardware dlist.
 * vc4_plane_atomic_async_check() has already rejected updates where
 * any other dlist word would differ.
 */
static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_atomic_state *state)
{
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state, *new_vc4_state;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	/* swap() rather than a plain assignment keeps the FB reference
	 * counts balanced: the previously scanned-out FB moves into the
	 * transient state and is released when that state is destroyed.
	 */
	swap(plane->state->fb, new_plane_state->fb);
	plane->state->crtc_x = new_plane_state->crtc_x;
	plane->state->crtc_y = new_plane_state->crtc_y;
	plane->state->crtc_w = new_plane_state->crtc_w;
	plane->state->crtc_h = new_plane_state->crtc_h;
	plane->state->src_x = new_plane_state->src_x;
	plane->state->src_y = new_plane_state->src_y;
	plane->state->src_w = new_plane_state->src_w;
	plane->state->src_h = new_plane_state->src_h;
	plane->state->alpha = new_plane_state->alpha;
	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
	plane->state->rotation = new_plane_state->rotation;
	plane->state->zpos = new_plane_state->zpos;
	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
	plane->state->color_encoding = new_plane_state->color_encoding;
	plane->state->color_range = new_plane_state->color_range;
	plane->state->src = new_plane_state->src;
	plane->state->dst = new_plane_state->dst;
	plane->state->visible = new_plane_state->visible;

	new_vc4_state = to_vc4_plane_state(new_plane_state);
	vc4_state = to_vc4_plane_state(plane->state);

	/* Mirror the vc4-specific state derived by the mode set;
	 * src_w/src_h and the scaling modes are arrays, hence the
	 * memcpy()s.
	 */
	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset[0]] =
		new_vc4_state->dlist[vc4_state->ptr0_offset[0]];

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);

	drm_dev_exit(idx);
}
2338
/* Decide whether a plane update can be applied asynchronously.
 *
 * An async update is only allowed when the new dlist has the same
 * layout as the one currently programmed, and only the pos0, pos2,
 * ptr0 (and LBM pointer) words differ. Anything else requires a full
 * synchronous commit. Returns 0 if async is possible, -EINVAL if not.
 */
static int vc4_plane_atomic_async_check(struct drm_plane *plane,
					struct drm_atomic_state *state, bool flip)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct drm_plane_state *new_plane_state =
		drm_atomic_get_new_plane_state(state, plane);
	struct vc4_plane_state *cur, *new;
	int ret;
	u32 word;

	/* Compute the dlist the new state would need. */
	if (vc4->gen > VC4_GEN_5)
		ret = vc6_plane_mode_set(plane, new_plane_state);
	else
		ret = vc4_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	cur = to_vc4_plane_state(plane->state);
	new = to_vc4_plane_state(new_plane_state);

	/* No dlist programmed yet: nothing to patch in place. */
	if (!new->hw_dlist)
		return -EINVAL;

	/* Layout must match exactly for in-place patching to be safe. */
	if (cur->dlist_count != new->dlist_count ||
	    cur->pos0_offset != new->pos0_offset ||
	    cur->pos2_offset != new->pos2_offset ||
	    cur->ptr0_offset[0] != new->ptr0_offset[0] ||
	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
		return -EINVAL;

	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
	 * if anything else has changed, fallback to a sync update.
	 */
	for (word = 0; word < new->dlist_count; word++) {
		bool patchable = word == new->pos0_offset ||
				 word == new->pos2_offset ||
				 word == new->ptr0_offset[0] ||
				 (new->lbm_offset &&
				  word == new->lbm_offset);

		if (!patchable && new->dlist[word] != cur->dlist[word])
			return -EINVAL;
	}

	return 0;
}
2386
vc4_prepare_fb(struct drm_plane * plane,struct drm_plane_state * state)2387 static int vc4_prepare_fb(struct drm_plane *plane,
2388 struct drm_plane_state *state)
2389 {
2390 struct vc4_bo *bo;
2391 int ret;
2392
2393 if (!state->fb)
2394 return 0;
2395
2396 bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2397
2398 ret = drm_gem_plane_helper_prepare_fb(plane, state);
2399 if (ret)
2400 return ret;
2401
2402 return vc4_bo_inc_usecnt(bo);
2403 }
2404
vc4_cleanup_fb(struct drm_plane * plane,struct drm_plane_state * state)2405 static void vc4_cleanup_fb(struct drm_plane *plane,
2406 struct drm_plane_state *state)
2407 {
2408 struct vc4_bo *bo;
2409
2410 if (!state->fb)
2411 return;
2412
2413 bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2414 vc4_bo_dec_usecnt(bo);
2415 }
2416
/* Plane helpers for the original VC4 generation; selected by
 * vc4_plane_init() when vc4->gen < VC4_GEN_5.
 */
static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	/* prepare_fb/cleanup_fb manage the BO use count on top of the
	 * generic fence handling.
	 */
	.prepare_fb = vc4_prepare_fb,
	.cleanup_fb = vc4_cleanup_fb,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};
2425
/* Plane helpers for VC5 and later generations; selected by
 * vc4_plane_init() when vc4->gen >= VC4_GEN_5. Unlike the VC4 variant
 * there are no prepare_fb/cleanup_fb hooks, so no BO use-count
 * tracking is done here (NOTE(review): presumably unnecessary on
 * these generations -- confirm against the BO code).
 */
static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};
2432
vc4_format_mod_supported(struct drm_plane * plane,uint32_t format,uint64_t modifier)2433 static bool vc4_format_mod_supported(struct drm_plane *plane,
2434 uint32_t format,
2435 uint64_t modifier)
2436 {
2437 /* Support T_TILING for RGB formats only. */
2438 switch (format) {
2439 case DRM_FORMAT_XRGB8888:
2440 case DRM_FORMAT_ARGB8888:
2441 case DRM_FORMAT_ABGR8888:
2442 case DRM_FORMAT_XBGR8888:
2443 case DRM_FORMAT_RGB565:
2444 case DRM_FORMAT_BGR565:
2445 case DRM_FORMAT_ARGB1555:
2446 case DRM_FORMAT_XRGB1555:
2447 switch (fourcc_mod_broadcom_mod(modifier)) {
2448 case DRM_FORMAT_MOD_LINEAR:
2449 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
2450 return true;
2451 default:
2452 return false;
2453 }
2454 case DRM_FORMAT_NV12:
2455 case DRM_FORMAT_NV21:
2456 switch (fourcc_mod_broadcom_mod(modifier)) {
2457 case DRM_FORMAT_MOD_LINEAR:
2458 case DRM_FORMAT_MOD_BROADCOM_SAND64:
2459 case DRM_FORMAT_MOD_BROADCOM_SAND128:
2460 case DRM_FORMAT_MOD_BROADCOM_SAND256:
2461 return true;
2462 default:
2463 return false;
2464 }
2465 case DRM_FORMAT_P030:
2466 switch (fourcc_mod_broadcom_mod(modifier)) {
2467 case DRM_FORMAT_MOD_BROADCOM_SAND128:
2468 return true;
2469 default:
2470 return false;
2471 }
2472 case DRM_FORMAT_RGBX1010102:
2473 case DRM_FORMAT_BGRX1010102:
2474 case DRM_FORMAT_RGBA1010102:
2475 case DRM_FORMAT_BGRA1010102:
2476 case DRM_FORMAT_XRGB4444:
2477 case DRM_FORMAT_ARGB4444:
2478 case DRM_FORMAT_XBGR4444:
2479 case DRM_FORMAT_ABGR4444:
2480 case DRM_FORMAT_RGBX4444:
2481 case DRM_FORMAT_RGBA4444:
2482 case DRM_FORMAT_BGRX4444:
2483 case DRM_FORMAT_BGRA4444:
2484 case DRM_FORMAT_RGB332:
2485 case DRM_FORMAT_BGR233:
2486 case DRM_FORMAT_YUV422:
2487 case DRM_FORMAT_YVU422:
2488 case DRM_FORMAT_YUV420:
2489 case DRM_FORMAT_YVU420:
2490 case DRM_FORMAT_NV16:
2491 case DRM_FORMAT_NV61:
2492 default:
2493 return (modifier == DRM_FORMAT_MOD_LINEAR);
2494 }
2495 }
2496
/* Core plane funcs, shared by all HVS generations: generic atomic
 * helpers for update/disable, plus the vc4-specific state lifecycle
 * and modifier validation.
 */
static const struct drm_plane_funcs vc4_plane_funcs = {
	.update_plane = drm_atomic_helper_update_plane,
	.disable_plane = drm_atomic_helper_disable_plane,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
	.format_mod_supported = vc4_format_mod_supported,
};
2505
/* Allocate and register one plane of the given @type, restricted to
 * @possible_crtcs, with the format list filtered for the current HVS
 * generation and the standard alpha/blend/rotation/color properties
 * attached. Returns the new plane or an ERR_PTR on allocation failure.
 */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs)
{
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_plane *vc4_plane;
	struct drm_plane *plane;
	u32 formats[ARRAY_SIZE(hvs_formats)];
	int nformats = 0;
	unsigned int fmt;

	/* Skip formats that only exist on HVS5+ when running on older
	 * generations.
	 */
	for (fmt = 0; fmt < ARRAY_SIZE(hvs_formats); fmt++) {
		if (hvs_formats[fmt].hvs5_only && vc4->gen < VC4_GEN_5)
			continue;

		formats[nformats++] = hvs_formats[fmt].drm;
	}

	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
					       possible_crtcs,
					       &vc4_plane_funcs,
					       formats, nformats,
					       modifiers, type, NULL);
	if (IS_ERR(vc4_plane))
		return ERR_CAST(vc4_plane);
	plane = &vc4_plane->base;

	drm_plane_helper_add(plane,
			     vc4->gen >= VC4_GEN_5 ? &vc5_plane_helper_funcs :
						     &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_blend_mode_property(plane,
					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
					     BIT(DRM_MODE_BLEND_PREMULTI) |
					     BIT(DRM_MODE_BLEND_COVERAGE));
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	drm_plane_create_color_properties(plane,
					  BIT(DRM_COLOR_YCBCR_BT601) |
					  BIT(DRM_COLOR_YCBCR_BT709) |
					  BIT(DRM_COLOR_YCBCR_BT2020),
					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
					  DRM_COLOR_YCBCR_BT709,
					  DRM_COLOR_YCBCR_LIMITED_RANGE);

	/* The primary plane is pinned to the bottom of the stack. */
	if (type == DRM_PLANE_TYPE_PRIMARY)
		drm_plane_create_zpos_immutable_property(plane, 0);

	return plane;
}
2571
2572 #define VC4_NUM_OVERLAY_PLANES 16
2573
vc4_plane_create_additional_planes(struct drm_device * drm)2574 int vc4_plane_create_additional_planes(struct drm_device *drm)
2575 {
2576 struct drm_plane *cursor_plane;
2577 struct drm_crtc *crtc;
2578 unsigned int i;
2579
2580 /* Set up some arbitrary number of planes. We're not limited
2581 * by a set number of physical registers, just the space in
2582 * the HVS (16k) and how small an plane can be (28 bytes).
2583 * However, each plane we set up takes up some memory, and
2584 * increases the cost of looping over planes, which atomic
2585 * modesetting does quite a bit. As a result, we pick a
2586 * modest number of planes to expose, that should hopefully
2587 * still cover any sane usecase.
2588 */
2589 for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
2590 struct drm_plane *plane =
2591 vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
2592 GENMASK(drm->mode_config.num_crtc - 1, 0));
2593
2594 if (IS_ERR(plane))
2595 continue;
2596
2597 /* Create zpos property. Max of all the overlays + 1 primary +
2598 * 1 cursor plane on a crtc.
2599 */
2600 drm_plane_create_zpos_property(plane, i + 1, 1,
2601 VC4_NUM_OVERLAY_PLANES + 1);
2602 }
2603
2604 drm_for_each_crtc(crtc, drm) {
2605 /* Set up the legacy cursor after overlay initialization,
2606 * since the zpos fallback is that planes are rendered by plane
2607 * ID order, and that then puts the cursor on top.
2608 */
2609 cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
2610 drm_crtc_mask(crtc));
2611 if (!IS_ERR(cursor_plane)) {
2612 crtc->cursor = cursor_plane;
2613
2614 drm_plane_create_zpos_property(cursor_plane,
2615 VC4_NUM_OVERLAY_PLANES + 1,
2616 1,
2617 VC4_NUM_OVERLAY_PLANES + 1);
2618 }
2619 }
2620
2621 return 0;
2622 }
2623