1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2015 Broadcom
4 */
5
6 /**
7 * DOC: VC4 plane module
8 *
9 * Each DRM plane is a layer of pixels being scanned out by the HVS.
10 *
11 * At atomic modeset check time, we compute the HVS display element
12 * state that would be necessary for displaying the plane (giving us a
13 * chance to figure out if a plane configuration is invalid), then at
14 * atomic flush time the CRTC will ask us to write our element state
15 * into the region of the HVS that it has allocated for us.
16 */
17
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_blend.h>
22 #include <drm/drm_drv.h>
23 #include <drm/drm_fb_dma_helper.h>
24 #include <drm/drm_fourcc.h>
25 #include <drm/drm_framebuffer.h>
26 #include <drm/drm_gem_atomic_helper.h>
27
28 #include "uapi/drm/vc4_drm.h"
29
30 #include "vc4_drv.h"
31 #include "vc4_regs.h"
32
/*
 * Translation table from DRM fourcc codes to the HVS native pixel format
 * and component ordering. Looked up by vc4_get_hvs_format() at atomic
 * check time.
 */
static const struct hvs_format {
	u32 drm; /* DRM_FORMAT_* */
	u32 hvs; /* HVS_FORMAT_* */
	u32 pixel_order; /* HVS_PIXEL_ORDER_* (pre-HVS5 component order) */
	u32 pixel_order_hvs5; /* HVS_PIXEL_ORDER_* (HVS5 and later) */
	bool hvs5_only; /* format has no pre-HVS5 encoding */
} hvs_formats[] = {
	{
		.drm = DRM_FORMAT_XRGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ABGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XBGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_ARGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XRGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_RGB888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_YUV422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	/*
	 * NOTE(review): the two 444 entries reuse the 422 3-plane HVS
	 * format — presumably handled via width adjustment elsewhere;
	 * confirm against the dlist setup code.
	 */
	{
		.drm = DRM_FORMAT_YUV444,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU444,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV12,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV21,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV16,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV61,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_P030,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_XRGB2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_ARGB2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_ABGR2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_XBGR2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_RGB332,
		.hvs = HVS_PIXEL_FORMAT_RGB332,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_BGR233,
		.hvs = HVS_PIXEL_FORMAT_RGB332,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XRGB4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XBGR4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_ABGR4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_BGRX4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_RGBA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
	},
	{
		.drm = DRM_FORMAT_BGRA4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_RGBA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
	},
	{
		.drm = DRM_FORMAT_RGBX4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_BGRA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
	},
	{
		.drm = DRM_FORMAT_RGBA4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_BGRA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
	},
};
251
/* Look up the HVS description for a DRM fourcc, or NULL if unsupported. */
static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
{
	const struct hvs_format *fmt = hvs_formats;
	const struct hvs_format *end = hvs_formats + ARRAY_SIZE(hvs_formats);

	/* Linear scan: the table is small and this only runs at check time. */
	for (; fmt < end; fmt++) {
		if (fmt->drm == drm_format)
			return fmt;
	}

	return NULL;
}
263
/* Pick the scaler mode for one axis from a 16.16 source size and an
 * integer destination size.
 */
static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
	u32 src_pixels = src >> 16;

	if (dst == src_pixels)
		return VC4_SCALING_NONE;

	/* PPF covers ratios down to 2/3 of the source; anything smaller
	 * goes through the trapezoidal scaler.
	 */
	return (3 * dst >= 2 * src_pixels) ? VC4_SCALING_PPF : VC4_SCALING_TPZ;
}
273
plane_enabled(struct drm_plane_state * state)274 static bool plane_enabled(struct drm_plane_state *state)
275 {
276 return state->fb && !WARN_ON(!state->crtc);
277 }
278
/* Duplicate the plane's atomic state for a new commit.
 *
 * The LBM node is cleared (the new state gets its own allocation at
 * check time), the UPM refcounts are bumped because old and new state
 * now both reference the same handles, and the HVS display list is
 * deep-copied so the two states never share a buffer.
 */
static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state;
	unsigned int i;

	if (WARN_ON(!plane->state))
		return NULL;

	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return NULL;

	/* Don't inherit the old state's LBM allocation. */
	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));

	/* Take a reference on every UPM handle the copied state carries. */
	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		if (vc4_state->upm_handle[i])
			refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
	}

	vc4_state->dlist_initialized = 0;

	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);

	if (vc4_state->dlist) {
		/* Deep-copy; the copy is sized to exactly the used entries. */
		vc4_state->dlist = kmemdup(vc4_state->dlist,
					   vc4_state->dlist_count * 4,
					   GFP_KERNEL);
		if (!vc4_state->dlist) {
			kfree(vc4_state);
			return NULL;
		}
		vc4_state->dlist_size = vc4_state->dlist_count;
	}

	return &vc4_state->base;
}
317
/* Return the UPM memory region and IDA handle for @upm_handle.
 *
 * Both call sites only invoke this after refcount_dec_and_test(), i.e.
 * once the last reference to the handle has been dropped.
 */
static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
{
	struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
	unsigned long irqflags;

	/* mm_lock serializes all accesses to the HVS memory managers. */
	spin_lock_irqsave(&hvs->mm_lock, irqflags);
	drm_mm_remove_node(&refcount->upm);
	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	/* Reset the bookkeeping so a stale entry can't be mistaken for live. */
	refcount->upm.start = 0;
	refcount->upm.size = 0;
	refcount->size = 0;

	ida_free(&hvs->upm_handles, upm_handle);
}
332
/* Free a plane state: release its LBM node, drop its UPM references
 * (freeing each handle on last use), and free the display-list copy.
 */
static void vc4_plane_destroy_state(struct drm_plane *plane,
				    struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int i;

	if (drm_mm_node_allocated(&vc4_state->lbm)) {
		unsigned long irqflags;

		/* mm_lock serializes all accesses to the HVS memory managers. */
		spin_lock_irqsave(&hvs->mm_lock, irqflags);
		drm_mm_remove_node(&vc4_state->lbm);
		spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	}

	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		struct vc4_upm_refcounts *refcount;

		if (!vc4_state->upm_handle[i])
			continue;

		refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];

		/* Last reference: give the UPM region and handle back. */
		if (refcount_dec_and_test(&refcount->refcount))
			vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
	}

	kfree(vc4_state->dlist);
	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
	kfree(state);
}
365
366 /* Called during init to allocate the plane's atomic state. */
vc4_plane_reset(struct drm_plane * plane)367 static void vc4_plane_reset(struct drm_plane *plane)
368 {
369 struct vc4_plane_state *vc4_state;
370
371 if (plane->state)
372 __drm_atomic_helper_plane_destroy_state(plane->state);
373
374 kfree(plane->state);
375
376 vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
377 if (!vc4_state)
378 return;
379
380 __drm_atomic_helper_plane_reset(plane, &vc4_state->base);
381 }
382
/* Reserve room for one more dlist entry, doubling the backing buffer
 * (starting at 4 entries) when it is full.
 *
 * NOTE(review): on kmalloc_array() failure the counter is left
 * untouched and the caller gets no error indication — callers must not
 * assume the reservation succeeded.
 */
static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
{
	if (vc4_state->dlist_count == vc4_state->dlist_size) {
		u32 new_size = max(4u, vc4_state->dlist_count * 2);
		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);

		if (!new_dlist)
			return;
		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);

		kfree(vc4_state->dlist);
		vc4_state->dlist = new_dlist;
		vc4_state->dlist_size = new_size;
	}

	vc4_state->dlist_count++;
}
400
/* Append @val to the plane's display list.
 *
 * If vc4_dlist_counter_increment() could not grow the dlist (memory
 * exhaustion) it leaves the counter untouched; in that case writing to
 * dlist[idx] would land one word past the end of the current buffer.
 * Skip the write instead — the entry is silently dropped, matching the
 * existing best-effort policy of the grow path.
 */
static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
{
	unsigned int idx = vc4_state->dlist_count;

	vc4_dlist_counter_increment(vc4_state);
	if (idx == vc4_state->dlist_count)
		return;

	vc4_state->dlist[idx] = val;
}
408
409 /* Returns the scl0/scl1 field based on whether the dimensions need to
410 * be up/down/non-scaled.
411 *
412 * This is a replication of a table from the spec.
413 */
vc4_get_scl_field(struct drm_plane_state * state,int plane)414 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
415 {
416 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
417
418 switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
419 case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
420 return SCALER_CTL0_SCL_H_PPF_V_PPF;
421 case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
422 return SCALER_CTL0_SCL_H_TPZ_V_PPF;
423 case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
424 return SCALER_CTL0_SCL_H_PPF_V_TPZ;
425 case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
426 return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
427 case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
428 return SCALER_CTL0_SCL_H_PPF_V_NONE;
429 case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
430 return SCALER_CTL0_SCL_H_NONE_V_PPF;
431 case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
432 return SCALER_CTL0_SCL_H_NONE_V_TPZ;
433 case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
434 return SCALER_CTL0_SCL_H_TPZ_V_NONE;
435 default:
436 case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
437 /* The unity case is independently handled by
438 * SCALER_CTL0_UNITY.
439 */
440 return 0;
441 }
442 }
443
/* Rescale and offset the plane's CRTC rectangle so it fits inside the
 * overscan margins configured on the CRTC, clamping the position so it
 * never crosses into a border.
 *
 * Returns 0 on success (including the no-margin fast path), -EINVAL if
 * the margins leave no display area or the plane would shrink to zero.
 */
static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
{
	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
	struct drm_crtc_state *crtc_state;

	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
						   pstate->crtc);

	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
	if (!left && !right && !top && !bottom)
		return 0;

	/* Margins must leave at least one visible pixel in each axis. */
	if (left + right >= crtc_state->mode.hdisplay ||
	    top + bottom >= crtc_state->mode.vdisplay)
		return -EINVAL;

	/* Scale x proportionally into the reduced width, then shift right
	 * by the left margin and clamp to the right border.
	 */
	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_x += left;
	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;

	/* Same treatment for y with the top/bottom margins. */
	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);
	vc4_pstate->crtc_y += top;
	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;

	/* Shrink the plane size by the same ratios. */
	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);

	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
		return -EINVAL;

	return 0;
}
489
/* Fill the vc4_plane_state clip rectangle and per-channel scaling modes
 * from the generic plane state.
 *
 * Source coordinates stay in 16.16 fixed point as produced by
 * drm_atomic_helper_check_plane_state(); CRTC coordinates are integer
 * pixels. Channel 0 is the Y/RGB plane, channel 1 the (subsampled) UV
 * plane of multi-planar formats.
 *
 * Returns 0 on success or a negative error code.
 */
static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	int num_planes = fb->format->num_planes;
	struct drm_crtc_state *crtc_state;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	int ret;

	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

	/* Any downscale factor, any upscale factor, can position off-screen. */
	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
	if (ret)
		return ret;

	/* Clipped source rectangle, 16.16 fixed point. */
	vc4_state->src_x = state->src.x1;
	vc4_state->src_y = state->src.y1;
	vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
	vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;

	/* Clipped destination rectangle, integer pixels. */
	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;

	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		/* The chroma plane is smaller by the subsampling factors. */
		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
					     vc4_state->crtc_h);

		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;

		/* Similarly UV needs vertical scaling to be enabled.
		 * Without this a 1:1 scaled YUV422 plane isn't rendered.
		 */
		if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
			vc4_state->y_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}
569
/* Emit the trapezoidal (TPZ) scaler parameters for one axis.
 *
 * @src is a 16.16 fixed-point source size and @dst an integer
 * destination size, so @scale ends up a 16.16 down-scale ratio.
 */
static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale, recip;

	/* The TPZ dlist layout used here only exists up to GEN_6_D. */
	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	scale = src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			/*
			 * The BCM2712 is lacking BIT(31) compared to
			 * the previous generations, but we don't use
			 * it.
			 */
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}
595
596 /* phase magnitude bits */
597 #define PHASE_BITS 6
598
/* Emit the polyphase filter (PPF) scaler parameters for one axis.
 *
 * @src: 16.16 fixed-point source size.
 * @dst: integer destination size (scale = src / dst, 16.16 ratio).
 * @xy: 16.16 fixed-point source start coordinate, used to derive the
 *      initial filter phase.
 * @channel: 0 for the Y/RGB channel, non-zero for chroma.
 */
static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
			  u32 xy, int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale = src / dst;
	s32 offset, offset2;
	s32 phase;

	/* The PPF dlist layout used here only exists up to GEN_6_D. */
	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/*
	 * Start the phase at 1/2 pixel from the 1st pixel at src_x.
	 * 1/4 pixel for YUV.
	 */
	if (channel) {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
		offset += -(1 << PHASE_BITS >> 2);
	} else {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0xffff) >> (16 - PHASE_BITS);
		offset += -(1 << PHASE_BITS >> 1);

		/*
		 * This is a kludge to make sure the scaling factors are
		 * consistent with YUV's luma scaling. We lose 1-bit precision
		 * because of this.
		 */
		scale &= ~1;
	}

	/*
	 * There may be a also small error introduced by precision of scale.
	 * Add half of that as a compromise
	 */
	offset2 = src - dst * scale;
	offset2 >>= 16 - PHASE_BITS;
	phase = offset + (offset2 >> 1);

	/* Ensure +ve values don't touch the sign bit, then truncate negative values */
	if (phase >= 1 << PHASE_BITS)
		phase = (1 << PHASE_BITS) - 1;

	phase &= SCALER_PPF_IPHASE_MASK;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			/*
			 * The register layout documentation is slightly
			 * different to setup the phase in the BCM2712,
			 * but they seem equivalent.
			 */
			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
}
660
/* Compute the line-buffer memory (LBM) requirement, in LBM words, for
 * the pre-BCM2712 HVS generations. Returns 0 when no vertical scaling
 * is performed (no line buffering needed).
 */
static u32 __vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 pix_per_line;
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case since src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0] >> 16;

	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;

	return lbm;
}
708
vc4_lbm_words_per_component(const struct drm_plane_state * state,unsigned int channel)709 static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
710 unsigned int channel)
711 {
712 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
713
714 switch (vc4_state->y_scaling[channel]) {
715 case VC4_SCALING_PPF:
716 return 4;
717
718 case VC4_SCALING_TPZ:
719 return 2;
720
721 default:
722 return 0;
723 }
724 }
725
vc4_lbm_components(const struct drm_plane_state * state,unsigned int channel)726 static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
727 unsigned int channel)
728 {
729 const struct drm_format_info *info = state->fb->format;
730 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
731
732 if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
733 return 0;
734
735 if (info->is_yuv)
736 return channel ? 2 : 1;
737
738 if (info->has_alpha)
739 return 4;
740
741 return 3;
742 }
743
vc4_lbm_channel_size(const struct drm_plane_state * state,unsigned int channel)744 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
745 unsigned int channel)
746 {
747 const struct drm_format_info *info = state->fb->format;
748 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
749 unsigned int channels_scaled = 0;
750 unsigned int components, words, wpc;
751 unsigned int width, lines;
752 unsigned int i;
753
754 /* LBM is meant to use the smaller of source or dest width, but there
755 * is a issue with UV scaling that the size required for the second
756 * channel is based on the source width only.
757 */
758 if (info->hsub > 1 && channel == 1)
759 width = state->src_w >> 16;
760 else
761 width = min(state->src_w >> 16, state->crtc_w);
762 width = round_up(width / info->hsub, 4);
763
764 wpc = vc4_lbm_words_per_component(state, channel);
765 if (!wpc)
766 return 0;
767
768 components = vc4_lbm_components(state, channel);
769 if (!components)
770 return 0;
771
772 if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha)
773 components -= 1;
774
775 words = width * wpc * components;
776
777 lines = DIV_ROUND_UP(words, 128 / info->hsub);
778
779 for (i = 0; i < 2; i++)
780 if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
781 channels_scaled++;
782
783 if (channels_scaled == 1)
784 lines = lines / 2;
785
786 return lines;
787 }
788
__vc6_lbm_size(const struct drm_plane_state * state)789 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
790 {
791 const struct drm_format_info *info = state->fb->format;
792
793 if (info->hsub > 1)
794 return max(vc4_lbm_channel_size(state, 0),
795 vc4_lbm_channel_size(state, 1));
796 else
797 return vc4_lbm_channel_size(state, 0);
798 }
799
vc4_lbm_size(struct drm_plane_state * state)800 static u32 vc4_lbm_size(struct drm_plane_state *state)
801 {
802 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
803 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
804
805 /* LBM is not needed when there's no vertical scaling. */
806 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
807 vc4_state->y_scaling[1] == VC4_SCALING_NONE)
808 return 0;
809
810 if (vc4->gen >= VC4_GEN_6_C)
811 return __vc6_lbm_size(state);
812 else
813 return __vc4_lbm_size(state);
814 }
815
/* Size in bytes of the UPM fetch buffer needed for framebuffer plane
 * @plane, rounded up to a whole number of UPM words.
 */
static size_t vc6_upm_size(const struct drm_plane_state *state,
			   unsigned int plane)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int stride = state->fb->pitches[plane];

	/*
	 * TODO: This only works for raster formats, and is sub-optimal
	 * for buffers with a stride aligned on 32 bytes.
	 */
	unsigned int words_per_line = (stride + 62) / 32;
	unsigned int fetch_region_size = words_per_line * 32;
	/* upm_buffer_lines is the log2-style SCALER6 encoding: 2 << n lines. */
	unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
	unsigned int buffer_size = fetch_region_size * buffer_lines;

	return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
}
833
/* Emit the PPF/TPZ scaler words for @channel into the display list, in
 * the order the hardware expects: H-PPF, V-PPF (+context), H-TPZ,
 * V-TPZ (+context). Only the words for the active scaling modes are
 * written.
 */
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* This dlist layout only exists up to GEN_6_D. */
	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state, vc4_state->src_w[channel],
			      vc4_state->crtc_w, vc4_state->src_x, channel);
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state, vc4_state->src_h[channel],
			      vc4_state->crtc_h, vc4_state->src_y, channel);
		/* Context word: filled in by the hardware, any value works. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state, vc4_state->src_w[channel],
			      vc4_state->crtc_w);
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state, vc4_state->src_h[channel],
			      vc4_state->crtc_h);
		/* Context word: filled in by the hardware, any value works. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}
868
/* Estimate the memory-bus load (bytes/sec) and HVS load (cycles/sec)
 * this plane configuration will generate, storing the results in
 * vc4_state->membus_load and vc4_state->hvs_load.
 */
static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * Alpha blending step seems to be pipelined and it's always operating
	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even if the bandwidth/plane required for a single frame is
		 *
		 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
		 * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be punctually higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
					     vc4_state->crtc_h);
		vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
					  (vc4_state->src_h[i] >> 16) *
					  vscale_factor * fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}
924
/* Allocate (or keep) the LBM region for this plane state and patch its
 * start address into the display list at lbm_offset.
 *
 * Returns 0 on success or a negative error code.
 */
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct drm_plane *plane = state->plane;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	/*
	 * NOTE: BCM2712 doesn't need to be aligned, since the size
	 * returned by vc4_lbm_size() is in words already.
	 */
	if (vc4->gen == VC4_GEN_5)
		lbm_size = ALIGN(lbm_size, 64);
	else if (vc4->gen == VC4_GEN_4)
		lbm_size = ALIGN(lbm_size, 32);

	drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
		       plane->base.id, plane->name, lbm_size);

	/* The dlist must already contain a slot for the LBM pointer. */
	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
	 */
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
		int ret;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
						 lbm_size, 1,
						 0, 0);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret) {
			drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
			return ret;
		}
	} else {
		/* An existing node must match the size we'd allocate now. */
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}
978
/* Allocate (or reuse) a UPM region per framebuffer plane for BCM2712
 * and patch the UPM base/handle/size fields into the PTR0 display-list
 * words.
 *
 * A handle from the previous state is kept when the required size is
 * unchanged; otherwise its reference is dropped and a new handle and
 * region are allocated.
 *
 * Returns 0 on success or a negative error code.
 */
static int vc6_plane_allocate_upm(struct drm_plane_state *state)
{
	const struct drm_format_info *info = state->fb->format;
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int i;
	int ret;

	/* The UPM only exists from BCM2712 (GEN_6) onwards. */
	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);

	vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;

	for (i = 0; i < info->num_planes; i++) {
		struct vc4_upm_refcounts *refcount;
		int upm_handle;
		unsigned long irqflags;
		size_t upm_size;

		upm_size = vc6_upm_size(state, i);
		if (!upm_size)
			return -EINVAL;
		upm_handle = vc4_state->upm_handle[i];

		if (upm_handle &&
		    hvs->upm_refcounts[upm_handle].size == upm_size) {
			/* Allocation is the same size as the previous user of
			 * the plane. Keep the allocation.
			 */
			vc4_state->upm_handle[i] = upm_handle;
		} else {
			/* Drop our reference on the old, wrongly-sized handle. */
			if (upm_handle &&
			    refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
				vc4_plane_release_upm_ida(hvs, upm_handle);
				vc4_state->upm_handle[i] = 0;
			}

			/* Handles start at 1 so 0 can mean "none". */
			upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
						     VC4_NUM_UPM_HANDLES,
						     GFP_KERNEL);
			if (upm_handle < 0) {
				drm_dbg(drm, "Out of upm_handles\n");
				return upm_handle;
			}
			vc4_state->upm_handle[i] = upm_handle;

			refcount = &hvs->upm_refcounts[upm_handle];
			refcount_set(&refcount->refcount, 1);
			refcount->size = upm_size;

			spin_lock_irqsave(&hvs->mm_lock, irqflags);
			ret = drm_mm_insert_node_generic(&hvs->upm_mm,
							 &refcount->upm,
							 upm_size, HVS_UBM_WORD_SIZE,
							 0, 0);
			spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
			if (ret) {
				drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
				/* Undo the handle allocation above. */
				refcount_set(&refcount->refcount, 0);
				ida_free(&hvs->upm_handles, upm_handle);
				vc4_state->upm_handle[i] = 0;
				return ret;
			}
		}

		refcount = &hvs->upm_refcounts[upm_handle];
		/* Patch base/handle/size into the PTR0 word for this plane.
		 * The hardware handle field is the IDA handle minus 1.
		 */
		vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
			VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
				      SCALER6_PTR0_UPM_BASE) |
			VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
				      SCALER6_PTR0_UPM_HANDLE) |
			VC4_SET_FIELD(vc4_state->upm_buffer_lines,
				      SCALER6_PTR0_UPM_BUFF_SIZE);
	}

	return 0;
}
1057
vc6_plane_free_upm(struct drm_plane_state * state)1058 static void vc6_plane_free_upm(struct drm_plane_state *state)
1059 {
1060 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1061 struct drm_device *drm = state->plane->dev;
1062 struct vc4_dev *vc4 = to_vc4_dev(drm);
1063 struct vc4_hvs *hvs = vc4->hvs;
1064 unsigned int i;
1065
1066 WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
1067
1068 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
1069 unsigned int upm_handle;
1070
1071 upm_handle = vc4_state->upm_handle[i];
1072 if (!upm_handle)
1073 continue;
1074
1075 if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
1076 vc4_plane_release_upm_ida(hvs, upm_handle);
1077 vc4_state->upm_handle[i] = 0;
1078 }
1079 }
1080
/*
 * The colorspace conversion matrices are held in 3 entries in the dlist.
 * Create an array of them, with entries for each full and limited mode, and
 * each supported colorspace.
 *
 * Indexed as colorspace_coeffs[color_range][color_encoding][dlist word],
 * where color_range is 0 for limited and 1 for full range, and
 * color_encoding follows the DRM_COLOR_YCBCR_* enum order
 * (BT601, BT709, BT2020).
 */
static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
	{
		/* Limited range */
		{
			/* BT601 */
			SCALER_CSC0_ITR_R_601_5,
			SCALER_CSC1_ITR_R_601_5,
			SCALER_CSC2_ITR_R_601_5,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3,
			SCALER_CSC1_ITR_R_709_3,
			SCALER_CSC2_ITR_R_709_3,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020,
			SCALER_CSC1_ITR_R_2020,
			SCALER_CSC2_ITR_R_2020,
		}
	}, {
		/* Full range */
		{
			/* JFIF */
			SCALER_CSC0_JPEG_JFIF,
			SCALER_CSC1_JPEG_JFIF,
			SCALER_CSC2_JPEG_JFIF,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3_FR,
			SCALER_CSC1_ITR_R_709_3_FR,
			SCALER_CSC2_ITR_R_709_3_FR,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020_FR,
			SCALER_CSC1_ITR_R_2020_FR,
			SCALER_CSC2_ITR_R_2020_FR,
		}
	}
};
1125
vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state * state)1126 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
1127 {
1128 struct drm_device *dev = state->state->dev;
1129 struct vc4_dev *vc4 = to_vc4_dev(dev);
1130
1131 WARN_ON_ONCE(vc4->gen != VC4_GEN_4);
1132
1133 if (!state->fb->format->has_alpha)
1134 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1135 SCALER_POS2_ALPHA_MODE);
1136
1137 switch (state->pixel_blend_mode) {
1138 case DRM_MODE_BLEND_PIXEL_NONE:
1139 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
1140 SCALER_POS2_ALPHA_MODE);
1141 default:
1142 case DRM_MODE_BLEND_PREMULTI:
1143 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1144 SCALER_POS2_ALPHA_MODE) |
1145 SCALER_POS2_ALPHA_PREMULT;
1146 case DRM_MODE_BLEND_COVERAGE:
1147 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
1148 SCALER_POS2_ALPHA_MODE);
1149 }
1150 }
1151
vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state * state)1152 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
1153 {
1154 struct drm_device *dev = state->state->dev;
1155 struct vc4_dev *vc4 = to_vc4_dev(dev);
1156
1157 WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C &&
1158 vc4->gen != VC4_GEN_6_D);
1159
1160 switch (vc4->gen) {
1161 default:
1162 case VC4_GEN_5:
1163 case VC4_GEN_6_C:
1164 if (!state->fb->format->has_alpha)
1165 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1166 SCALER5_CTL2_ALPHA_MODE);
1167
1168 switch (state->pixel_blend_mode) {
1169 case DRM_MODE_BLEND_PIXEL_NONE:
1170 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
1171 SCALER5_CTL2_ALPHA_MODE);
1172 default:
1173 case DRM_MODE_BLEND_PREMULTI:
1174 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1175 SCALER5_CTL2_ALPHA_MODE) |
1176 SCALER5_CTL2_ALPHA_PREMULT;
1177 case DRM_MODE_BLEND_COVERAGE:
1178 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
1179 SCALER5_CTL2_ALPHA_MODE);
1180 }
1181 case VC4_GEN_6_D:
1182 /* 2712-D configures fixed alpha mode in CTL0 */
1183 return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ?
1184 SCALER5_CTL2_ALPHA_PREMULT : 0;
1185 }
1186 }
1187
vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state * state)1188 static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state)
1189 {
1190 struct drm_device *dev = state->state->dev;
1191 struct vc4_dev *vc4 = to_vc4_dev(dev);
1192
1193 WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D);
1194
1195 if (vc4->gen == VC4_GEN_6_D &&
1196 (!state->fb->format->has_alpha ||
1197 state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE))
1198 return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED,
1199 SCALER6_CTL0_ALPHA_MASK);
1200
1201 return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK);
1202 }
1203
/* Writes out a full display list for an active plane to the plane's
 * private dlist state.
 *
 * This covers VC4 (GEN_4) and VC5 (GEN_5) hardware; GEN_6 uses
 * vc6_plane_mode_set() instead.  The dlist is built in the order the
 * hardware expects: control word, position words, pointer words,
 * pointer-context words, pitch words, optional CSC words, and optional
 * LBM/scaling-parameter words.  Returns 0 on success or a negative
 * errno for unsupported modifiers.
 */
static int vc4_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	u32 ctl0_offset = vc4_state->dlist_count;
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
	int num_planes = fb->format->num_planes;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	bool mix_plane_alpha;
	bool covers_screen;
	u32 scl0, scl1, pitch0;
	u32 tiling, src_x, src_y;
	u32 width, height;
	u32 hvs_format = format->hvs;
	unsigned int rotation;
	u32 offsets[3] = { 0 };
	int ret, i;

	/* The dlist may already have been built by the async-update path;
	 * see the comment at the end of this function.
	 */
	if (vc4_state->dlist_initialized)
		return 0;

	ret = vc4_plane_setup_clipping_and_scaling(state);
	if (ret)
		return ret;

	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
		/* 0 source size probably means the plane is offscreen */
		vc4_state->dlist_initialized = 1;
		return 0;
	}

	/* src_* are 16.16 fixed point; take the integer part. */
	width = vc4_state->src_w[0] >> 16;
	height = vc4_state->src_h[0] >> 16;

	/* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing. For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
		scl0 = vc4_get_scl_field(state, 0);
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}

	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y >> 16;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += height - 1;

	src_x = vc4_state->src_x >> 16;

	/* Compute the tiling mode, pitch word 0, and per-plane byte offsets
	 * of the first pixel to be fetched for the supported modifiers.
	 */
	switch (base_format_mod) {
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER_CTL0_TILING_LINEAR;
		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
		}

		break;

	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
		u32 tile_size_shift = 12; /* T tiles are 4kb */
		/* Whole-tile offsets, mostly for setting the pitch. */
		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
		u32 tile_w_mask = (1 << tile_w_shift) - 1;
		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
		 * the height (in pixels) of a 4k tile.
		 */
		u32 tile_h_mask = (2 << tile_h_shift) - 1;
		/* For T-tiled, the FB pitch is "how many bytes from one row to
		 * the next, such that
		 *
		 *	pitch * tile_h == tile_size * tiles_per_row
		 */
		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
		u32 tiles_l = src_x >> tile_w_shift;
		u32 tiles_r = tiles_w - tiles_l;
		u32 tiles_t = src_y >> tile_h_shift;
		/* Intra-tile offsets, which modify the base address (the
		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
		 * base address).
		 */
		u32 tile_y = (src_y >> 4) & 1;
		u32 subtile_y = (src_y >> 2) & 3;
		u32 utile_y = src_y & 3;
		u32 x_off = src_x & tile_w_mask;
		u32 y_off = src_y & tile_h_mask;

		/* When Y reflection is requested we must set the
		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
		 * after the initial one should be fetched in descending order,
		 * which makes sense since we start from the last line and go
		 * backward.
		 * Don't know why we need y_off = max_y_off - y_off, but it's
		 * definitely required (I guess it's also related to the "going
		 * backward" situation).
		 */
		if (rotation & DRM_MODE_REFLECT_Y) {
			y_off = tile_h_mask - y_off;
			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
		} else {
			pitch0 = 0;
		}

		tiling = SCALER_CTL0_TILING_256B_OR_T;
		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
		offsets[0] += tiles_t * (tiles_w << tile_size_shift);
		offsets[0] += subtile_y << 8;
		offsets[0] += utile_y << 4;

		/* Rows of tiles alternate left-to-right and right-to-left. */
		if (tiles_t & 1) {
			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
			offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
			offsets[0] -= (1 + !tile_y) << 10;
		} else {
			offsets[0] += tiles_l << tile_size_shift;
			offsets[0] += tile_y << 10;
		}

		break;
	}

	case DRM_FORMAT_MOD_BROADCOM_SAND64:
	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		/* The modifier parameter encodes the SAND column height. */
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n",
				      param);
			return -EINVAL;
		}

		if (fb->format->format == DRM_FORMAT_P030) {
			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
			tiling = SCALER_CTL0_TILING_128B;
		} else {
			hvs_format = HVS_PIXEL_FORMAT_H264;

			switch (base_format_mod) {
			case DRM_FORMAT_MOD_BROADCOM_SAND64:
				tiling = SCALER_CTL0_TILING_64B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND128:
				tiling = SCALER_CTL0_TILING_128B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND256:
				tiling = SCALER_CTL0_TILING_256B_OR_T;
				break;
			default:
				return -EINVAL;
			}
		}

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 *
		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
		 * word that should be taken as the first pixel.
		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
		 * element within the 128bit word, eg for pixel 3 the value
		 * should be 6.
		 */
		for (i = 0; i < num_planes; i++) {
			u32 tile_w, tile, x_off, pix_per_tile;

			if (fb->format->format == DRM_FORMAT_P030) {
				/*
				 * Spec says: bits [31:4] of the given address
				 * should point to the 128-bit word containing
				 * the desired starting pixel, and bits[3:0]
				 * should be between 0 and 11, indicating which
				 * of the 12-pixels in that 128-bit word is the
				 * first pixel to be used
				 */
				u32 remaining_pixels = src_x % 96;
				u32 aligned = remaining_pixels / 12;
				u32 last_bits = remaining_pixels % 12;

				x_off = aligned * 16 + last_bits;
				tile_w = 128;
				pix_per_tile = 96;
			} else {
				switch (base_format_mod) {
				case DRM_FORMAT_MOD_BROADCOM_SAND64:
					tile_w = 64;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND128:
					tile_w = 128;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND256:
					tile_w = 256;
					break;
				default:
					return -EINVAL;
				}
				pix_per_tile = tile_w / fb->format->cpp[0];
				x_off = (src_x % pix_per_tile) /
					(i ? h_subsample : 1) *
					fb->format->cpp[i];
			}

			tile = src_x / pix_per_tile;

			offsets[i] += param * tile_w * tile;
			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
			offsets[i] += x_off & ~(i ? 1 : 0);
		}

		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* fetch an extra pixel if we don't actually line up with the left edge. */
	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
		width++;

	/* same for the right side */
	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
		width++;

	/* now for the top */
	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
		height++;

	/* and the bottom */
	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
		height++;

	/* For YUV444 the hardware wants double the width, otherwise it doesn't
	 * fetch full width of chroma
	 */
	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
		width <<= 1;

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	if (vc4->gen == VC4_GEN_4) {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				vc4_hvs4_get_alpha_blend_mode(state) |
				VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));

		/* Position Word 3: Context. Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		/* GEN_5 dlist layout. */
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
						SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
				vc4_hvs5_get_alpha_blend_mode(state) |
				(mix_plane_alpha ?
					SCALER5_CTL2_ALPHA_MIX : 0)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER5_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER5_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context. Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}


	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset[0] = vc4_state->dlist_count;

	for (i = 0; i < num_planes; i++) {
		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);

		vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]);
	}

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
		/* SAND formats encode the column pitch in pitch0 and reuse it
		 * for every plane.
		 */
		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words */
	if (vc4_state->is_yuv) {
		enum drm_color_encoding color_encoding = state->color_encoding;
		enum drm_color_range color_range = state->color_range;
		const u32 *ccm;

		/* Clamp out-of-range properties to the defaults. */
		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
			color_encoding = DRM_COLOR_YCBCR_BT601;
		if (color_range >= DRM_COLOR_RANGE_MAX)
			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;

		ccm = colorspace_coeffs[color_range][color_encoding];

		vc4_dlist_write(vc4_state, ccm[0]);
		vc4_dlist_write(vc4_state, ccm[1]);
		vc4_dlist_write(vc4_state, ccm[2]);
	}

	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	/* Patch the total element size back into the control word now that
	 * the whole element has been emitted.
	 */
	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	return 0;
}
1699
vc6_plane_get_csc_mode(struct vc4_plane_state * vc4_state)1700 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
1701 {
1702 struct drm_plane_state *state = &vc4_state->base;
1703 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
1704 u32 ret = 0;
1705
1706 if (vc4_state->is_yuv) {
1707 enum drm_color_encoding color_encoding = state->color_encoding;
1708 enum drm_color_range color_range = state->color_range;
1709
1710 /* CSC pre-loaded with:
1711 * 0 = BT601 limited range
1712 * 1 = BT709 limited range
1713 * 2 = BT2020 limited range
1714 * 3 = BT601 full range
1715 * 4 = BT709 full range
1716 * 5 = BT2020 full range
1717 */
1718 if (color_encoding > DRM_COLOR_YCBCR_BT2020)
1719 color_encoding = DRM_COLOR_YCBCR_BT601;
1720 if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
1721 color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
1722
1723 if (vc4->gen == VC4_GEN_6_C) {
1724 ret |= SCALER6C_CTL2_CSC_ENABLE;
1725 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1726 SCALER6C_CTL2_BRCM_CFC_CONTROL);
1727 } else {
1728 ret |= SCALER6D_CTL2_CSC_ENABLE;
1729 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
1730 SCALER6D_CTL2_BRCM_CFC_CONTROL);
1731 }
1732 }
1733
1734 return ret;
1735 }
1736
vc6_plane_mode_set(struct drm_plane * plane,struct drm_plane_state * state)1737 static int vc6_plane_mode_set(struct drm_plane *plane,
1738 struct drm_plane_state *state)
1739 {
1740 struct drm_device *drm = plane->dev;
1741 struct vc4_dev *vc4 = to_vc4_dev(drm);
1742 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1743 struct drm_framebuffer *fb = state->fb;
1744 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
1745 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
1746 int num_planes = fb->format->num_planes;
1747 u32 h_subsample = fb->format->hsub;
1748 u32 v_subsample = fb->format->vsub;
1749 bool mix_plane_alpha;
1750 bool covers_screen;
1751 u32 scl0, scl1, pitch0;
1752 u32 tiling, src_x, src_y;
1753 u32 width, height;
1754 u32 hvs_format = format->hvs;
1755 u32 offsets[3] = { 0 };
1756 unsigned int rotation;
1757 int ret, i;
1758
1759 if (vc4_state->dlist_initialized)
1760 return 0;
1761
1762 ret = vc4_plane_setup_clipping_and_scaling(state);
1763 if (ret)
1764 return ret;
1765
1766 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
1767 !vc4_state->crtc_w || !vc4_state->crtc_h) {
1768 /* 0 source size probably means the plane is offscreen.
1769 * 0 destination size is a redundant plane.
1770 */
1771 vc4_state->dlist_initialized = 1;
1772 return 0;
1773 }
1774
1775 width = vc4_state->src_w[0] >> 16;
1776 height = vc4_state->src_h[0] >> 16;
1777
1778 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
1779 * and 4:4:4, scl1 should be set to scl0 so both channels of
1780 * the scaler do the same thing. For YUV, the Y plane needs
1781 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
1782 * the scl fields here.
1783 */
1784 if (num_planes == 1) {
1785 scl0 = vc4_get_scl_field(state, 0);
1786 scl1 = scl0;
1787 } else {
1788 scl0 = vc4_get_scl_field(state, 1);
1789 scl1 = vc4_get_scl_field(state, 0);
1790 }
1791
1792 rotation = drm_rotation_simplify(state->rotation,
1793 DRM_MODE_ROTATE_0 |
1794 DRM_MODE_REFLECT_X |
1795 DRM_MODE_REFLECT_Y);
1796
1797 /* We must point to the last line when Y reflection is enabled. */
1798 src_y = vc4_state->src_y >> 16;
1799 if (rotation & DRM_MODE_REFLECT_Y)
1800 src_y += height - 1;
1801
1802 src_x = vc4_state->src_x >> 16;
1803
1804 switch (base_format_mod) {
1805 case DRM_FORMAT_MOD_LINEAR:
1806 tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
1807
1808 /* Adjust the base pointer to the first pixel to be scanned
1809 * out.
1810 */
1811 for (i = 0; i < num_planes; i++) {
1812 offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
1813 offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
1814 }
1815
1816 break;
1817
1818 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1819 case DRM_FORMAT_MOD_BROADCOM_SAND256: {
1820 uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
1821 u32 components_per_word;
1822 u32 starting_offset;
1823 u32 fetch_count;
1824
1825 if (param > SCALER_TILE_HEIGHT_MASK) {
1826 DRM_DEBUG_KMS("SAND height too large (%d)\n",
1827 param);
1828 return -EINVAL;
1829 }
1830
1831 if (fb->format->format == DRM_FORMAT_P030) {
1832 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
1833 tiling = SCALER6_CTL0_ADDR_MODE_128B;
1834 } else {
1835 hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
1836
1837 switch (base_format_mod) {
1838 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1839 tiling = SCALER6_CTL0_ADDR_MODE_128B;
1840 break;
1841 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1842 tiling = SCALER6_CTL0_ADDR_MODE_256B;
1843 break;
1844 default:
1845 return -EINVAL;
1846 }
1847 }
1848
1849 /* Adjust the base pointer to the first pixel to be scanned
1850 * out.
1851 *
1852 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
1853 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
1854 * word that should be taken as the first pixel.
1855 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
1856 * element within the 128bit word, eg for pixel 3 the value
1857 * should be 6.
1858 */
1859 for (i = 0; i < num_planes; i++) {
1860 u32 tile_w, tile, x_off, pix_per_tile;
1861
1862 if (fb->format->format == DRM_FORMAT_P030) {
1863 /*
1864 * Spec says: bits [31:4] of the given address
1865 * should point to the 128-bit word containing
1866 * the desired starting pixel, and bits[3:0]
1867 * should be between 0 and 11, indicating which
1868 * of the 12-pixels in that 128-bit word is the
1869 * first pixel to be used
1870 */
1871 u32 remaining_pixels = src_x % 96;
1872 u32 aligned = remaining_pixels / 12;
1873 u32 last_bits = remaining_pixels % 12;
1874
1875 x_off = aligned * 16 + last_bits;
1876 tile_w = 128;
1877 pix_per_tile = 96;
1878 } else {
1879 switch (base_format_mod) {
1880 case DRM_FORMAT_MOD_BROADCOM_SAND128:
1881 tile_w = 128;
1882 break;
1883 case DRM_FORMAT_MOD_BROADCOM_SAND256:
1884 tile_w = 256;
1885 break;
1886 default:
1887 return -EINVAL;
1888 }
1889 pix_per_tile = tile_w / fb->format->cpp[0];
1890 x_off = (src_x % pix_per_tile) /
1891 (i ? h_subsample : 1) *
1892 fb->format->cpp[i];
1893 }
1894
1895 tile = src_x / pix_per_tile;
1896
1897 offsets[i] += param * tile_w * tile;
1898 offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
1899 offsets[i] += x_off & ~(i ? 1 : 0);
1900 }
1901
1902 components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
1903 starting_offset = src_x % components_per_word;
1904 fetch_count = (width + starting_offset + components_per_word - 1) /
1905 components_per_word;
1906
1907 pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
1908 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
1909 break;
1910 }
1911
1912 default:
1913 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
1914 (long long)fb->modifier);
1915 return -EINVAL;
1916 }
1917
1918 /* fetch an extra pixel if we don't actually line up with the left edge. */
1919 if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
1920 width++;
1921
1922 /* same for the right side */
1923 if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
1924 vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
1925 width++;
1926
1927 /* now for the top */
1928 if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
1929 height++;
1930
1931 /* and the bottom */
1932 if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
1933 vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
1934 height++;
1935
1936 /* for YUV444 hardware wants double the width, otherwise it doesn't
1937 * fetch full width of chroma
1938 */
1939 if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
1940 width <<= 1;
1941
1942 /* Don't waste cycles mixing with plane alpha if the set alpha
1943 * is opaque or there is no per-pixel alpha information.
1944 * In any case we use the alpha property value as the fixed alpha.
1945 */
1946 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
1947 fb->format->has_alpha;
1948
1949 /* Control Word 0: Scaling Configuration & Element Validity*/
1950 vc4_dlist_write(vc4_state,
1951 SCALER6_CTL0_VALID |
1952 VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
1953 vc4_hvs6_get_alpha_mask_mode(state) |
1954 (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
1955 VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
1956 VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
1957 VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
1958 VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
1959
1960 /* Position Word 0: Image Position */
1961 vc4_state->pos0_offset = vc4_state->dlist_count;
1962 vc4_dlist_write(vc4_state,
1963 VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
1964 (rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
1965 VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
1966
1967 /* Control Word 2: Alpha Value & CSC */
1968 vc4_dlist_write(vc4_state,
1969 vc6_plane_get_csc_mode(vc4_state) |
1970 vc4_hvs5_get_alpha_blend_mode(state) |
1971 (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
1972 VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
1973
1974 /* Position Word 1: Scaled Image Dimensions */
1975 if (!vc4_state->is_unity)
1976 vc4_dlist_write(vc4_state,
1977 VC4_SET_FIELD(vc4_state->crtc_h - 1,
1978 SCALER6_POS1_SCL_LINES) |
1979 VC4_SET_FIELD(vc4_state->crtc_w - 1,
1980 SCALER6_POS1_SCL_WIDTH));
1981
1982 /* Position Word 2: Source Image Size */
1983 vc4_state->pos2_offset = vc4_state->dlist_count;
1984 vc4_dlist_write(vc4_state,
1985 VC4_SET_FIELD(height - 1,
1986 SCALER6_POS2_SRC_LINES) |
1987 VC4_SET_FIELD(width - 1,
1988 SCALER6_POS2_SRC_WIDTH));
1989
1990 /* Position Word 3: Context */
1991 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
1992
1993 /*
1994 * TODO: This only covers Raster Scan Order planes
1995 */
1996 for (i = 0; i < num_planes; i++) {
1997 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
1998 dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];
1999
2000 /* Pointer Word 0 */
2001 vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
2002 vc4_dlist_write(vc4_state,
2003 (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
2004 /*
2005 * The UPM buffer will be allocated in
2006 * vc6_plane_allocate_upm().
2007 */
2008 VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
2009 SCALER6_PTR0_UPPER_ADDR));
2010
2011 /* Pointer Word 1 */
2012 vc4_dlist_write(vc4_state, lower_32_bits(paddr));
2013
2014 /* Pointer Word 2 */
2015 if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
2016 base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
2017 vc4_dlist_write(vc4_state,
2018 VC4_SET_FIELD(fb->pitches[i],
2019 SCALER6_PTR2_PITCH));
2020 } else {
2021 vc4_dlist_write(vc4_state, pitch0);
2022 }
2023 }
2024
2025 /*
2026 * Palette Word 0
2027 * TODO: We're not using the palette mode
2028 */
2029
2030 /*
2031 * Trans Word 0
2032 * TODO: It's only relevant if we set the trans_rgb bit in the
2033 * control word 0, and we don't at the moment.
2034 */
2035
2036 vc4_state->lbm_offset = 0;
2037
2038 if (!vc4_state->is_unity || fb->format->is_yuv) {
2039 /*
2040 * Reserve a slot for the LBM Base Address. The real value will
2041 * be set when calling vc4_plane_allocate_lbm().
2042 */
2043 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2044 vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2045 vc4_state->lbm_offset = vc4_state->dlist_count;
2046 vc4_dlist_counter_increment(vc4_state);
2047 }
2048
2049 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
2050 vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
2051 vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
2052 vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
2053 if (num_planes > 1)
2054 /*
2055 * Emit Cb/Cr as channel 0 and Y as channel
2056 * 1. This matches how we set up scl0/scl1
2057 * above.
2058 */
2059 vc4_write_scaling_parameters(state, 1);
2060
2061 vc4_write_scaling_parameters(state, 0);
2062 }
2063
2064 /*
2065 * If any PPF setup was done, then all the kernel
2066 * pointers get uploaded.
2067 */
2068 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
2069 vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
2070 vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
2071 vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
2072 u32 kernel =
2073 VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
2074 SCALER_PPF_KERNEL_OFFSET);
2075
2076 /* HPPF plane 0 */
2077 vc4_dlist_write(vc4_state, kernel);
2078 /* VPPF plane 0 */
2079 vc4_dlist_write(vc4_state, kernel);
2080 /* HPPF plane 1 */
2081 vc4_dlist_write(vc4_state, kernel);
2082 /* VPPF plane 1 */
2083 vc4_dlist_write(vc4_state, kernel);
2084 }
2085 }
2086
2087 vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
2088
2089 vc4_state->dlist[0] |=
2090 VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
2091
2092 /* crtc_* are already clipped coordinates. */
2093 covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
2094 vc4_state->crtc_w == state->crtc->mode.hdisplay &&
2095 vc4_state->crtc_h == state->crtc->mode.vdisplay;
2096
2097 /*
2098 * Background fill might be necessary when the plane has per-pixel
2099 * alpha content or a non-opaque plane alpha and could blend from the
2100 * background or does not cover the entire screen.
2101 */
2102 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
2103 state->alpha != DRM_BLEND_ALPHA_OPAQUE;
2104
2105 /*
2106 * Flag the dlist as initialized to avoid checking it twice in case
2107 * the async update check already called vc4_plane_mode_set() and
2108 * decided to fallback to sync update because async update was not
2109 * possible.
2110 */
2111 vc4_state->dlist_initialized = 1;
2112
2113 vc4_plane_calc_load(state);
2114
2115 drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
2116 plane->base.id, plane->name, vc4_state->dlist_count);
2117
2118 return 0;
2119 }
2120
2121 /* If a modeset involves changing the setup of a plane, the atomic
2122 * infrastructure will call this to validate a proposed plane setup.
2123 * However, if a plane isn't getting updated, this (and the
2124 * corresponding vc4_plane_atomic_update) won't get called. Thus, we
2125 * compute the dlist here and have all active plane dlists get updated
2126 * in the CRTC's flush.
2127 */
vc4_plane_atomic_check(struct drm_plane * plane,struct drm_atomic_state * state)2128 static int vc4_plane_atomic_check(struct drm_plane *plane,
2129 struct drm_atomic_state *state)
2130 {
2131 struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2132 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2133 plane);
2134 struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
2135 int ret;
2136
2137 vc4_state->dlist_count = 0;
2138
2139 if (!plane_enabled(new_plane_state)) {
2140 struct drm_plane_state *old_plane_state =
2141 drm_atomic_get_old_plane_state(state, plane);
2142
2143 if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
2144 plane_enabled(old_plane_state)) {
2145 vc6_plane_free_upm(new_plane_state);
2146 }
2147 return 0;
2148 }
2149
2150 if (vc4->gen >= VC4_GEN_6_C)
2151 ret = vc6_plane_mode_set(plane, new_plane_state);
2152 else
2153 ret = vc4_plane_mode_set(plane, new_plane_state);
2154 if (ret)
2155 return ret;
2156
2157 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
2158 !vc4_state->crtc_w || !vc4_state->crtc_h)
2159 return 0;
2160
2161 ret = vc4_plane_allocate_lbm(new_plane_state);
2162 if (ret)
2163 return ret;
2164
2165 if (vc4->gen >= VC4_GEN_6_C) {
2166 ret = vc6_plane_allocate_upm(new_plane_state);
2167 if (ret)
2168 return ret;
2169 }
2170
2171 return 0;
2172 }
2173
/* Intentionally empty: the plane's dlist was already built during
 * vc4_plane_atomic_check(), and it is written to the hardware by the
 * CRTC, not per-plane.
 */
static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
	/* No contents here. Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}
2183
/* Copy the plane's precomputed dlist into the HVS region the CRTC
 * allocated for it, remembering where it landed so async updates can
 * patch it in place later.  Returns the number of dlist words.
 */
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	int idx;
	int i;

	if (drm_dev_enter(plane->dev, &idx)) {
		vc4_state->hw_dlist = dlist;

		/* Can't memcpy_toio() because it needs to be 32-bit writes. */
		for (i = 0; i < vc4_state->dlist_count; i++)
			writel(vc4_state->dlist[i], &dlist[i]);

		drm_dev_exit(idx);
	}

	return vc4_state->dlist_count;
}
2204
vc4_plane_dlist_size(const struct drm_plane_state * state)2205 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
2206 {
2207 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
2208
2209 return vc4_state->dlist_count;
2210 }
2211
2212 /* Updates the plane to immediately (well, once the FIFO needs
2213 * refilling) scan out from at a new framebuffer.
2214 */
vc4_plane_async_set_fb(struct drm_plane * plane,struct drm_framebuffer * fb)2215 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
2216 {
2217 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
2218 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
2219 struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2220 dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
2221 int idx;
2222
2223 if (!drm_dev_enter(plane->dev, &idx))
2224 return;
2225
2226 /* We're skipping the address adjustment for negative origin,
2227 * because this is only called on the primary plane.
2228 */
2229 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
2230
2231 if (vc4->gen == VC4_GEN_6_C) {
2232 u32 value;
2233
2234 value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
2235 ~SCALER6_PTR0_UPPER_ADDR_MASK;
2236 value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
2237 SCALER6_PTR0_UPPER_ADDR);
2238
2239 writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2240 vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;
2241
2242 value = lower_32_bits(dma_addr);
2243 writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
2244 vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
2245 } else {
2246 u32 addr;
2247
2248 addr = (u32)dma_addr;
2249
2250 /* Write the new address into the hardware immediately. The
2251 * scanout will start from this address as soon as the FIFO
2252 * needs to refill with pixels.
2253 */
2254 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
2255
2256 /* Also update the CPU-side dlist copy, so that any later
2257 * atomic updates that don't do a new modeset on our plane
2258 * also use our updated address.
2259 */
2260 vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
2261 }
2262
2263 drm_dev_exit(idx);
2264 }
2265
/* Apply an async (e.g. cursor) update in place: copy the new state's
 * coordinates into the plane's live state, then patch only the pos0,
 * pos2 and ptr0 dlist words, both in the CPU copy and in the dlist the
 * HVS is currently scanning out.  vc4_plane_atomic_async_check() has
 * already guaranteed that nothing else in the dlist changed.
 */
static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_atomic_state *state)
{
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state, *new_vc4_state;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	/* Transfer the new framebuffer and scanout geometry onto the
	 * plane's current state instead of swapping whole state objects,
	 * so the dlist the HVS is using stays owned by plane->state.
	 */
	swap(plane->state->fb, new_plane_state->fb);
	plane->state->crtc_x = new_plane_state->crtc_x;
	plane->state->crtc_y = new_plane_state->crtc_y;
	plane->state->crtc_w = new_plane_state->crtc_w;
	plane->state->crtc_h = new_plane_state->crtc_h;
	plane->state->src_x = new_plane_state->src_x;
	plane->state->src_y = new_plane_state->src_y;
	plane->state->src_w = new_plane_state->src_w;
	plane->state->src_h = new_plane_state->src_h;
	plane->state->alpha = new_plane_state->alpha;
	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
	plane->state->rotation = new_plane_state->rotation;
	plane->state->zpos = new_plane_state->zpos;
	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
	plane->state->color_encoding = new_plane_state->color_encoding;
	plane->state->color_range = new_plane_state->color_range;
	plane->state->src = new_plane_state->src;
	plane->state->dst = new_plane_state->dst;
	plane->state->visible = new_plane_state->visible;

	new_vc4_state = to_vc4_plane_state(new_plane_state);
	vc4_state = to_vc4_plane_state(plane->state);

	/* Mirror the vc4-specific derived fields computed by the
	 * mode_set call in the async check.
	 */
	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset[0]] =
		new_vc4_state->dlist[vc4_state->ptr0_offset[0]];

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);

	drm_dev_exit(idx);
}
2339
vc4_plane_atomic_async_check(struct drm_plane * plane,struct drm_atomic_state * state)2340 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
2341 struct drm_atomic_state *state)
2342 {
2343 struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
2344 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
2345 plane);
2346 struct vc4_plane_state *old_vc4_state, *new_vc4_state;
2347 int ret;
2348 u32 i;
2349
2350 if (vc4->gen <= VC4_GEN_5)
2351 ret = vc4_plane_mode_set(plane, new_plane_state);
2352 else
2353 ret = vc6_plane_mode_set(plane, new_plane_state);
2354 if (ret)
2355 return ret;
2356
2357 old_vc4_state = to_vc4_plane_state(plane->state);
2358 new_vc4_state = to_vc4_plane_state(new_plane_state);
2359
2360 if (!new_vc4_state->hw_dlist)
2361 return -EINVAL;
2362
2363 if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
2364 old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
2365 old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
2366 old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
2367 vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
2368 return -EINVAL;
2369
2370 /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
2371 * if anything else has changed, fallback to a sync update.
2372 */
2373 for (i = 0; i < new_vc4_state->dlist_count; i++) {
2374 if (i == new_vc4_state->pos0_offset ||
2375 i == new_vc4_state->pos2_offset ||
2376 i == new_vc4_state->ptr0_offset[0] ||
2377 (new_vc4_state->lbm_offset &&
2378 i == new_vc4_state->lbm_offset))
2379 continue;
2380
2381 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
2382 return -EINVAL;
2383 }
2384
2385 return 0;
2386 }
2387
vc4_prepare_fb(struct drm_plane * plane,struct drm_plane_state * state)2388 static int vc4_prepare_fb(struct drm_plane *plane,
2389 struct drm_plane_state *state)
2390 {
2391 struct vc4_bo *bo;
2392 int ret;
2393
2394 if (!state->fb)
2395 return 0;
2396
2397 bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2398
2399 ret = drm_gem_plane_helper_prepare_fb(plane, state);
2400 if (ret)
2401 return ret;
2402
2403 return vc4_bo_inc_usecnt(bo);
2404 }
2405
vc4_cleanup_fb(struct drm_plane * plane,struct drm_plane_state * state)2406 static void vc4_cleanup_fb(struct drm_plane *plane,
2407 struct drm_plane_state *state)
2408 {
2409 struct vc4_bo *bo;
2410
2411 if (!state->fb)
2412 return;
2413
2414 bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
2415 vc4_bo_dec_usecnt(bo);
2416 }
2417
/* Plane helpers for VC4-era hardware: these generations additionally
 * pin/unpin the BO usecnt around scanout via prepare_fb/cleanup_fb.
 */
static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.prepare_fb = vc4_prepare_fb,
	.cleanup_fb = vc4_cleanup_fb,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};
2426
/* Plane helpers for VC5+ hardware: same atomic paths as VC4, but no
 * prepare_fb/cleanup_fb usecnt pinning.
 */
static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};
2433
vc4_format_mod_supported(struct drm_plane * plane,uint32_t format,uint64_t modifier)2434 static bool vc4_format_mod_supported(struct drm_plane *plane,
2435 uint32_t format,
2436 uint64_t modifier)
2437 {
2438 /* Support T_TILING for RGB formats only. */
2439 switch (format) {
2440 case DRM_FORMAT_XRGB8888:
2441 case DRM_FORMAT_ARGB8888:
2442 case DRM_FORMAT_ABGR8888:
2443 case DRM_FORMAT_XBGR8888:
2444 case DRM_FORMAT_RGB565:
2445 case DRM_FORMAT_BGR565:
2446 case DRM_FORMAT_ARGB1555:
2447 case DRM_FORMAT_XRGB1555:
2448 switch (fourcc_mod_broadcom_mod(modifier)) {
2449 case DRM_FORMAT_MOD_LINEAR:
2450 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
2451 return true;
2452 default:
2453 return false;
2454 }
2455 case DRM_FORMAT_NV12:
2456 case DRM_FORMAT_NV21:
2457 switch (fourcc_mod_broadcom_mod(modifier)) {
2458 case DRM_FORMAT_MOD_LINEAR:
2459 case DRM_FORMAT_MOD_BROADCOM_SAND64:
2460 case DRM_FORMAT_MOD_BROADCOM_SAND128:
2461 case DRM_FORMAT_MOD_BROADCOM_SAND256:
2462 return true;
2463 default:
2464 return false;
2465 }
2466 case DRM_FORMAT_P030:
2467 switch (fourcc_mod_broadcom_mod(modifier)) {
2468 case DRM_FORMAT_MOD_BROADCOM_SAND128:
2469 return true;
2470 default:
2471 return false;
2472 }
2473 case DRM_FORMAT_RGBX1010102:
2474 case DRM_FORMAT_BGRX1010102:
2475 case DRM_FORMAT_RGBA1010102:
2476 case DRM_FORMAT_BGRA1010102:
2477 case DRM_FORMAT_XRGB4444:
2478 case DRM_FORMAT_ARGB4444:
2479 case DRM_FORMAT_XBGR4444:
2480 case DRM_FORMAT_ABGR4444:
2481 case DRM_FORMAT_RGBX4444:
2482 case DRM_FORMAT_RGBA4444:
2483 case DRM_FORMAT_BGRX4444:
2484 case DRM_FORMAT_BGRA4444:
2485 case DRM_FORMAT_RGB332:
2486 case DRM_FORMAT_BGR233:
2487 case DRM_FORMAT_YUV422:
2488 case DRM_FORMAT_YVU422:
2489 case DRM_FORMAT_YUV420:
2490 case DRM_FORMAT_YVU420:
2491 case DRM_FORMAT_NV16:
2492 case DRM_FORMAT_NV61:
2493 default:
2494 return (modifier == DRM_FORMAT_MOD_LINEAR);
2495 }
2496 }
2497
/* Core plane vtable, shared by all generations; generation differences
 * live in the helper funcs selected in vc4_plane_init().
 */
static const struct drm_plane_funcs vc4_plane_funcs = {
	.update_plane = drm_atomic_helper_update_plane,
	.disable_plane = drm_atomic_helper_disable_plane,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
	.format_mod_supported = vc4_format_mod_supported,
};
2506
/* Allocate and register one plane of the given type, advertising the
 * formats/modifiers valid for this HVS generation and attaching the
 * standard blending, rotation and color properties.
 */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs)
{
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	u32 formats[ARRAY_SIZE(hvs_formats)];
	struct vc4_plane *vc4_plane;
	struct drm_plane *plane;
	int num_formats = 0;
	unsigned int i;

	/* Skip HVS5-only formats on earlier generations. */
	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (hvs_formats[i].hvs5_only && vc4->gen < VC4_GEN_5)
			continue;

		formats[num_formats++] = hvs_formats[i].drm;
	}

	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
					       possible_crtcs,
					       &vc4_plane_funcs,
					       formats, num_formats,
					       modifiers, type, NULL);
	if (IS_ERR(vc4_plane))
		return ERR_CAST(vc4_plane);
	plane = &vc4_plane->base;

	drm_plane_helper_add(plane, vc4->gen >= VC4_GEN_5 ?
					    &vc5_plane_helper_funcs :
					    &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_blend_mode_property(plane,
					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
					     BIT(DRM_MODE_BLEND_PREMULTI) |
					     BIT(DRM_MODE_BLEND_COVERAGE));
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	drm_plane_create_color_properties(plane,
					  BIT(DRM_COLOR_YCBCR_BT601) |
					  BIT(DRM_COLOR_YCBCR_BT709) |
					  BIT(DRM_COLOR_YCBCR_BT2020),
					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
					  DRM_COLOR_YCBCR_BT709,
					  DRM_COLOR_YCBCR_LIMITED_RANGE);

	if (type == DRM_PLANE_TYPE_PRIMARY)
		drm_plane_create_zpos_immutable_property(plane, 0);

	return plane;
}
2572
2573 #define VC4_NUM_OVERLAY_PLANES 16
2574
/* Register the overlay planes and a per-CRTC legacy cursor plane on top
 * of the primary planes created elsewhere.  Always returns 0; plane
 * creation failures are treated as best-effort and skipped.
 */
int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes. We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small an plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit. As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
		/* Overlays may attach to every CRTC. */
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
				       GENMASK(drm->mode_config.num_crtc - 1, 0));

		/* Best-effort: a failed overlay is skipped, not fatal. */
		if (IS_ERR(plane))
			continue;

		/* Create zpos property. Max of all the overlays + 1 primary +
		 * 1 cursor plane on a crtc.
		 */
		drm_plane_create_zpos_property(plane, i + 1, 1,
					       VC4_NUM_OVERLAY_PLANES + 1);
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since the zpos fallback is that planes are rendered by plane
		 * ID order, and that then puts the cursor on top.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
					      drm_crtc_mask(crtc));
		if (!IS_ERR(cursor_plane)) {
			crtc->cursor = cursor_plane;

			/* Cursor defaults to the topmost zpos. */
			drm_plane_create_zpos_property(cursor_plane,
						       VC4_NUM_OVERLAY_PLANES + 1,
						       1,
						       VC4_NUM_OVERLAY_PLANES + 1);
		}
	}

	return 0;
}
2624