// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Broadcom
 */

/**
 * DOC: VC4 plane module
 *
 * Each DRM plane is a layer of pixels being scanned out by the HVS.
 *
 * At atomic modeset check time, we compute the HVS display element
 * state that would be necessary for displaying the plane (giving us a
 * chance to figure out if a plane configuration is invalid), then at
 * atomic flush time the CRTC will ask us to write our element state
 * into the region of the HVS that it has allocated for us.
 */

#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_atomic_uapi.h>
#include <drm/drm_blend.h>
#include <drm/drm_drv.h>
#include <drm/drm_fb_dma_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_framebuffer.h>
#include <drm/drm_gem_atomic_helper.h>
#include <drm/drm_print.h>

#include "uapi/drm/vc4_drm.h"

#include "vc4_drv.h"
#include "vc4_regs.h"

/* Mapping from a DRM fourcc to the HVS pixel format and component
 * ordering. HVS4 and HVS5+ use different pixel-order encodings, hence
 * the two pixel_order fields; formats flagged hvs5_only have no HVS4
 * equivalent.
 */
static const struct hvs_format {
	u32 drm; /* DRM_FORMAT_* */
	u32 hvs; /* HVS_FORMAT_* */
	u32 pixel_order;
	u32 pixel_order_hvs5;
	bool hvs5_only;
} hvs_formats[] = {
	{
		.drm = DRM_FORMAT_XRGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ABGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XBGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_ARGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XRGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_RGB888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_YUV422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		/* NOTE(review): YUV444 is mapped onto the 422 3-plane HVS
		 * format here; presumably the scaler handles the extra
		 * chroma samples — confirm against the HVS documentation.
		 */
		.drm = DRM_FORMAT_YUV444,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU444,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV12,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV21,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV16,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV61,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_P030,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_XRGB2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_ARGB2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_ABGR2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_XBGR2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_RGB332,
		.hvs = HVS_PIXEL_FORMAT_RGB332,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_BGR233,
		.hvs = HVS_PIXEL_FORMAT_RGB332,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XRGB4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XBGR4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_ABGR4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_BGRX4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_RGBA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
	},
	{
		.drm = DRM_FORMAT_BGRA4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_RGBA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
	},
	{
		.drm = DRM_FORMAT_RGBX4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_BGRA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
	},
	{
		.drm = DRM_FORMAT_RGBA4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_BGRA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
	},
};

/* Return the hvs_formats[] entry for a DRM fourcc, or NULL if the
 * format is not supported by this driver.
 */
static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (hvs_formats[i].drm == drm_format)
			return &hvs_formats[i];
	}

	return NULL;
}

/* Classify a scaling ratio. @src is a 16.16 fixed-point source size,
 * @dst an integer destination size: equal sizes need no scaling, down
 * to a 2/3 ratio the PPF (polyphase filter) is used, anything smaller
 * uses the TPZ (trapezoidal) scaler.
 */
static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
	if (dst == src >> 16)
		return VC4_SCALING_NONE;
	if (3 * dst >= 2 * (src >> 16))
		return VC4_SCALING_PPF;
	else
		return VC4_SCALING_TPZ;
}

/* A plane is enabled when it has a framebuffer; it should then also
 * have a CRTC assigned (warn if not).
 */
static bool plane_enabled(struct drm_plane_state *state)
{
	return state->fb && !WARN_ON(!state->crtc);
}

/* .atomic_duplicate_state hook: deep-copy the plane state, including
 * the private dlist buffer and UPM refcounts.
 */
static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state;
	unsigned int i;

	if
(WARN_ON(!plane->state))
		return NULL;

	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return NULL;

	/* The LBM allocation belongs to the old state; the copy must
	 * get its own (see vc4_plane_allocate_lbm()).
	 */
	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));

	/* UPM allocations are shared by handle; take a reference for
	 * each handle the copied state carries.
	 */
	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		if (vc4_state->upm_handle[i])
			refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
	}

	vc4_state->dlist_initialized = 0;

	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);

	if (vc4_state->dlist) {
		vc4_state->dlist = kmemdup(vc4_state->dlist,
					   vc4_state->dlist_count * 4,
					   GFP_KERNEL);
		if (!vc4_state->dlist) {
			kfree(vc4_state);
			return NULL;
		}
		vc4_state->dlist_size = vc4_state->dlist_count;
	}

	return &vc4_state->base;
}

/* Drop the drm_mm node backing a UPM handle and return the handle to
 * the IDA. Called once the handle's refcount reaches zero.
 */
static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
{
	struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
	unsigned long irqflags;

	spin_lock_irqsave(&hvs->mm_lock, irqflags);
	drm_mm_remove_node(&refcount->upm);
	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	refcount->upm.start = 0;
	refcount->upm.size = 0;
	refcount->size = 0;

	ida_free(&hvs->upm_handles, upm_handle);
}

/* .atomic_destroy_state hook: release the LBM node, drop UPM handle
 * references, and free the dlist copy before freeing the state.
 */
static void vc4_plane_destroy_state(struct drm_plane *plane,
				    struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int i;

	if (drm_mm_node_allocated(&vc4_state->lbm)) {
		unsigned long irqflags;

		spin_lock_irqsave(&hvs->mm_lock, irqflags);
		drm_mm_remove_node(&vc4_state->lbm);
		spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	}

	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		struct vc4_upm_refcounts *refcount;

		if (!vc4_state->upm_handle[i])
			continue;

		refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];

		if (refcount_dec_and_test(&refcount->refcount))
			vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
	}

	kfree(vc4_state->dlist);
	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
	kfree(state);
}

/* Called during init to allocate the plane's atomic state. */
static void vc4_plane_reset(struct drm_plane *plane)
{
	struct vc4_plane_state *vc4_state;

	if (plane->state)
		__drm_atomic_helper_plane_destroy_state(plane->state);

	kfree(plane->state);

	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return;

	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
}

/* Bump dlist_count, growing the dlist buffer (doubling, minimum 4
 * entries) when it is full. On allocation failure the count is still
 * incremented; the subsequent vc4_dlist_write() relies on the old
 * (now too small) buffer, so callers must size conservatively.
 */
static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
{
	if (vc4_state->dlist_count == vc4_state->dlist_size) {
		u32 new_size = max(4u, vc4_state->dlist_count * 2);
		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);

		if (!new_dlist)
			return;
		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);

		kfree(vc4_state->dlist);
		vc4_state->dlist = new_dlist;
		vc4_state->dlist_size = new_size;
	}

	vc4_state->dlist_count++;
}

/* Append one 32-bit word to the plane's private dlist. */
static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
{
	unsigned int idx = vc4_state->dlist_count;

	vc4_dlist_counter_increment(vc4_state);
	vc4_state->dlist[idx] = val;
}

/* Returns the scl0/scl1 field based on whether the dimensions need to
 * be up/down/non-scaled.
 *
 * This is a replication of a table from the spec.
 */
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Key is (x_scaling << 2) | y_scaling for the given plane. */
	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_PPF_V_PPF;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_PPF_V_NONE;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_NONE_V_PPF;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
	default:
	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
		/* The unity case is independently handled by
		 * SCALER_CTL0_UNITY.
		 */
		return 0;
	}
}

/* Rescale the plane's CRTC coordinates into the margin-reduced area
 * configured on the CRTC (overscan margins), clamping so the plane
 * stays inside it. Returns -EINVAL if the margins leave no visible
 * area or the plane collapses to zero size.
 */
static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
{
	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
	struct drm_crtc_state *crtc_state;

	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
						   pstate->crtc);

	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
	if (!left && !right && !top && !bottom)
		return 0;

	if (left + right >= crtc_state->mode.hdisplay ||
	    top + bottom >= crtc_state->mode.vdisplay)
		return -EINVAL;

	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_x += left;
	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;

	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);
	vc4_pstate->crtc_y += top;
	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;

	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);

	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
		return -EINVAL;

	return 0;
}

/* Validate/clip the plane against the CRTC, cache source and dest
 * rectangles in the vc4 state (src_* in 16.16 fixed point), and pick
 * the per-channel x/y scaling modes (channel 1 = chroma for YUV).
 */
static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	int num_planes = fb->format->num_planes;
	struct drm_crtc_state *crtc_state;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	int ret;

	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

	/* Scaling from 1x up to INT_MAX, with clipping allowed. */
	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
	if (ret)
		return ret;

	vc4_state->src_x = state->src.x1;
	vc4_state->src_y = state->src.y1;
	vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
	vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;

	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;

	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
					     vc4_state->crtc_h);

		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;

		/* Similarly UV needs vertical scaling to be enabled.
		 * Without this a 1:1 scaled YUV422 plane isn't rendered.
 */
		if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
			vc4_state->y_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}

/* Emit the two TPZ (trapezoidal scaler) dlist words for a src->dst
 * downscale: the 16.16 scale factor and its reciprocal. @src is 16.16
 * fixed point, @dst an integer size.
 */
static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale, recip;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	scale = src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			/*
			 * The BCM2712 is lacking BIT(31) compared to
			 * the previous generations, but we don't use
			 * it.
			 */
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

/* phase magnitude bits */
#define PHASE_BITS 6

/* Emit the PPF (polyphase filter) scaling parameter dlist word for one
 * axis. @src is 16.16 fixed point, @dst integer, @xy the 16.16 source
 * position used to derive the initial phase, @channel non-zero for the
 * chroma (UV) channel.
 */
static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
			  u32 xy, int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale = src / dst;
	s32 offset, offset2;
	s32 phase;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/*
	 * Start the phase at 1/2 pixel from the 1st pixel at src_x.
	 * 1/4 pixel for YUV.
	 */
	if (channel) {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
		offset += -(1 << PHASE_BITS >> 2);
	} else {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0xffff) >> (16 - PHASE_BITS);
		offset += -(1 << PHASE_BITS >> 1);

		/*
		 * This is a kludge to make sure the scaling factors are
		 * consistent with YUV's luma scaling. We lose 1-bit precision
		 * because of this.
		 */
		scale &= ~1;
	}

	/*
	 * There may be a also small error introduced by precision of scale.
	 * Add half of that as a compromise
	 */
	offset2 = src - dst * scale;
	offset2 >>= 16 - PHASE_BITS;
	phase = offset + (offset2 >> 1);

	/* Ensure +ve values don't touch the sign bit, then truncate negative values */
	if (phase >= 1 << PHASE_BITS)
		phase = (1 << PHASE_BITS) - 1;

	phase &= SCALER_PPF_IPHASE_MASK;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			/*
			 * The register layout documentation is slightly
			 * different to setup the phase in the BCM2712,
			 * but they seem equivalent.
			 */
			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
}

/* LBM (line buffer memory) size, in HVS4/HVS5 words, needed by this
 * plane's vertical scaling.
 */
static u32 __vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 pix_per_line;
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case since src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0] >> 16;

	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;

	return lbm;
}

/* LBM words needed per component on HVS6, depending on the vertical
 * scaler in use for the channel: 4 for PPF, 2 for TPZ, 0 when no
 * vertical scaling (no LBM required).
 */
static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
						unsigned int channel)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	switch (vc4_state->y_scaling[channel]) {
	case VC4_SCALING_PPF:
		return 4;

	case VC4_SCALING_TPZ:
		return 2;

	default:
		return 0;
	}
}

/* Number of components buffered in LBM for @channel: 0 when the
 * channel has no vertical scaling, 2/1 for YUV chroma/luma, 4 for
 * RGBA, 3 for RGB.
 */
static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
				       unsigned int channel)
{
	const struct drm_format_info *info = state->fb->format;
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
		return 0;

	if (info->is_yuv)
		return channel ?
2 : 1; 737 738 if (info->has_alpha) 739 return 4; 740 741 return 3; 742 } 743 744 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state, 745 unsigned int channel) 746 { 747 const struct drm_format_info *info = state->fb->format; 748 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 749 unsigned int channels_scaled = 0; 750 unsigned int components, words, wpc; 751 unsigned int width, lines; 752 unsigned int i; 753 754 /* LBM is meant to use the smaller of source or dest width, but there 755 * is a issue with UV scaling that the size required for the second 756 * channel is based on the source width only. 757 */ 758 if (info->hsub > 1 && channel == 1) 759 width = state->src_w >> 16; 760 else 761 width = min(state->src_w >> 16, state->crtc_w); 762 width = round_up(width / info->hsub, 4); 763 764 wpc = vc4_lbm_words_per_component(state, channel); 765 if (!wpc) 766 return 0; 767 768 components = vc4_lbm_components(state, channel); 769 if (!components) 770 return 0; 771 772 if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha) 773 components -= 1; 774 775 words = width * wpc * components; 776 777 lines = DIV_ROUND_UP(words, 128 / info->hsub); 778 779 for (i = 0; i < 2; i++) 780 if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE) 781 channels_scaled++; 782 783 if (channels_scaled == 1) 784 lines = lines / 2; 785 786 return lines; 787 } 788 789 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state) 790 { 791 const struct drm_format_info *info = state->fb->format; 792 793 if (info->hsub > 1) 794 return max(vc4_lbm_channel_size(state, 0), 795 vc4_lbm_channel_size(state, 1)); 796 else 797 return vc4_lbm_channel_size(state, 0); 798 } 799 800 static u32 vc4_lbm_size(struct drm_plane_state *state) 801 { 802 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 803 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 804 805 /* LBM is not needed when there's no vertical scaling. 
 */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	if (vc4->gen >= VC4_GEN_6_C)
		return __vc6_lbm_size(state);
	else
		return __vc4_lbm_size(state);
}

/* UPM (unified pixel memory) bytes needed to prefetch @plane of the
 * framebuffer, sized from the line stride and the configured number
 * of buffered lines, aligned to the UPM word size.
 */
static size_t vc6_upm_size(const struct drm_plane_state *state,
			   unsigned int plane)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int stride = state->fb->pitches[plane];

	/*
	 * TODO: This only works for raster formats, and is sub-optimal
	 * for buffers with a stride aligned on 32 bytes.
	 */
	unsigned int words_per_line = (stride + 62) / 32;
	unsigned int fetch_region_size = words_per_line * 32;
	unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
	unsigned int buffer_size = fetch_region_size * buffer_lines;

	return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
}

/* Emit the PPF/TPZ scaling parameter words for one channel into the
 * dlist, in the order the HVS expects: H-PPF, V-PPF (+context),
 * H-TPZ, V-TPZ (+recip/context).
 */
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state, vc4_state->src_w[channel],
			      vc4_state->crtc_w, vc4_state->src_x, channel);
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state, vc4_state->src_h[channel],
			      vc4_state->crtc_h, vc4_state->src_y, channel);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state, vc4_state->src_w[channel],
			      vc4_state->crtc_w);
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state, vc4_state->src_h[channel],
			      vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}

/* Estimate the HVS clock load (Hz) and memory-bus load this plane
 * configuration generates, stored in the vc4 plane state for the
 * core-clock/load-tracker checks.
 */
static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * Alpha blending step seems to be pipelined and it's always operating
	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even if the bandwidth/plane required for a single frame is
		 *
		 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
		 * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be punctually higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
					     vc4_state->crtc_h);
		vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
					  (vc4_state->src_h[i] >> 16) *
					  vscale_factor * fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}

/* Reserve LBM for this plane state (if vertical scaling needs it) and
 * patch the LBM address into the already-built dlist at lbm_offset.
 */
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct drm_plane *plane = state->plane;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	/*
	 * NOTE: BCM2712 doesn't need to be aligned, since the size
	 * returned by vc4_lbm_size() is in words already.
	 */
	if (vc4->gen == VC4_GEN_5)
		lbm_size = ALIGN(lbm_size, 64);
	else if (vc4->gen == VC4_GEN_4)
		lbm_size = ALIGN(lbm_size, 32);

	drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
		       plane->base.id, plane->name, lbm_size);

	/* The dlist must already contain a placeholder slot for the
	 * LBM address.
	 */
	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
 */
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
		int ret;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
						 lbm_size, 1,
						 0, 0);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret) {
			drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
			return ret;
		}
	} else {
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}

/* Reserve UPM memory for each framebuffer plane on HVS6, reusing the
 * previous same-sized allocation where possible (keyed by the per-state
 * UPM handle), and OR the UPM base/handle/size fields into the dlist
 * PTR0 words.
 */
static int vc6_plane_allocate_upm(struct drm_plane_state *state)
{
	const struct drm_format_info *info = state->fb->format;
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int i;
	int ret;

	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);

	vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;

	for (i = 0; i < info->num_planes; i++) {
		struct vc4_upm_refcounts *refcount;
		int upm_handle;
		unsigned long irqflags;
		size_t upm_size;

		upm_size = vc6_upm_size(state, i);
		if (!upm_size)
			return -EINVAL;
		upm_handle = vc4_state->upm_handle[i];

		if (upm_handle &&
		    hvs->upm_refcounts[upm_handle].size == upm_size) {
			/* Allocation is the same size as the previous user of
			 * the plane. Keep the allocation.
			 */
			vc4_state->upm_handle[i] = upm_handle;
		} else {
			/* Drop our reference to the old allocation before
			 * making a new one.
			 */
			if (upm_handle &&
			    refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
				vc4_plane_release_upm_ida(hvs, upm_handle);
				vc4_state->upm_handle[i] = 0;
			}

			upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
						     VC4_NUM_UPM_HANDLES,
						     GFP_KERNEL);
			if (upm_handle < 0) {
				drm_dbg(drm, "Out of upm_handles\n");
				return upm_handle;
			}
			vc4_state->upm_handle[i] = upm_handle;

			refcount = &hvs->upm_refcounts[upm_handle];
			refcount_set(&refcount->refcount, 1);
			refcount->size = upm_size;

			spin_lock_irqsave(&hvs->mm_lock, irqflags);
			ret = drm_mm_insert_node_generic(&hvs->upm_mm,
							 &refcount->upm,
							 upm_size, HVS_UBM_WORD_SIZE,
							 0, 0);
			spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
			if (ret) {
				drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
				refcount_set(&refcount->refcount, 0);
				ida_free(&hvs->upm_handles, upm_handle);
				vc4_state->upm_handle[i] = 0;
				return ret;
			}
		}

		refcount = &hvs->upm_refcounts[upm_handle];
		/* Handles are 1-based in the driver, 0-based in the dlist. */
		vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
			VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
				      SCALER6_PTR0_UPM_BASE) |
			VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
				      SCALER6_PTR0_UPM_HANDLE) |
			VC4_SET_FIELD(vc4_state->upm_buffer_lines,
				      SCALER6_PTR0_UPM_BUFF_SIZE);
	}

	return 0;
}

/* Drop this state's references on all its UPM handles, releasing the
 * underlying allocations when the last reference goes away.
 */
static void vc6_plane_free_upm(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = vc4->hvs;
	unsigned int i;

	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);

	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		unsigned int upm_handle;

		upm_handle = vc4_state->upm_handle[i];
		if (!upm_handle)
			continue;

		if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
			vc4_plane_release_upm_ida(hvs, upm_handle);
		vc4_state->upm_handle[i] = 0;
	}
}

/*
 * The colorspace conversion matrices are held in 3 entries in the dlist.
 * Create an array of them, with entries for each full and limited mode, and
 * each supported colorspace.
 */
static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
	{
		/* Limited range */
		{
			/* BT601 */
			SCALER_CSC0_ITR_R_601_5,
			SCALER_CSC1_ITR_R_601_5,
			SCALER_CSC2_ITR_R_601_5,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3,
			SCALER_CSC1_ITR_R_709_3,
			SCALER_CSC2_ITR_R_709_3,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020,
			SCALER_CSC1_ITR_R_2020,
			SCALER_CSC2_ITR_R_2020,
		}
	}, {
		/* Full range */
		{
			/* JFIF */
			SCALER_CSC0_JPEG_JFIF,
			SCALER_CSC1_JPEG_JFIF,
			SCALER_CSC2_JPEG_JFIF,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3_FR,
			SCALER_CSC1_ITR_R_709_3_FR,
			SCALER_CSC2_ITR_R_709_3_FR,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020_FR,
			SCALER_CSC1_ITR_R_2020_FR,
			SCALER_CSC2_ITR_R_2020_FR,
		}
	}
};

/* HVS4: translate the plane's DRM blend mode into the SCALER_POS2
 * alpha mode bits. Formats without alpha always use fixed alpha.
 */
static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
{
	struct drm_device *dev = state->state->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	WARN_ON_ONCE(vc4->gen != VC4_GEN_4);

	if (!state->fb->format->has_alpha)
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
				     SCALER_POS2_ALPHA_MODE);

	switch (state->pixel_blend_mode) {
	case DRM_MODE_BLEND_PIXEL_NONE:
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
				     SCALER_POS2_ALPHA_MODE);
	default:
	case DRM_MODE_BLEND_PREMULTI:
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
				     SCALER_POS2_ALPHA_MODE) |
			SCALER_POS2_ALPHA_PREMULT;
	case
DRM_MODE_BLEND_COVERAGE: 1146 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE, 1147 SCALER_POS2_ALPHA_MODE); 1148 } 1149 } 1150 1151 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state) 1152 { 1153 struct drm_device *dev = state->state->dev; 1154 struct vc4_dev *vc4 = to_vc4_dev(dev); 1155 1156 WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C && 1157 vc4->gen != VC4_GEN_6_D); 1158 1159 switch (vc4->gen) { 1160 default: 1161 case VC4_GEN_5: 1162 case VC4_GEN_6_C: 1163 if (!state->fb->format->has_alpha) 1164 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, 1165 SCALER5_CTL2_ALPHA_MODE); 1166 1167 switch (state->pixel_blend_mode) { 1168 case DRM_MODE_BLEND_PIXEL_NONE: 1169 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, 1170 SCALER5_CTL2_ALPHA_MODE); 1171 default: 1172 case DRM_MODE_BLEND_PREMULTI: 1173 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, 1174 SCALER5_CTL2_ALPHA_MODE) | 1175 SCALER5_CTL2_ALPHA_PREMULT; 1176 case DRM_MODE_BLEND_COVERAGE: 1177 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, 1178 SCALER5_CTL2_ALPHA_MODE); 1179 } 1180 case VC4_GEN_6_D: 1181 /* 2712-D configures fixed alpha mode in CTL0 */ 1182 return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ? 1183 SCALER5_CTL2_ALPHA_PREMULT : 0; 1184 } 1185 } 1186 1187 static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state) 1188 { 1189 struct drm_device *dev = state->state->dev; 1190 struct vc4_dev *vc4 = to_vc4_dev(dev); 1191 1192 WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D); 1193 1194 if (vc4->gen == VC4_GEN_6_D && 1195 (!state->fb->format->has_alpha || 1196 state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE)) 1197 return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED, 1198 SCALER6_CTL0_ALPHA_MASK); 1199 1200 return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK); 1201 } 1202 1203 /* Writes out a full display list for an active plane to the plane's 1204 * private dlist state. 
 */
static int vc4_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	u32 ctl0_offset = vc4_state->dlist_count;
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
	int num_planes = fb->format->num_planes;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	bool mix_plane_alpha;
	bool covers_screen;
	u32 scl0, scl1, pitch0;
	u32 tiling, src_x, src_y;
	u32 width, height;
	u32 hvs_format = format->hvs;
	unsigned int rotation;
	u32 offsets[3] = { 0 };
	int ret, i;

	if (vc4_state->dlist_initialized)
		return 0;

	ret = vc4_plane_setup_clipping_and_scaling(state);
	if (ret)
		return ret;

	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
		/* 0 source size probably means the plane is offscreen */
		vc4_state->dlist_initialized = 1;
		return 0;
	}

	/* src_* are 16.16 fixed point; keep only the integer part here. */
	width = vc4_state->src_w[0] >> 16;
	height = vc4_state->src_h[0] >> 16;

	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing.  For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
		scl0 = vc4_get_scl_field(state, 0);
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}

	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y >> 16;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += height - 1;

	src_x = vc4_state->src_x >> 16;

	switch (base_format_mod) {
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER_CTL0_TILING_LINEAR;
		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
		}

		break;

	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
		u32 tile_size_shift = 12; /* T tiles are 4kb */
		/* Whole-tile offsets, mostly for setting the pitch. */
		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
		u32 tile_w_mask = (1 << tile_w_shift) - 1;
		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
		 * the height (in pixels) of a 4k tile.
		 */
		u32 tile_h_mask = (2 << tile_h_shift) - 1;
		/* For T-tiled, the FB pitch is "how many bytes from one row to
		 * the next, such that
		 *
		 *	pitch * tile_h == tile_size * tiles_per_row
		 */
		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
		u32 tiles_l = src_x >> tile_w_shift;
		u32 tiles_r = tiles_w - tiles_l;
		u32 tiles_t = src_y >> tile_h_shift;
		/* Intra-tile offsets, which modify the base address (the
		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
		 * base address).
		 */
		u32 tile_y = (src_y >> 4) & 1;
		u32 subtile_y = (src_y >> 2) & 3;
		u32 utile_y = src_y & 3;
		u32 x_off = src_x & tile_w_mask;
		u32 y_off = src_y & tile_h_mask;

		/* When Y reflection is requested we must set the
		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
		 * after the initial one should be fetched in descending order,
		 * which makes sense since we start from the last line and go
		 * backward.
		 * Don't know why we need y_off = max_y_off - y_off, but it's
		 * definitely required (I guess it's also related to the "going
		 * backward" situation).
		 */
		if (rotation & DRM_MODE_REFLECT_Y) {
			y_off = tile_h_mask - y_off;
			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
		} else {
			pitch0 = 0;
		}

		tiling = SCALER_CTL0_TILING_256B_OR_T;
		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
		offsets[0] += tiles_t * (tiles_w << tile_size_shift);
		offsets[0] += subtile_y << 8;
		offsets[0] += utile_y << 4;

		/* Rows of tiles alternate left-to-right and right-to-left. */
		if (tiles_t & 1) {
			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
			offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
			offsets[0] -= (1 + !tile_y) << 10;
		} else {
			offsets[0] += tiles_l << tile_size_shift;
			offsets[0] += tile_y << 10;
		}

		break;
	}

	case DRM_FORMAT_MOD_BROADCOM_SAND64:
	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n",
				      param);
			return -EINVAL;
		}

		if (fb->format->format == DRM_FORMAT_P030) {
			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
			tiling = SCALER_CTL0_TILING_128B;
		} else {
			hvs_format = HVS_PIXEL_FORMAT_H264;

			switch (base_format_mod) {
			case DRM_FORMAT_MOD_BROADCOM_SAND64:
				tiling = SCALER_CTL0_TILING_64B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND128:
				tiling = SCALER_CTL0_TILING_128B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND256:
				tiling = SCALER_CTL0_TILING_256B_OR_T;
				break;
			default:
				return -EINVAL;
			}
		}

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 *
		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
		 * word that should be taken as the first pixel.
		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
		 * element within the 128bit word, eg for pixel 3 the value
		 * should be 6.
		 */
		for (i = 0; i < num_planes; i++) {
			u32 tile_w, tile, x_off, pix_per_tile;

			if (fb->format->format == DRM_FORMAT_P030) {
				/*
				 * Spec says: bits [31:4] of the given address
				 * should point to the 128-bit word containing
				 * the desired starting pixel, and bits[3:0]
				 * should be between 0 and 11, indicating which
				 * of the 12-pixels in that 128-bit word is the
				 * first pixel to be used
				 */
				u32 remaining_pixels = src_x % 96;
				u32 aligned = remaining_pixels / 12;
				u32 last_bits = remaining_pixels % 12;

				x_off = aligned * 16 + last_bits;
				tile_w = 128;
				pix_per_tile = 96;
			} else {
				switch (base_format_mod) {
				case DRM_FORMAT_MOD_BROADCOM_SAND64:
					tile_w = 64;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND128:
					tile_w = 128;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND256:
					tile_w = 256;
					break;
				default:
					return -EINVAL;
				}
				pix_per_tile = tile_w / fb->format->cpp[0];
				x_off = (src_x % pix_per_tile) /
					(i ? h_subsample : 1) *
					fb->format->cpp[i];
			}

			tile = src_x / pix_per_tile;

			offsets[i] += param * tile_w * tile;
			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
			offsets[i] += x_off & ~(i ? 1 : 0);
		}

		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* fetch an extra pixel if we don't actually line up with the left edge. */
	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
		width++;

	/* same for the right side */
	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
		width++;

	/* now for the top */
	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
		height++;

	/* and the bottom */
	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
		height++;

	/* For YUV444 the hardware wants double the width, otherwise it doesn't
	 * fetch full width of chroma
	 */
	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
		width <<= 1;

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	if (vc4->gen == VC4_GEN_4) {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				vc4_hvs4_get_alpha_blend_mode(state) |
				VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
					      SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
				vc4_hvs5_get_alpha_blend_mode(state) |
				(mix_plane_alpha ?
					SCALER5_CTL2_ALPHA_MIX : 0)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER5_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER5_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}


	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset[0] = vc4_state->dlist_count;

	for (i = 0; i < num_planes; i++) {
		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);

		vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]);
	}

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			/* SAND formats share pitch word 0 (tile height). */
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words */
	if (vc4_state->is_yuv) {
		enum drm_color_encoding color_encoding = state->color_encoding;
		enum drm_color_range color_range = state->color_range;
		const u32 *ccm;

		/* Clamp out-of-range property values to sane defaults. */
		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
			color_encoding = DRM_COLOR_YCBCR_BT601;
		if (color_range >= DRM_COLOR_RANGE_MAX)
			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;

		ccm = colorspace_coeffs[color_range][color_encoding];

		vc4_dlist_write(vc4_state, ccm[0]);
		vc4_dlist_write(vc4_state, ccm[1]);
		vc4_dlist_write(vc4_state, ccm[2]);
	}

	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	/* Now that the dlist is complete, patch its final size into CTL0. */
	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	return 0;
}

/* Compute the CSC enable/control bits of the gen6 CTL2 dlist word for a
 * YUV plane, indexing the hardware's pre-loaded matrix slots by encoding
 * and range. Returns 0 for non-YUV planes.
 */
static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
{
	struct drm_plane_state *state = &vc4_state->base;
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 ret = 0;

	if (vc4_state->is_yuv) {
		enum drm_color_encoding color_encoding = state->color_encoding;
		enum drm_color_range color_range = state->color_range;

		/* CSC pre-loaded with:
		 * 0 = BT601 limited range
		 * 1 = BT709 limited range
		 * 2 = BT2020 limited range
		 * 3 = BT601 full range
		 * 4 = BT709 full range
		 * 5 = BT2020 full range
		 */
		if (color_encoding > DRM_COLOR_YCBCR_BT2020)
			color_encoding = DRM_COLOR_YCBCR_BT601;
		if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;

		if (vc4->gen == VC4_GEN_6_C) {
			ret |= SCALER6C_CTL2_CSC_ENABLE;
			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
					     SCALER6C_CTL2_BRCM_CFC_CONTROL);
		} else {
			ret |= SCALER6D_CTL2_CSC_ENABLE;
			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
					     SCALER6D_CTL2_BRCM_CFC_CONTROL);
		}
	}

	return ret;
}

/* Gen6 (2712) variant of vc4_plane_mode_set(): writes out a full display
 * list for an active plane to the plane's private dlist state.
 */
static int vc6_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct drm_device *drm = plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	const struct hvs_format *format =
vc4_get_hvs_format(fb->format->format); 1744 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 1745 int num_planes = fb->format->num_planes; 1746 u32 h_subsample = fb->format->hsub; 1747 u32 v_subsample = fb->format->vsub; 1748 bool mix_plane_alpha; 1749 bool covers_screen; 1750 u32 scl0, scl1, pitch0; 1751 u32 tiling, src_x, src_y; 1752 u32 width, height; 1753 u32 hvs_format = format->hvs; 1754 u32 offsets[3] = { 0 }; 1755 unsigned int rotation; 1756 int ret, i; 1757 1758 if (vc4_state->dlist_initialized) 1759 return 0; 1760 1761 ret = vc4_plane_setup_clipping_and_scaling(state); 1762 if (ret) 1763 return ret; 1764 1765 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || 1766 !vc4_state->crtc_w || !vc4_state->crtc_h) { 1767 /* 0 source size probably means the plane is offscreen. 1768 * 0 destination size is a redundant plane. 1769 */ 1770 vc4_state->dlist_initialized = 1; 1771 return 0; 1772 } 1773 1774 width = vc4_state->src_w[0] >> 16; 1775 height = vc4_state->src_h[0] >> 16; 1776 1777 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 1778 * and 4:4:4, scl1 should be set to scl0 so both channels of 1779 * the scaler do the same thing. For YUV, the Y plane needs 1780 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 1781 * the scl fields here. 1782 */ 1783 if (num_planes == 1) { 1784 scl0 = vc4_get_scl_field(state, 0); 1785 scl1 = scl0; 1786 } else { 1787 scl0 = vc4_get_scl_field(state, 1); 1788 scl1 = vc4_get_scl_field(state, 0); 1789 } 1790 1791 rotation = drm_rotation_simplify(state->rotation, 1792 DRM_MODE_ROTATE_0 | 1793 DRM_MODE_REFLECT_X | 1794 DRM_MODE_REFLECT_Y); 1795 1796 /* We must point to the last line when Y reflection is enabled. 
*/ 1797 src_y = vc4_state->src_y >> 16; 1798 if (rotation & DRM_MODE_REFLECT_Y) 1799 src_y += height - 1; 1800 1801 src_x = vc4_state->src_x >> 16; 1802 1803 switch (base_format_mod) { 1804 case DRM_FORMAT_MOD_LINEAR: 1805 tiling = SCALER6_CTL0_ADDR_MODE_LINEAR; 1806 1807 /* Adjust the base pointer to the first pixel to be scanned 1808 * out. 1809 */ 1810 for (i = 0; i < num_planes; i++) { 1811 offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i]; 1812 offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i]; 1813 } 1814 1815 break; 1816 1817 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1818 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 1819 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 1820 u32 components_per_word; 1821 u32 starting_offset; 1822 u32 fetch_count; 1823 1824 if (param > SCALER_TILE_HEIGHT_MASK) { 1825 DRM_DEBUG_KMS("SAND height too large (%d)\n", 1826 param); 1827 return -EINVAL; 1828 } 1829 1830 if (fb->format->format == DRM_FORMAT_P030) { 1831 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; 1832 tiling = SCALER6_CTL0_ADDR_MODE_128B; 1833 } else { 1834 hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE; 1835 1836 switch (base_format_mod) { 1837 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1838 tiling = SCALER6_CTL0_ADDR_MODE_128B; 1839 break; 1840 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1841 tiling = SCALER6_CTL0_ADDR_MODE_256B; 1842 break; 1843 default: 1844 return -EINVAL; 1845 } 1846 } 1847 1848 /* Adjust the base pointer to the first pixel to be scanned 1849 * out. 1850 * 1851 * For P030, y_ptr [31:4] is the 128bit word for the start pixel 1852 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit 1853 * word that should be taken as the first pixel. 1854 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the 1855 * element within the 128bit word, eg for pixel 3 the value 1856 * should be 6. 
1857 */ 1858 for (i = 0; i < num_planes; i++) { 1859 u32 tile_w, tile, x_off, pix_per_tile; 1860 1861 if (fb->format->format == DRM_FORMAT_P030) { 1862 /* 1863 * Spec says: bits [31:4] of the given address 1864 * should point to the 128-bit word containing 1865 * the desired starting pixel, and bits[3:0] 1866 * should be between 0 and 11, indicating which 1867 * of the 12-pixels in that 128-bit word is the 1868 * first pixel to be used 1869 */ 1870 u32 remaining_pixels = src_x % 96; 1871 u32 aligned = remaining_pixels / 12; 1872 u32 last_bits = remaining_pixels % 12; 1873 1874 x_off = aligned * 16 + last_bits; 1875 tile_w = 128; 1876 pix_per_tile = 96; 1877 } else { 1878 switch (base_format_mod) { 1879 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1880 tile_w = 128; 1881 break; 1882 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1883 tile_w = 256; 1884 break; 1885 default: 1886 return -EINVAL; 1887 } 1888 pix_per_tile = tile_w / fb->format->cpp[0]; 1889 x_off = (src_x % pix_per_tile) / 1890 (i ? h_subsample : 1) * 1891 fb->format->cpp[i]; 1892 } 1893 1894 tile = src_x / pix_per_tile; 1895 1896 offsets[i] += param * tile_w * tile; 1897 offsets[i] += src_y / (i ? v_subsample : 1) * tile_w; 1898 offsets[i] += x_off & ~(i ? 1 : 0); 1899 } 1900 1901 components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32; 1902 starting_offset = src_x % components_per_word; 1903 fetch_count = (width + starting_offset + components_per_word - 1) / 1904 components_per_word; 1905 1906 pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) | 1907 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT); 1908 break; 1909 } 1910 1911 default: 1912 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 1913 (long long)fb->modifier); 1914 return -EINVAL; 1915 } 1916 1917 /* fetch an extra pixel if we don't actually line up with the left edge. 
*/ 1918 if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16)) 1919 width++; 1920 1921 /* same for the right side */ 1922 if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) && 1923 vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16)) 1924 width++; 1925 1926 /* now for the top */ 1927 if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16)) 1928 height++; 1929 1930 /* and the bottom */ 1931 if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) && 1932 vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16)) 1933 height++; 1934 1935 /* for YUV444 hardware wants double the width, otherwise it doesn't 1936 * fetch full width of chroma 1937 */ 1938 if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444) 1939 width <<= 1; 1940 1941 /* Don't waste cycles mixing with plane alpha if the set alpha 1942 * is opaque or there is no per-pixel alpha information. 1943 * In any case we use the alpha property value as the fixed alpha. 1944 */ 1945 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 1946 fb->format->has_alpha; 1947 1948 /* Control Word 0: Scaling Configuration & Element Validity*/ 1949 vc4_dlist_write(vc4_state, 1950 SCALER6_CTL0_VALID | 1951 VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) | 1952 vc4_hvs6_get_alpha_mask_mode(state) | 1953 (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) | 1954 VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) | 1955 VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) | 1956 VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) | 1957 VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT)); 1958 1959 /* Position Word 0: Image Position */ 1960 vc4_state->pos0_offset = vc4_state->dlist_count; 1961 vc4_dlist_write(vc4_state, 1962 VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) | 1963 (rotation & DRM_MODE_REFLECT_X ? 
				SCALER6_POS0_HFLIP : 0) |
			VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));

	/* Control Word 2: Alpha Value & CSC */
	vc4_dlist_write(vc4_state,
			vc6_plane_get_csc_mode(vc4_state) |
			vc4_hvs5_get_alpha_blend_mode(state) |
			(mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
			/* The low 4 bits of the 16-bit plane alpha are
			 * dropped to fit the hardware field (presumably
			 * 12 bits wide -- confirm against the
			 * SCALER5_CTL2_ALPHA field definition).
			 */
			VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));

	/* Position Word 1: Scaled Image Dimensions */
	if (!vc4_state->is_unity)
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->crtc_h - 1,
					      SCALER6_POS1_SCL_LINES) |
				VC4_SET_FIELD(vc4_state->crtc_w - 1,
					      SCALER6_POS1_SCL_WIDTH));

	/* Position Word 2: Source Image Size.  Remember this slot so the
	 * async-update path can patch it without rebuilding the dlist.
	 */
	vc4_state->pos2_offset = vc4_state->dlist_count;
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(height - 1,
				      SCALER6_POS2_SRC_LINES) |
			VC4_SET_FIELD(width - 1,
				      SCALER6_POS2_SRC_WIDTH));

	/* Position Word 3: Context.  Opaque fill pattern; the meaning of
	 * this value is not visible from this driver code.
	 */
	vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/*
	 * TODO: This only covers Raster Scan Order planes
	 */
	for (i = 0; i < num_planes; i++) {
		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
		dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];

		/* Pointer Word 0.  The slot index is recorded so the
		 * address can be patched later (async flips).
		 */
		vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
				/*
				 * The UPM buffer will be allocated in
				 * vc6_plane_allocate_upm().
				 */
				VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
					      SCALER6_PTR0_UPPER_ADDR));

		/* Pointer Word 1: low 32 bits of the plane's DMA address. */
		vc4_dlist_write(vc4_state, lower_32_bits(paddr));

		/* Pointer Word 2: pitch, except for SAND layouts where the
		 * precomputed pitch0 word is used instead.
		 */
		if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
		    base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER6_PTR2_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/*
	 * Palette Word 0
	 * TODO: We're not using the palette mode
	 */

	/*
	 * Trans Word 0
	 * TODO: It's only relevant if we set the trans_rgb bit in the
	 * control word 0, and we don't at the moment.
	 */

	vc4_state->lbm_offset = 0;

	if (!vc4_state->is_unity || fb->format->is_yuv) {
		/*
		 * Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			if (num_planes > 1)
				/*
				 * Emit Cb/Cr as channel 0 and Y as channel
				 * 1. This matches how we set up scl0/scl1
				 * above.
				 */
				vc4_write_scaling_parameters(state, 1);

			vc4_write_scaling_parameters(state, 0);
		}

		/*
		 * If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel =
				VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
					      SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	vc4_dlist_write(vc4_state, SCALER6_CTL0_END);

	/* Now that the final dlist length is known, patch the NEXT field
	 * of Control Word 0 (written earlier at dlist[0]).
	 */
	vc4_state->dlist[0] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;

	/*
	 * Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/*
	 * Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
		       plane->base.id, plane->name, vc4_state->dlist_count);

	return 0;
}

/* If a modeset involves changing the setup of a plane, the atomic
 * infrastructure will call this to validate a proposed plane setup.
 * However, if a plane isn't getting updated, this (and the
 * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
 * compute the dlist here and have all active plane dlists get updated
 * in the CRTC's flush.
 */
static int vc4_plane_atomic_check(struct drm_plane *plane,
				  struct drm_atomic_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
	int ret;

	vc4_state->dlist_count = 0;

	if (!plane_enabled(new_plane_state)) {
		struct drm_plane_state *old_plane_state =
			drm_atomic_get_old_plane_state(state, plane);

		/* A gen6 plane that is being disabled gives back its UPM
		 * allocation here.
		 */
		if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
		    plane_enabled(old_plane_state)) {
			vc6_plane_free_upm(new_plane_state);
		}
		return 0;
	}

	/* Build the dlist for the proposed state; gen6 has its own
	 * dlist layout.
	 */
	if (vc4->gen >= VC4_GEN_6_C)
		ret = vc6_plane_mode_set(plane, new_plane_state);
	else
		ret = vc4_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	/* Zero-sized source or destination: nothing will be scanned out,
	 * so skip the LBM/UPM reservations.
	 */
	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
	    !vc4_state->crtc_w || !vc4_state->crtc_h)
		return 0;

	ret = vc4_plane_allocate_lbm(new_plane_state);
	if (ret)
		return ret;

	if (vc4->gen >= VC4_GEN_6_C) {
		ret = vc6_plane_allocate_upm(new_plane_state);
		if (ret)
			return ret;
	}

	return 0;
}

static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
	/* No contents here.  Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}

/* Copy the plane's software dlist into the HVS dlist area handed to us
 * by the CRTC.  Returns the dlist size in 32-bit words; note the size
 * is returned even when the device has gone away and nothing could be
 * written.
 */
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	int i;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		goto out;

	vc4_state->hw_dlist = dlist;

	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
	for (i = 0; i < vc4_state->dlist_count; i++)
		writel(vc4_state->dlist[i], &dlist[i]);

	drm_dev_exit(idx);

out:
	return vc4_state->dlist_count;
}

/* Number of 32-bit dlist words described by @state. */
u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	return vc4_state->dlist_count;
}

/* Updates the plane to immediately (well, once the FIFO needs
 * refilling) scan out from at a new framebuffer.
 */
void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	/* We're skipping the address adjustment for negative origin,
	 * because this is only called on the primary plane.
	 */
	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);

	/* NOTE(review): other generation checks in this file use
	 * gen >= VC4_GEN_6_C; confirm whether later gen6 variants also
	 * need the two-word (40-bit address) update taken in this branch.
	 */
	if (vc4->gen == VC4_GEN_6_C) {
		u32 value;

		/* Patch only the upper-address field of Pointer Word 0,
		 * keeping the other bits already programmed.
		 */
		value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
					~SCALER6_PTR0_UPPER_ADDR_MASK;
		value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
				       SCALER6_PTR0_UPPER_ADDR);

		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
		vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;

		value = lower_32_bits(dma_addr);
		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
		vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
	} else {
		u32 addr;

		addr = (u32)dma_addr;

		/* Write the new address into the hardware immediately.  The
		 * scanout will start from this address as soon as the FIFO
		 * needs to refill with pixels.
		 */
		writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);

		/* Also update the CPU-side dlist copy, so that any later
		 * atomic updates that don't do a new modeset on our plane
		 * also use our updated address.
		 */
		vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
	}

	drm_dev_exit(idx);
}

/* Apply an async (non-modeset) plane update: copy the new scanout
 * parameters into the current state, then patch only the pos0/pos2/ptr0
 * dlist words, both in the CPU copy and in the HVS dlist memory that is
 * currently being scanned out.
 */
static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_atomic_state *state)
{
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state, *new_vc4_state;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	/* Swap the fb pointers (rather than plain assignment) and copy
	 * every scanout-relevant field from the new state into the
	 * current state.
	 */
	swap(plane->state->fb, new_plane_state->fb);
	plane->state->crtc_x = new_plane_state->crtc_x;
	plane->state->crtc_y = new_plane_state->crtc_y;
	plane->state->crtc_w = new_plane_state->crtc_w;
	plane->state->crtc_h = new_plane_state->crtc_h;
	plane->state->src_x = new_plane_state->src_x;
	plane->state->src_y = new_plane_state->src_y;
	plane->state->src_w = new_plane_state->src_w;
	plane->state->src_h = new_plane_state->src_h;
	plane->state->alpha = new_plane_state->alpha;
	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
	plane->state->rotation = new_plane_state->rotation;
	plane->state->zpos = new_plane_state->zpos;
	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
	plane->state->color_encoding = new_plane_state->color_encoding;
	plane->state->color_range = new_plane_state->color_range;
	plane->state->src = new_plane_state->src;
	plane->state->dst = new_plane_state->dst;
	plane->state->visible = new_plane_state->visible;

	new_vc4_state = to_vc4_plane_state(new_plane_state);
	vc4_state = to_vc4_plane_state(plane->state);

	/* Mirror the derived vc4-specific fields as well. */
	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset[0]] =
		new_vc4_state->dlist[vc4_state->ptr0_offset[0]];

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);

	drm_dev_exit(idx);
}

/* Decide whether the requested update can be done asynchronously.
 * Only the pos0, pos2 and ptr0 dlist words (plus the LBM slot) may
 * differ from the current state; any other change returns -EINVAL to
 * force a synchronous update.
 */
static int vc4_plane_atomic_async_check(struct drm_plane *plane,
					struct drm_atomic_state *state, bool flip)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
	int ret;
	u32 i;

	/* Compute the dlist for the proposed state so it can be compared
	 * word-by-word against the current one.
	 */
	if (vc4->gen <= VC4_GEN_5)
		ret = vc4_plane_mode_set(plane, new_plane_state);
	else
		ret = vc6_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	old_vc4_state = to_vc4_plane_state(plane->state);
	new_vc4_state =
		to_vc4_plane_state(new_plane_state);

	/* No HVS dlist has been uploaded yet for this plane; an async
	 * patch has nothing to target.
	 */
	if (!new_vc4_state->hw_dlist)
		return -EINVAL;

	/* Structural changes (dlist length, word offsets, LBM size)
	 * cannot be applied asynchronously.
	 */
	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
	    old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
		return -EINVAL;

	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
	 * if anything else has changed, fallback to a sync update.
	 */
	for (i = 0; i < new_vc4_state->dlist_count; i++) {
		if (i == new_vc4_state->pos0_offset ||
		    i == new_vc4_state->pos2_offset ||
		    i == new_vc4_state->ptr0_offset[0] ||
		    (new_vc4_state->lbm_offset &&
		     i == new_vc4_state->lbm_offset))
			continue;

		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
			return -EINVAL;
	}

	return 0;
}

/* Run the generic GEM prepare_fb helper (fence handling) and take a
 * usecnt reference on the framebuffer's backing BO.
 */
static int vc4_prepare_fb(struct drm_plane *plane,
			  struct drm_plane_state *state)
{
	struct vc4_bo *bo;
	int ret;

	if (!state->fb)
		return 0;

	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);

	ret = drm_gem_plane_helper_prepare_fb(plane, state);
	if (ret)
		return ret;

	return vc4_bo_inc_usecnt(bo);
}

/* Drop the usecnt reference taken in vc4_prepare_fb(). */
static void vc4_cleanup_fb(struct drm_plane *plane,
			   struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (!state->fb)
		return;

	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
	vc4_bo_dec_usecnt(bo);
}

static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.prepare_fb = vc4_prepare_fb,
	.cleanup_fb = vc4_cleanup_fb,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

/* Same hooks as vc4_plane_helper_funcs, but without the BO-usecnt
 * prepare_fb/cleanup_fb pair (not installed for gen5+ -- see
 * vc4_plane_init()).
 */
static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

/* Report which (format, modifier) pairs are scanout-capable:
 * T_TILING for RGB formats only, SAND for NV12/NV21 (SAND128 only for
 * P030), and linear for everything else.
 */
static bool vc4_format_mod_supported(struct drm_plane *plane,
				     uint32_t format,
				     uint64_t modifier)
{
	/* Support T_TILING for RGB formats only. */
	switch (format) {
	case DRM_FORMAT_XRGB8888:
	case DRM_FORMAT_ARGB8888:
	case DRM_FORMAT_ABGR8888:
	case DRM_FORMAT_XBGR8888:
	case DRM_FORMAT_RGB565:
	case DRM_FORMAT_BGR565:
	case DRM_FORMAT_ARGB1555:
	case DRM_FORMAT_XRGB1555:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_NV21:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_P030:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_RGBX1010102:
	case DRM_FORMAT_BGRX1010102:
	case DRM_FORMAT_RGBA1010102:
	case DRM_FORMAT_BGRA1010102:
	case DRM_FORMAT_XRGB4444:
	case DRM_FORMAT_ARGB4444:
	case DRM_FORMAT_XBGR4444:
	case DRM_FORMAT_ABGR4444:
	case DRM_FORMAT_RGBX4444:
	case DRM_FORMAT_RGBA4444:
	case DRM_FORMAT_BGRX4444:
	case DRM_FORMAT_BGRA4444:
	case DRM_FORMAT_RGB332:
	case DRM_FORMAT_BGR233:
	case DRM_FORMAT_YUV422:
	case DRM_FORMAT_YVU422:
	case DRM_FORMAT_YUV420:
	case DRM_FORMAT_YVU420:
	case DRM_FORMAT_NV16:
	case DRM_FORMAT_NV61:
	default:
		return (modifier == DRM_FORMAT_MOD_LINEAR);
	}
}

static const struct drm_plane_funcs vc4_plane_funcs = {
	.update_plane = drm_atomic_helper_update_plane,
	.disable_plane = drm_atomic_helper_disable_plane,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
	.format_mod_supported = vc4_format_mod_supported,
};

/* Allocate and register one plane of the given type, wiring up the
 * generation-appropriate helper funcs and the standard alpha, blend,
 * rotation and color properties.
 */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_plane *plane;
	struct vc4_plane *vc4_plane;
	u32 formats[ARRAY_SIZE(hvs_formats)];
	int num_formats = 0;
	unsigned i;
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};

	/* Filter out formats flagged hvs5_only on pre-gen5 hardware. */
	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
			formats[num_formats] = hvs_formats[i].drm;
			num_formats++;
		}
	}

	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
					       possible_crtcs,
					       &vc4_plane_funcs,
					       formats, num_formats,
					       modifiers, type, NULL);
	if (IS_ERR(vc4_plane))
		return ERR_CAST(vc4_plane);
	plane = &vc4_plane->base;

	if (vc4->gen >= VC4_GEN_5)
		drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
	else
		drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_blend_mode_property(plane,
					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
					     BIT(DRM_MODE_BLEND_PREMULTI) |
					     BIT(DRM_MODE_BLEND_COVERAGE));

	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	drm_plane_create_color_properties(plane,
					  BIT(DRM_COLOR_YCBCR_BT601) |
					  BIT(DRM_COLOR_YCBCR_BT709) |
					  BIT(DRM_COLOR_YCBCR_BT2020),
					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
					  DRM_COLOR_YCBCR_BT709,
					  DRM_COLOR_YCBCR_LIMITED_RANGE);

	/* The primary plane is pinned to the bottom of the stack. */
	if (type == DRM_PLANE_TYPE_PRIMARY)
		drm_plane_create_zpos_immutable_property(plane, 0);

	return plane;
}

#define VC4_NUM_OVERLAY_PLANES	16

/* Create the overlay planes shared by all CRTCs and one legacy cursor
 * plane per CRTC.  Always returns 0; individual plane-init failures
 * just reduce the number of exposed planes.
 */
int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes.  We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small an plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit.  As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
				       GENMASK(drm->mode_config.num_crtc - 1, 0));

		/* Not fatal: a failed overlay just isn't exposed. */
		if (IS_ERR(plane))
			continue;

		/* Create zpos property. Max of all the overlays + 1 primary +
		 * 1 cursor plane on a crtc.
		 */
		drm_plane_create_zpos_property(plane, i + 1, 1,
					       VC4_NUM_OVERLAY_PLANES + 1);
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since the zpos fallback is that planes are rendered by plane
		 * ID order, and that then puts the cursor on top.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
					      drm_crtc_mask(crtc));
		if (!IS_ERR(cursor_plane)) {
			crtc->cursor = cursor_plane;

			drm_plane_create_zpos_property(cursor_plane,
						       VC4_NUM_OVERLAY_PLANES + 1,
						       1,
						       VC4_NUM_OVERLAY_PLANES + 1);
		}
	}

	return 0;
}