1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Broadcom 4 */ 5 6 /** 7 * DOC: VC4 plane module 8 * 9 * Each DRM plane is a layer of pixels being scanned out by the HVS. 10 * 11 * At atomic modeset check time, we compute the HVS display element 12 * state that would be necessary for displaying the plane (giving us a 13 * chance to figure out if a plane configuration is invalid), then at 14 * atomic flush time the CRTC will ask us to write our element state 15 * into the region of the HVS that it has allocated for us. 16 */ 17 18 #include <drm/drm_atomic.h> 19 #include <drm/drm_atomic_helper.h> 20 #include <drm/drm_atomic_uapi.h> 21 #include <drm/drm_blend.h> 22 #include <drm/drm_drv.h> 23 #include <drm/drm_fb_dma_helper.h> 24 #include <drm/drm_fourcc.h> 25 #include <drm/drm_framebuffer.h> 26 #include <drm/drm_gem_atomic_helper.h> 27 28 #include "uapi/drm/vc4_drm.h" 29 30 #include "vc4_drv.h" 31 #include "vc4_regs.h" 32 33 static const struct hvs_format { 34 u32 drm; /* DRM_FORMAT_* */ 35 u32 hvs; /* HVS_FORMAT_* */ 36 u32 pixel_order; 37 u32 pixel_order_hvs5; 38 bool hvs5_only; 39 } hvs_formats[] = { 40 { 41 .drm = DRM_FORMAT_XRGB8888, 42 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 43 .pixel_order = HVS_PIXEL_ORDER_ABGR, 44 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 45 }, 46 { 47 .drm = DRM_FORMAT_ARGB8888, 48 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 49 .pixel_order = HVS_PIXEL_ORDER_ABGR, 50 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 51 }, 52 { 53 .drm = DRM_FORMAT_ABGR8888, 54 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 55 .pixel_order = HVS_PIXEL_ORDER_ARGB, 56 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 57 }, 58 { 59 .drm = DRM_FORMAT_XBGR8888, 60 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 61 .pixel_order = HVS_PIXEL_ORDER_ARGB, 62 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 63 }, 64 { 65 .drm = DRM_FORMAT_RGB565, 66 .hvs = HVS_PIXEL_FORMAT_RGB565, 67 .pixel_order = HVS_PIXEL_ORDER_XRGB, 68 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB, 69 }, 70 { 71 .drm = DRM_FORMAT_BGR565, 72 .hvs = HVS_PIXEL_FORMAT_RGB565, 73 .pixel_order = HVS_PIXEL_ORDER_XBGR, 74 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR, 75 }, 76 { 77 .drm = DRM_FORMAT_ARGB1555, 78 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 79 .pixel_order = HVS_PIXEL_ORDER_ABGR, 80 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 81 }, 82 { 83 .drm = DRM_FORMAT_XRGB1555, 84 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 85 .pixel_order = HVS_PIXEL_ORDER_ABGR, 86 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 87 }, 88 { 89 .drm = DRM_FORMAT_RGB888, 90 .hvs = HVS_PIXEL_FORMAT_RGB888, 91 .pixel_order = HVS_PIXEL_ORDER_XRGB, 92 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB, 93 }, 94 { 95 .drm = DRM_FORMAT_BGR888, 96 .hvs = HVS_PIXEL_FORMAT_RGB888, 97 .pixel_order = HVS_PIXEL_ORDER_XBGR, 98 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR, 99 }, 100 { 101 .drm = DRM_FORMAT_YUV422, 102 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 103 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 104 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 105 }, 106 { 107 .drm = DRM_FORMAT_YVU422, 108 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 109 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 110 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 111 }, 112 { 113 .drm = DRM_FORMAT_YUV444, 114 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 115 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 116 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 117 }, 118 { 119 .drm = DRM_FORMAT_YVU444, 120 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 121 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 122 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 123 }, 124 { 125 .drm = DRM_FORMAT_YUV420, 126 .hvs = 
HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 127 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 128 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 129 }, 130 { 131 .drm = DRM_FORMAT_YVU420, 132 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 133 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 134 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 135 }, 136 { 137 .drm = DRM_FORMAT_NV12, 138 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 139 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 140 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 141 }, 142 { 143 .drm = DRM_FORMAT_NV21, 144 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 145 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 146 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 147 }, 148 { 149 .drm = DRM_FORMAT_NV16, 150 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 151 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 152 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 153 }, 154 { 155 .drm = DRM_FORMAT_NV61, 156 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 157 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 158 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 159 }, 160 { 161 .drm = DRM_FORMAT_P030, 162 .hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT, 163 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 164 .hvs5_only = true, 165 }, 166 { 167 .drm = DRM_FORMAT_XRGB2101010, 168 .hvs = HVS_PIXEL_FORMAT_RGBA1010102, 169 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 170 .hvs5_only = true, 171 }, 172 { 173 .drm = DRM_FORMAT_ARGB2101010, 174 .hvs = HVS_PIXEL_FORMAT_RGBA1010102, 175 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 176 .hvs5_only = true, 177 }, 178 { 179 .drm = DRM_FORMAT_ABGR2101010, 180 .hvs = HVS_PIXEL_FORMAT_RGBA1010102, 181 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 182 .hvs5_only = true, 183 }, 184 { 185 .drm = DRM_FORMAT_XBGR2101010, 186 .hvs = HVS_PIXEL_FORMAT_RGBA1010102, 187 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 188 .hvs5_only = true, 189 }, 190 { 191 .drm = DRM_FORMAT_RGB332, 192 .hvs = HVS_PIXEL_FORMAT_RGB332, 193 .pixel_order = HVS_PIXEL_ORDER_ARGB, 194 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 195 }, 196 { 197 .drm = DRM_FORMAT_BGR233, 198 .hvs = HVS_PIXEL_FORMAT_RGB332, 199 .pixel_order = HVS_PIXEL_ORDER_ABGR, 200 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 201 }, 202 { 203 .drm = DRM_FORMAT_XRGB4444, 204 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 205 .pixel_order = HVS_PIXEL_ORDER_ABGR, 206 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 207 }, 208 { 209 .drm = DRM_FORMAT_ARGB4444, 210 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 211 .pixel_order = HVS_PIXEL_ORDER_ABGR, 212 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 213 }, 214 { 215 .drm = DRM_FORMAT_XBGR4444, 216 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 217 .pixel_order = HVS_PIXEL_ORDER_ARGB, 218 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 219 }, 220 { 221 .drm = DRM_FORMAT_ABGR4444, 222 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 223 .pixel_order = HVS_PIXEL_ORDER_ARGB, 224 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 225 }, 226 { 227 .drm = DRM_FORMAT_BGRX4444, 228 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 229 .pixel_order = HVS_PIXEL_ORDER_RGBA, 230 .pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA, 231 }, 232 { 233 .drm = DRM_FORMAT_BGRA4444, 234 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 235 .pixel_order = HVS_PIXEL_ORDER_RGBA, 236 .pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA, 237 }, 238 { 239 .drm = DRM_FORMAT_RGBX4444, 240 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 241 .pixel_order = HVS_PIXEL_ORDER_BGRA, 242 .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA, 243 }, 244 { 245 .drm = DRM_FORMAT_RGBA4444, 246 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 247 .pixel_order = HVS_PIXEL_ORDER_BGRA, 248 .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA, 249 }, 250 }; 251 252 static const struct 
hvs_format *vc4_get_hvs_format(u32 drm_format) 253 { 254 unsigned i; 255 256 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 257 if (hvs_formats[i].drm == drm_format) 258 return &hvs_formats[i]; 259 } 260 261 return NULL; 262 } 263 264 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) 265 { 266 if (dst == src >> 16) 267 return VC4_SCALING_NONE; 268 if (3 * dst >= 2 * (src >> 16)) 269 return VC4_SCALING_PPF; 270 else 271 return VC4_SCALING_TPZ; 272 } 273 274 static bool plane_enabled(struct drm_plane_state *state) 275 { 276 return state->fb && !WARN_ON(!state->crtc); 277 } 278 279 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) 280 { 281 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 282 struct vc4_hvs *hvs = vc4->hvs; 283 struct vc4_plane_state *vc4_state; 284 unsigned int i; 285 286 if (WARN_ON(!plane->state)) 287 return NULL; 288 289 vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL); 290 if (!vc4_state) 291 return NULL; 292 293 memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); 294 295 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { 296 if (vc4_state->upm_handle[i]) 297 refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount); 298 } 299 300 vc4_state->dlist_initialized = 0; 301 302 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); 303 304 if (vc4_state->dlist) { 305 vc4_state->dlist = kmemdup(vc4_state->dlist, 306 vc4_state->dlist_count * 4, 307 GFP_KERNEL); 308 if (!vc4_state->dlist) { 309 kfree(vc4_state); 310 return NULL; 311 } 312 vc4_state->dlist_size = vc4_state->dlist_count; 313 } 314 315 return &vc4_state->base; 316 } 317 318 static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle) 319 { 320 struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle]; 321 unsigned long irqflags; 322 323 spin_lock_irqsave(&hvs->mm_lock, irqflags); 324 drm_mm_remove_node(&refcount->upm); 325 spin_unlock_irqrestore(&hvs->mm_lock, irqflags); 326 refcount->upm.start = 0; 327 refcount->upm.size = 0; 328 refcount->size = 0; 329 330 ida_free(&hvs->upm_handles, upm_handle); 331 } 332 333 static void vc4_plane_destroy_state(struct drm_plane *plane, 334 struct drm_plane_state *state) 335 { 336 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 337 struct vc4_hvs *hvs = vc4->hvs; 338 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 339 unsigned int i; 340 341 if (drm_mm_node_allocated(&vc4_state->lbm)) { 342 unsigned long irqflags; 343 344 spin_lock_irqsave(&hvs->mm_lock, irqflags); 345 drm_mm_remove_node(&vc4_state->lbm); 346 spin_unlock_irqrestore(&hvs->mm_lock, irqflags); 347 } 348 349 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { 350 struct vc4_upm_refcounts *refcount; 351 352 if (!vc4_state->upm_handle[i]) 353 continue; 354 355 refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]]; 356 357 if (refcount_dec_and_test(&refcount->refcount)) 358 vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]); 359 } 360 361 kfree(vc4_state->dlist); 362 __drm_atomic_helper_plane_destroy_state(&vc4_state->base); 363 kfree(state); 364 } 365 366 /* Called during init to allocate the plane's atomic state. 
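 *
 * The backing struct is a zeroed vc4_plane_state, so the dlist, the LBM
 * node and the UPM handles all start out empty and are only populated
 * later by vc4_plane_mode_set() and the allocation helpers.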
*/ 367 static void vc4_plane_reset(struct drm_plane *plane) 368 { 369 struct vc4_plane_state *vc4_state; 370 371 if (plane->state) 372 __drm_atomic_helper_plane_destroy_state(plane->state); 373 374 kfree(plane->state); 375 376 vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); 377 if (!vc4_state) 378 return; 379 380 __drm_atomic_helper_plane_reset(plane, &vc4_state->base); 381 } 382 383 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) 384 { 385 if (vc4_state->dlist_count == vc4_state->dlist_size) { 386 u32 new_size = max(4u, vc4_state->dlist_count * 2); 387 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL); 388 389 if (!new_dlist) 390 return; 391 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4); 392 393 kfree(vc4_state->dlist); 394 vc4_state->dlist = new_dlist; 395 vc4_state->dlist_size = new_size; 396 } 397 398 vc4_state->dlist_count++; 399 } 400 401 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) 402 { 403 unsigned int idx = vc4_state->dlist_count; 404 405 vc4_dlist_counter_increment(vc4_state); 406 vc4_state->dlist[idx] = val; 407 } 408 409 /* Returns the scl0/scl1 field based on whether the dimensions need to 410 * be up/down/non-scaled. 411 * 412 * This is a replication of a table from the spec. 413 */ 414 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) 415 { 416 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 417 418 switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { 419 case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: 420 return SCALER_CTL0_SCL_H_PPF_V_PPF; 421 case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: 422 return SCALER_CTL0_SCL_H_TPZ_V_PPF; 423 case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: 424 return SCALER_CTL0_SCL_H_PPF_V_TPZ; 425 case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: 426 return SCALER_CTL0_SCL_H_TPZ_V_TPZ; 427 case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: 428 return SCALER_CTL0_SCL_H_PPF_V_NONE; 429 case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: 430 return SCALER_CTL0_SCL_H_NONE_V_PPF; 431 case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: 432 return SCALER_CTL0_SCL_H_NONE_V_TPZ; 433 case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: 434 return SCALER_CTL0_SCL_H_TPZ_V_NONE; 435 default: 436 case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: 437 /* The unity case is independently handled by 438 * SCALER_CTL0_UNITY. 
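		 * vc4_plane_mode_set() sets SCALER_CTL0_UNITY (or its HVS5/6
		 * equivalent) from vc4_state->is_unity, so the value returned
		 * here is effectively a don't-care.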
439 */ 440 return 0; 441 } 442 } 443 444 static int vc4_plane_margins_adj(struct drm_plane_state *pstate) 445 { 446 struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate); 447 unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay; 448 struct drm_crtc_state *crtc_state; 449 450 crtc_state = drm_atomic_get_new_crtc_state(pstate->state, 451 pstate->crtc); 452 453 vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom); 454 if (!left && !right && !top && !bottom) 455 return 0; 456 457 if (left + right >= crtc_state->mode.hdisplay || 458 top + bottom >= crtc_state->mode.vdisplay) 459 return -EINVAL; 460 461 adjhdisplay = crtc_state->mode.hdisplay - (left + right); 462 vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x * 463 adjhdisplay, 464 crtc_state->mode.hdisplay); 465 vc4_pstate->crtc_x += left; 466 if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right) 467 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right; 468 469 adjvdisplay = crtc_state->mode.vdisplay - (top + bottom); 470 vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y * 471 adjvdisplay, 472 crtc_state->mode.vdisplay); 473 vc4_pstate->crtc_y += top; 474 if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom) 475 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom; 476 477 vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w * 478 adjhdisplay, 479 crtc_state->mode.hdisplay); 480 vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h * 481 adjvdisplay, 482 crtc_state->mode.vdisplay); 483 484 if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h) 485 return -EINVAL; 486 487 return 0; 488 } 489 490 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) 491 { 492 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 493 struct drm_framebuffer *fb = state->fb; 494 int num_planes = fb->format->num_planes; 495 struct drm_crtc_state *crtc_state; 496 u32 h_subsample = fb->format->hsub; 497 u32 v_subsample = fb->format->vsub; 498 int ret; 499 500 crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc); 501 if (!crtc_state) { 502 DRM_DEBUG_KMS("Invalid crtc state\n"); 503 return -EINVAL; 504 } 505 506 ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1, 507 INT_MAX, true, true); 508 if (ret) 509 return ret; 510 511 vc4_state->src_x = state->src.x1; 512 vc4_state->src_y = state->src.y1; 513 vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x; 514 vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y; 515 516 vc4_state->crtc_x = state->dst.x1; 517 vc4_state->crtc_y = state->dst.y1; 518 vc4_state->crtc_w = state->dst.x2 - state->dst.x1; 519 vc4_state->crtc_h = state->dst.y2 - state->dst.y1; 520 521 ret = vc4_plane_margins_adj(state); 522 if (ret) 523 return ret; 524 525 vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], 526 vc4_state->crtc_w); 527 vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], 528 vc4_state->crtc_h); 529 530 vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && 531 vc4_state->y_scaling[0] == VC4_SCALING_NONE); 532 533 if (num_planes > 1) { 534 vc4_state->is_yuv = true; 535 536 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; 537 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; 538 539 vc4_state->x_scaling[1] = 540 vc4_get_scaling_mode(vc4_state->src_w[1], 541 vc4_state->crtc_w); 542 vc4_state->y_scaling[1] = 543 vc4_get_scaling_mode(vc4_state->src_h[1], 544 vc4_state->crtc_h); 545 546 /* YUV conversion requires that horizontal scaling be 
enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;

		/* Similarly UV needs vertical scaling to be enabled.
		 * Without this a 1:1 scaled YUV422 plane isn't rendered.
		 */
		if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
			vc4_state->y_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}

static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale, recip;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	scale = src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			/*
			 * The BCM2712 is lacking BIT(31) compared to
			 * the previous generations, but we don't use
			 * it.
			 */
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

/* phase magnitude bits */
#define PHASE_BITS 6

static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
			  u32 xy, int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale = src / dst;
	s32 offset, offset2;
	s32 phase;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/*
	 * Start the phase at 1/2 pixel from the 1st pixel at src_x.
	 * 1/4 pixel for YUV.
	 */
	if (channel) {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
		offset += -(1 << PHASE_BITS >> 2);
	} else {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0xffff) >> (16 - PHASE_BITS);
		offset += -(1 << PHASE_BITS >> 1);

		/*
		 * This is a kludge to make sure the scaling factors are
		 * consistent with YUV's luma scaling. We lose 1-bit precision
		 * because of this.
		 */
		scale &= ~1;
	}

	/*
	 * There may also be a small error introduced by the precision of
	 * scale. Add half of that as a compromise.
	 */
	offset2 = src - dst * scale;
	offset2 >>= 16 - PHASE_BITS;
	phase = offset + (offset2 >> 1);

	/* Ensure +ve values don't touch the sign bit, then truncate negative values */
	if (phase >= 1 << PHASE_BITS)
		phase = (1 << PHASE_BITS) - 1;

	phase &= SCALER_PPF_IPHASE_MASK;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			/*
			 * The register layout documentation is slightly
			 * different to setup the phase in the BCM2712,
			 * but they seem equivalent.
			 */
			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
}
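/*
 * A rough worked example of the fixed-point math above (numbers purely
 * illustrative): if src (16.16 fixed point) is exactly twice dst, then
 * scale = src / dst = 0x20000 and recip = ~0 / scale = 0x7fff, i.e. about
 * (1 << 32) / scale as noted in vc4_write_tpz(). For PPF with
 * PHASE_BITS = 6 the initial offset is -(1 << PHASE_BITS >> 1) = -32,
 * i.e. -1/2 pixel in 1/64-pixel steps (or -16, -1/4 pixel, for the
 * non-zero channel), before the scale-error compromise is added.
 */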
static u32 __vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 pix_per_line;
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0] >> 16;

	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;

	return lbm;
}

static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
						unsigned int channel)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	switch (vc4_state->y_scaling[channel]) {
	case VC4_SCALING_PPF:
		return 4;

	case VC4_SCALING_TPZ:
		return 2;

	default:
		return 0;
	}
}

static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
				       unsigned int channel)
{
	const struct drm_format_info *info = state->fb->format;
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
		return 0;

	if (info->is_yuv)
		return channel ? 2 : 1;

	if (info->has_alpha)
		return 4;

	return 3;
}

static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
					 unsigned int channel)
{
	const struct drm_format_info *info = state->fb->format;
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int channels_scaled = 0;
	unsigned int components, words, wpc;
	unsigned int width, lines;
	unsigned int i;

	/* LBM is meant to use the smaller of source or dest width, but there
	 * is an issue with UV scaling that the size required for the second
	 * channel is based on the source width only.
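	 * For a subsampled format such as NV12 (hsub == 2) this means channel
	 * 1 is sized from src_w even when crtc_w would be smaller.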
756 */ 757 if (info->hsub > 1 && channel == 1) 758 width = state->src_w >> 16; 759 else 760 width = min(state->src_w >> 16, state->crtc_w); 761 width = round_up(width / info->hsub, 4); 762 763 wpc = vc4_lbm_words_per_component(state, channel); 764 if (!wpc) 765 return 0; 766 767 components = vc4_lbm_components(state, channel); 768 if (!components) 769 return 0; 770 771 if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha) 772 components -= 1; 773 774 words = width * wpc * components; 775 776 lines = DIV_ROUND_UP(words, 128 / info->hsub); 777 778 for (i = 0; i < 2; i++) 779 if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE) 780 channels_scaled++; 781 782 if (channels_scaled == 1) 783 lines = lines / 2; 784 785 return lines; 786 } 787 788 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state) 789 { 790 const struct drm_format_info *info = state->fb->format; 791 792 if (info->hsub > 1) 793 return max(vc4_lbm_channel_size(state, 0), 794 vc4_lbm_channel_size(state, 1)); 795 else 796 return vc4_lbm_channel_size(state, 0); 797 } 798 799 static u32 vc4_lbm_size(struct drm_plane_state *state) 800 { 801 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 802 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 803 804 /* LBM is not needed when there's no vertical scaling. */ 805 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE && 806 vc4_state->y_scaling[1] == VC4_SCALING_NONE) 807 return 0; 808 809 if (vc4->gen >= VC4_GEN_6_C) 810 return __vc6_lbm_size(state); 811 else 812 return __vc4_lbm_size(state); 813 } 814 815 static size_t vc6_upm_size(const struct drm_plane_state *state, 816 unsigned int plane) 817 { 818 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 819 unsigned int stride = state->fb->pitches[plane]; 820 821 /* 822 * TODO: This only works for raster formats, and is sub-optimal 823 * for buffers with a stride aligned on 32 bytes. 
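	 *
	 * As a rough, purely illustrative example: a 1920-pixel-wide
	 * XRGB8888 plane has a 7680 byte stride, giving words_per_line =
	 * (7680 + 62) / 32 = 241 and fetch_region_size = 7712 bytes, which
	 * is then multiplied by the number of buffered lines and aligned to
	 * HVS_UBM_WORD_SIZE.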
824 */ 825 unsigned int words_per_line = (stride + 62) / 32; 826 unsigned int fetch_region_size = words_per_line * 32; 827 unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines; 828 unsigned int buffer_size = fetch_region_size * buffer_lines; 829 830 return ALIGN(buffer_size, HVS_UBM_WORD_SIZE); 831 } 832 833 static void vc4_write_scaling_parameters(struct drm_plane_state *state, 834 int channel) 835 { 836 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 837 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 838 839 WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); 840 841 /* Ch0 H-PPF Word 0: Scaling Parameters */ 842 if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { 843 vc4_write_ppf(vc4_state, vc4_state->src_w[channel], 844 vc4_state->crtc_w, vc4_state->src_x, channel); 845 } 846 847 /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ 848 if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { 849 vc4_write_ppf(vc4_state, vc4_state->src_h[channel], 850 vc4_state->crtc_h, vc4_state->src_y, channel); 851 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 852 } 853 854 /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ 855 if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { 856 vc4_write_tpz(vc4_state, vc4_state->src_w[channel], 857 vc4_state->crtc_w); 858 } 859 860 /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ 861 if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { 862 vc4_write_tpz(vc4_state, vc4_state->src_h[channel], 863 vc4_state->crtc_h); 864 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 865 } 866 } 867 868 static void vc4_plane_calc_load(struct drm_plane_state *state) 869 { 870 unsigned int hvs_load_shift, vrefresh, i; 871 struct drm_framebuffer *fb = state->fb; 872 struct vc4_plane_state *vc4_state; 873 struct drm_crtc_state *crtc_state; 874 unsigned int vscale_factor; 875 876 vc4_state = to_vc4_plane_state(state); 877 crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc); 878 vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode); 879 880 /* The HVS is able to process 2 pixels/cycle when scaling the source, 881 * 4 pixels/cycle otherwise. 882 * Alpha blending step seems to be pipelined and it's always operating 883 * at 4 pixels/cycle, so the limiting aspect here seems to be the 884 * scaler block. 885 * HVS load is expressed in clk-cycles/sec (AKA Hz). 886 */ 887 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 888 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 889 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 890 vc4_state->y_scaling[1] != VC4_SCALING_NONE) 891 hvs_load_shift = 1; 892 else 893 hvs_load_shift = 2; 894 895 vc4_state->membus_load = 0; 896 vc4_state->hvs_load = 0; 897 for (i = 0; i < fb->format->num_planes; i++) { 898 /* Even if the bandwidth/plane required for a single frame is 899 * 900 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) * 901 * cpp * vrefresh 902 * 903 * when downscaling, we have to read more pixels per line in 904 * the time frame reserved for a single line, so the bandwidth 905 * demand can be punctually higher. To account for that, we 906 * calculate the down-scaling factor and multiply the plane 907 * load by this number. We're likely over-estimating the read 908 * demand, but that's better than under-estimating it. 
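		 *
		 * Purely as an illustration: a 1920x1080 ARGB8888 plane
		 * downscaled 2x vertically on a 60 Hz CRTC adds roughly
		 * 1920 * 1080 * 2 * 4 * 60 ~= 995 MB/s to membus_load.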
909 */ 910 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16, 911 vc4_state->crtc_h); 912 vc4_state->membus_load += (vc4_state->src_w[i] >> 16) * 913 (vc4_state->src_h[i] >> 16) * 914 vscale_factor * fb->format->cpp[i]; 915 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w; 916 } 917 918 vc4_state->hvs_load *= vrefresh; 919 vc4_state->hvs_load >>= hvs_load_shift; 920 vc4_state->membus_load *= vrefresh; 921 } 922 923 static int vc4_plane_allocate_lbm(struct drm_plane_state *state) 924 { 925 struct drm_device *drm = state->plane->dev; 926 struct vc4_dev *vc4 = to_vc4_dev(drm); 927 struct drm_plane *plane = state->plane; 928 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 929 unsigned long irqflags; 930 u32 lbm_size; 931 932 lbm_size = vc4_lbm_size(state); 933 if (!lbm_size) 934 return 0; 935 936 /* 937 * NOTE: BCM2712 doesn't need to be aligned, since the size 938 * returned by vc4_lbm_size() is in words already. 939 */ 940 if (vc4->gen == VC4_GEN_5) 941 lbm_size = ALIGN(lbm_size, 64); 942 else if (vc4->gen == VC4_GEN_4) 943 lbm_size = ALIGN(lbm_size, 32); 944 945 drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n", 946 plane->base.id, plane->name, lbm_size); 947 948 if (WARN_ON(!vc4_state->lbm_offset)) 949 return -EINVAL; 950 951 /* Allocate the LBM memory that the HVS will use for temporary 952 * storage due to our scaling/format conversion. 953 */ 954 if (!drm_mm_node_allocated(&vc4_state->lbm)) { 955 int ret; 956 957 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 958 ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, 959 &vc4_state->lbm, 960 lbm_size, 1, 961 0, 0); 962 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 963 964 if (ret) { 965 drm_err(drm, "Failed to allocate LBM entry: %d\n", ret); 966 return ret; 967 } 968 } else { 969 WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); 970 } 971 972 vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start; 973 974 return 0; 975 } 976 977 static int vc6_plane_allocate_upm(struct drm_plane_state *state) 978 { 979 const struct drm_format_info *info = state->fb->format; 980 struct drm_device *drm = state->plane->dev; 981 struct vc4_dev *vc4 = to_vc4_dev(drm); 982 struct vc4_hvs *hvs = vc4->hvs; 983 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 984 unsigned int i; 985 int ret; 986 987 WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C); 988 989 vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES; 990 991 for (i = 0; i < info->num_planes; i++) { 992 struct vc4_upm_refcounts *refcount; 993 int upm_handle; 994 unsigned long irqflags; 995 size_t upm_size; 996 997 upm_size = vc6_upm_size(state, i); 998 if (!upm_size) 999 return -EINVAL; 1000 upm_handle = vc4_state->upm_handle[i]; 1001 1002 if (upm_handle && 1003 hvs->upm_refcounts[upm_handle].size == upm_size) { 1004 /* Allocation is the same size as the previous user of 1005 * the plane. Keep the allocation. 
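			 * The refcount on this handle was already taken when
			 * the state was duplicated, so there is nothing more
			 * to account for here.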
1006 */ 1007 vc4_state->upm_handle[i] = upm_handle; 1008 } else { 1009 if (upm_handle && 1010 refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) { 1011 vc4_plane_release_upm_ida(hvs, upm_handle); 1012 vc4_state->upm_handle[i] = 0; 1013 } 1014 1015 upm_handle = ida_alloc_range(&hvs->upm_handles, 1, 1016 VC4_NUM_UPM_HANDLES, 1017 GFP_KERNEL); 1018 if (upm_handle < 0) { 1019 drm_dbg(drm, "Out of upm_handles\n"); 1020 return upm_handle; 1021 } 1022 vc4_state->upm_handle[i] = upm_handle; 1023 1024 refcount = &hvs->upm_refcounts[upm_handle]; 1025 refcount_set(&refcount->refcount, 1); 1026 refcount->size = upm_size; 1027 1028 spin_lock_irqsave(&hvs->mm_lock, irqflags); 1029 ret = drm_mm_insert_node_generic(&hvs->upm_mm, 1030 &refcount->upm, 1031 upm_size, HVS_UBM_WORD_SIZE, 1032 0, 0); 1033 spin_unlock_irqrestore(&hvs->mm_lock, irqflags); 1034 if (ret) { 1035 drm_err(drm, "Failed to allocate UPM entry: %d\n", ret); 1036 refcount_set(&refcount->refcount, 0); 1037 ida_free(&hvs->upm_handles, upm_handle); 1038 vc4_state->upm_handle[i] = 0; 1039 return ret; 1040 } 1041 } 1042 1043 refcount = &hvs->upm_refcounts[upm_handle]; 1044 vc4_state->dlist[vc4_state->ptr0_offset[i]] |= 1045 VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE, 1046 SCALER6_PTR0_UPM_BASE) | 1047 VC4_SET_FIELD(vc4_state->upm_handle[i] - 1, 1048 SCALER6_PTR0_UPM_HANDLE) | 1049 VC4_SET_FIELD(vc4_state->upm_buffer_lines, 1050 SCALER6_PTR0_UPM_BUFF_SIZE); 1051 } 1052 1053 return 0; 1054 } 1055 1056 static void vc6_plane_free_upm(struct drm_plane_state *state) 1057 { 1058 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 1059 struct drm_device *drm = state->plane->dev; 1060 struct vc4_dev *vc4 = to_vc4_dev(drm); 1061 struct vc4_hvs *hvs = vc4->hvs; 1062 unsigned int i; 1063 1064 WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C); 1065 1066 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { 1067 unsigned int upm_handle; 1068 1069 upm_handle = vc4_state->upm_handle[i]; 1070 if (!upm_handle) 1071 continue; 1072 1073 if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) 1074 vc4_plane_release_upm_ida(hvs, upm_handle); 1075 vc4_state->upm_handle[i] = 0; 1076 } 1077 } 1078 1079 /* 1080 * The colorspace conversion matrices are held in 3 entries in the dlist. 1081 * Create an array of them, with entries for each full and limited mode, and 1082 * each supported colorspace. 
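 *
 * The array is indexed as colorspace_coeffs[range][encoding][word]: range
 * is 0 for limited and 1 for full range, encoding follows
 * enum drm_color_encoding (BT601/JFIF, BT709, BT2020), and word selects
 * the SCALER_CSC0/1/2 dlist entry.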
1083 */ 1084 static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = { 1085 { 1086 /* Limited range */ 1087 { 1088 /* BT601 */ 1089 SCALER_CSC0_ITR_R_601_5, 1090 SCALER_CSC1_ITR_R_601_5, 1091 SCALER_CSC2_ITR_R_601_5, 1092 }, { 1093 /* BT709 */ 1094 SCALER_CSC0_ITR_R_709_3, 1095 SCALER_CSC1_ITR_R_709_3, 1096 SCALER_CSC2_ITR_R_709_3, 1097 }, { 1098 /* BT2020 */ 1099 SCALER_CSC0_ITR_R_2020, 1100 SCALER_CSC1_ITR_R_2020, 1101 SCALER_CSC2_ITR_R_2020, 1102 } 1103 }, { 1104 /* Full range */ 1105 { 1106 /* JFIF */ 1107 SCALER_CSC0_JPEG_JFIF, 1108 SCALER_CSC1_JPEG_JFIF, 1109 SCALER_CSC2_JPEG_JFIF, 1110 }, { 1111 /* BT709 */ 1112 SCALER_CSC0_ITR_R_709_3_FR, 1113 SCALER_CSC1_ITR_R_709_3_FR, 1114 SCALER_CSC2_ITR_R_709_3_FR, 1115 }, { 1116 /* BT2020 */ 1117 SCALER_CSC0_ITR_R_2020_FR, 1118 SCALER_CSC1_ITR_R_2020_FR, 1119 SCALER_CSC2_ITR_R_2020_FR, 1120 } 1121 } 1122 }; 1123 1124 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state) 1125 { 1126 struct drm_device *dev = state->state->dev; 1127 struct vc4_dev *vc4 = to_vc4_dev(dev); 1128 1129 WARN_ON_ONCE(vc4->gen != VC4_GEN_4); 1130 1131 if (!state->fb->format->has_alpha) 1132 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED, 1133 SCALER_POS2_ALPHA_MODE); 1134 1135 switch (state->pixel_blend_mode) { 1136 case DRM_MODE_BLEND_PIXEL_NONE: 1137 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED, 1138 SCALER_POS2_ALPHA_MODE); 1139 default: 1140 case DRM_MODE_BLEND_PREMULTI: 1141 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE, 1142 SCALER_POS2_ALPHA_MODE) | 1143 SCALER_POS2_ALPHA_PREMULT; 1144 case DRM_MODE_BLEND_COVERAGE: 1145 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE, 1146 SCALER_POS2_ALPHA_MODE); 1147 } 1148 } 1149 1150 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state) 1151 { 1152 struct drm_device *dev = state->state->dev; 1153 struct vc4_dev *vc4 = to_vc4_dev(dev); 1154 1155 WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C && 1156 vc4->gen != VC4_GEN_6_D); 1157 1158 switch (vc4->gen) { 1159 default: 1160 case VC4_GEN_5: 1161 case VC4_GEN_6_C: 1162 if (!state->fb->format->has_alpha) 1163 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, 1164 SCALER5_CTL2_ALPHA_MODE); 1165 1166 switch (state->pixel_blend_mode) { 1167 case DRM_MODE_BLEND_PIXEL_NONE: 1168 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, 1169 SCALER5_CTL2_ALPHA_MODE); 1170 default: 1171 case DRM_MODE_BLEND_PREMULTI: 1172 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, 1173 SCALER5_CTL2_ALPHA_MODE) | 1174 SCALER5_CTL2_ALPHA_PREMULT; 1175 case DRM_MODE_BLEND_COVERAGE: 1176 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, 1177 SCALER5_CTL2_ALPHA_MODE); 1178 } 1179 case VC4_GEN_6_D: 1180 /* 2712-D configures fixed alpha mode in CTL0 */ 1181 return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ? 
1182 SCALER5_CTL2_ALPHA_PREMULT : 0; 1183 } 1184 } 1185 1186 static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state) 1187 { 1188 struct drm_device *dev = state->state->dev; 1189 struct vc4_dev *vc4 = to_vc4_dev(dev); 1190 1191 WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D); 1192 1193 if (vc4->gen == VC4_GEN_6_D && 1194 (!state->fb->format->has_alpha || 1195 state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE)) 1196 return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED, 1197 SCALER6_CTL0_ALPHA_MASK); 1198 1199 return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK); 1200 } 1201 1202 /* Writes out a full display list for an active plane to the plane's 1203 * private dlist state. 1204 */ 1205 static int vc4_plane_mode_set(struct drm_plane *plane, 1206 struct drm_plane_state *state) 1207 { 1208 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 1209 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 1210 struct drm_framebuffer *fb = state->fb; 1211 u32 ctl0_offset = vc4_state->dlist_count; 1212 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 1213 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 1214 int num_planes = fb->format->num_planes; 1215 u32 h_subsample = fb->format->hsub; 1216 u32 v_subsample = fb->format->vsub; 1217 bool mix_plane_alpha; 1218 bool covers_screen; 1219 u32 scl0, scl1, pitch0; 1220 u32 tiling, src_x, src_y; 1221 u32 width, height; 1222 u32 hvs_format = format->hvs; 1223 unsigned int rotation; 1224 u32 offsets[3] = { 0 }; 1225 int ret, i; 1226 1227 if (vc4_state->dlist_initialized) 1228 return 0; 1229 1230 ret = vc4_plane_setup_clipping_and_scaling(state); 1231 if (ret) 1232 return ret; 1233 1234 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || 1235 !vc4_state->crtc_w || !vc4_state->crtc_h) { 1236 /* 0 source size probably means the plane is offscreen */ 1237 vc4_state->dlist_initialized = 1; 1238 return 0; 1239 } 1240 1241 width = vc4_state->src_w[0] >> 16; 1242 height = vc4_state->src_h[0] >> 16; 1243 1244 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 1245 * and 4:4:4, scl1 should be set to scl0 so both channels of 1246 * the scaler do the same thing. For YUV, the Y plane needs 1247 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 1248 * the scl fields here. 1249 */ 1250 if (num_planes == 1) { 1251 scl0 = vc4_get_scl_field(state, 0); 1252 scl1 = scl0; 1253 } else { 1254 scl0 = vc4_get_scl_field(state, 1); 1255 scl1 = vc4_get_scl_field(state, 0); 1256 } 1257 1258 rotation = drm_rotation_simplify(state->rotation, 1259 DRM_MODE_ROTATE_0 | 1260 DRM_MODE_REFLECT_X | 1261 DRM_MODE_REFLECT_Y); 1262 1263 /* We must point to the last line when Y reflection is enabled. */ 1264 src_y = vc4_state->src_y >> 16; 1265 if (rotation & DRM_MODE_REFLECT_Y) 1266 src_y += height - 1; 1267 1268 src_x = vc4_state->src_x >> 16; 1269 1270 switch (base_format_mod) { 1271 case DRM_FORMAT_MOD_LINEAR: 1272 tiling = SCALER_CTL0_TILING_LINEAR; 1273 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); 1274 1275 /* Adjust the base pointer to the first pixel to be scanned 1276 * out. 1277 */ 1278 for (i = 0; i < num_planes; i++) { 1279 offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i]; 1280 offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i]; 1281 } 1282 1283 break; 1284 1285 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { 1286 u32 tile_size_shift = 12; /* T tiles are 4kb */ 1287 /* Whole-tile offsets, mostly for setting the pitch. 
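		 * With 4 KiB T tiles this works out to 32x32 pixels at 32bpp
		 * (tile_w_shift = 5) and 64x32 pixels at 16bpp (tile_w_shift
		 * = 6), i.e. 32 * 32 * 4 == 64 * 32 * 2 == 4096 bytes either
		 * way.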
*/ 1288 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5; 1289 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */ 1290 u32 tile_w_mask = (1 << tile_w_shift) - 1; 1291 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice 1292 * the height (in pixels) of a 4k tile. 1293 */ 1294 u32 tile_h_mask = (2 << tile_h_shift) - 1; 1295 /* For T-tiled, the FB pitch is "how many bytes from one row to 1296 * the next, such that 1297 * 1298 * pitch * tile_h == tile_size * tiles_per_row 1299 */ 1300 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); 1301 u32 tiles_l = src_x >> tile_w_shift; 1302 u32 tiles_r = tiles_w - tiles_l; 1303 u32 tiles_t = src_y >> tile_h_shift; 1304 /* Intra-tile offsets, which modify the base address (the 1305 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that 1306 * base address). 1307 */ 1308 u32 tile_y = (src_y >> 4) & 1; 1309 u32 subtile_y = (src_y >> 2) & 3; 1310 u32 utile_y = src_y & 3; 1311 u32 x_off = src_x & tile_w_mask; 1312 u32 y_off = src_y & tile_h_mask; 1313 1314 /* When Y reflection is requested we must set the 1315 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines 1316 * after the initial one should be fetched in descending order, 1317 * which makes sense since we start from the last line and go 1318 * backward. 1319 * Don't know why we need y_off = max_y_off - y_off, but it's 1320 * definitely required (I guess it's also related to the "going 1321 * backward" situation). 1322 */ 1323 if (rotation & DRM_MODE_REFLECT_Y) { 1324 y_off = tile_h_mask - y_off; 1325 pitch0 = SCALER_PITCH0_TILE_LINE_DIR; 1326 } else { 1327 pitch0 = 0; 1328 } 1329 1330 tiling = SCALER_CTL0_TILING_256B_OR_T; 1331 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) | 1332 VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) | 1333 VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) | 1334 VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R)); 1335 offsets[0] += tiles_t * (tiles_w << tile_size_shift); 1336 offsets[0] += subtile_y << 8; 1337 offsets[0] += utile_y << 4; 1338 1339 /* Rows of tiles alternate left-to-right and right-to-left. */ 1340 if (tiles_t & 1) { 1341 pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR; 1342 offsets[0] += (tiles_w - tiles_l) << tile_size_shift; 1343 offsets[0] -= (1 + !tile_y) << 10; 1344 } else { 1345 offsets[0] += tiles_l << tile_size_shift; 1346 offsets[0] += tile_y << 10; 1347 } 1348 1349 break; 1350 } 1351 1352 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1353 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1354 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 1355 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 1356 1357 if (param > SCALER_TILE_HEIGHT_MASK) { 1358 DRM_DEBUG_KMS("SAND height too large (%d)\n", 1359 param); 1360 return -EINVAL; 1361 } 1362 1363 if (fb->format->format == DRM_FORMAT_P030) { 1364 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; 1365 tiling = SCALER_CTL0_TILING_128B; 1366 } else { 1367 hvs_format = HVS_PIXEL_FORMAT_H264; 1368 1369 switch (base_format_mod) { 1370 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1371 tiling = SCALER_CTL0_TILING_64B; 1372 break; 1373 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1374 tiling = SCALER_CTL0_TILING_128B; 1375 break; 1376 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1377 tiling = SCALER_CTL0_TILING_256B_OR_T; 1378 break; 1379 default: 1380 return -EINVAL; 1381 } 1382 } 1383 1384 /* Adjust the base pointer to the first pixel to be scanned 1385 * out. 
1386 * 1387 * For P030, y_ptr [31:4] is the 128bit word for the start pixel 1388 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit 1389 * word that should be taken as the first pixel. 1390 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the 1391 * element within the 128bit word, eg for pixel 3 the value 1392 * should be 6. 1393 */ 1394 for (i = 0; i < num_planes; i++) { 1395 u32 tile_w, tile, x_off, pix_per_tile; 1396 1397 if (fb->format->format == DRM_FORMAT_P030) { 1398 /* 1399 * Spec says: bits [31:4] of the given address 1400 * should point to the 128-bit word containing 1401 * the desired starting pixel, and bits[3:0] 1402 * should be between 0 and 11, indicating which 1403 * of the 12-pixels in that 128-bit word is the 1404 * first pixel to be used 1405 */ 1406 u32 remaining_pixels = src_x % 96; 1407 u32 aligned = remaining_pixels / 12; 1408 u32 last_bits = remaining_pixels % 12; 1409 1410 x_off = aligned * 16 + last_bits; 1411 tile_w = 128; 1412 pix_per_tile = 96; 1413 } else { 1414 switch (base_format_mod) { 1415 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1416 tile_w = 64; 1417 break; 1418 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1419 tile_w = 128; 1420 break; 1421 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1422 tile_w = 256; 1423 break; 1424 default: 1425 return -EINVAL; 1426 } 1427 pix_per_tile = tile_w / fb->format->cpp[0]; 1428 x_off = (src_x % pix_per_tile) / 1429 (i ? h_subsample : 1) * 1430 fb->format->cpp[i]; 1431 } 1432 1433 tile = src_x / pix_per_tile; 1434 1435 offsets[i] += param * tile_w * tile; 1436 offsets[i] += src_y / (i ? v_subsample : 1) * tile_w; 1437 offsets[i] += x_off & ~(i ? 1 : 0); 1438 } 1439 1440 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT); 1441 break; 1442 } 1443 1444 default: 1445 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 1446 (long long)fb->modifier); 1447 return -EINVAL; 1448 } 1449 1450 /* fetch an extra pixel if we don't actually line up with the left edge. */ 1451 if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16)) 1452 width++; 1453 1454 /* same for the right side */ 1455 if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) && 1456 vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16)) 1457 width++; 1458 1459 /* now for the top */ 1460 if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16)) 1461 height++; 1462 1463 /* and the bottom */ 1464 if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) && 1465 vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16)) 1466 height++; 1467 1468 /* For YUV444 the hardware wants double the width, otherwise it doesn't 1469 * fetch full width of chroma 1470 */ 1471 if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444) 1472 width <<= 1; 1473 1474 /* Don't waste cycles mixing with plane alpha if the set alpha 1475 * is opaque or there is no per-pixel alpha information. 1476 * In any case we use the alpha property value as the fixed alpha. 1477 */ 1478 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 1479 fb->format->has_alpha; 1480 1481 if (vc4->gen == VC4_GEN_4) { 1482 /* Control word */ 1483 vc4_dlist_write(vc4_state, 1484 SCALER_CTL0_VALID | 1485 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) | 1486 (rotation & DRM_MODE_REFLECT_Y ? 
SCALER_CTL0_VFLIP : 0) | 1487 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) | 1488 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | 1489 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 1490 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 1491 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | 1492 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 1493 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); 1494 1495 /* Position Word 0: Image Positions and Alpha Value */ 1496 vc4_state->pos0_offset = vc4_state->dlist_count; 1497 vc4_dlist_write(vc4_state, 1498 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | 1499 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | 1500 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); 1501 1502 /* Position Word 1: Scaled Image Dimensions. */ 1503 if (!vc4_state->is_unity) { 1504 vc4_dlist_write(vc4_state, 1505 VC4_SET_FIELD(vc4_state->crtc_w, 1506 SCALER_POS1_SCL_WIDTH) | 1507 VC4_SET_FIELD(vc4_state->crtc_h, 1508 SCALER_POS1_SCL_HEIGHT)); 1509 } 1510 1511 /* Position Word 2: Source Image Size, Alpha */ 1512 vc4_state->pos2_offset = vc4_state->dlist_count; 1513 vc4_dlist_write(vc4_state, 1514 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | 1515 vc4_hvs4_get_alpha_blend_mode(state) | 1516 VC4_SET_FIELD(width, SCALER_POS2_WIDTH) | 1517 VC4_SET_FIELD(height, SCALER_POS2_HEIGHT)); 1518 1519 /* Position Word 3: Context. Written by the HVS. */ 1520 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1521 1522 } else { 1523 /* Control word */ 1524 vc4_dlist_write(vc4_state, 1525 SCALER_CTL0_VALID | 1526 (format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) | 1527 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 1528 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 1529 (vc4_state->is_unity ? 1530 SCALER5_CTL0_UNITY : 0) | 1531 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 1532 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) | 1533 SCALER5_CTL0_ALPHA_EXPAND | 1534 SCALER5_CTL0_RGB_EXPAND); 1535 1536 /* Position Word 0: Image Positions and Alpha Value */ 1537 vc4_state->pos0_offset = vc4_state->dlist_count; 1538 vc4_dlist_write(vc4_state, 1539 (rotation & DRM_MODE_REFLECT_Y ? 1540 SCALER5_POS0_VFLIP : 0) | 1541 VC4_SET_FIELD(vc4_state->crtc_x, 1542 SCALER_POS0_START_X) | 1543 (rotation & DRM_MODE_REFLECT_X ? 1544 SCALER5_POS0_HFLIP : 0) | 1545 VC4_SET_FIELD(vc4_state->crtc_y, 1546 SCALER5_POS0_START_Y) 1547 ); 1548 1549 /* Control Word 2 */ 1550 vc4_dlist_write(vc4_state, 1551 VC4_SET_FIELD(state->alpha >> 4, 1552 SCALER5_CTL2_ALPHA) | 1553 vc4_hvs5_get_alpha_blend_mode(state) | 1554 (mix_plane_alpha ? 1555 SCALER5_CTL2_ALPHA_MIX : 0) 1556 ); 1557 1558 /* Position Word 1: Scaled Image Dimensions. */ 1559 if (!vc4_state->is_unity) { 1560 vc4_dlist_write(vc4_state, 1561 VC4_SET_FIELD(vc4_state->crtc_w, 1562 SCALER5_POS1_SCL_WIDTH) | 1563 VC4_SET_FIELD(vc4_state->crtc_h, 1564 SCALER5_POS1_SCL_HEIGHT)); 1565 } 1566 1567 /* Position Word 2: Source Image Size */ 1568 vc4_state->pos2_offset = vc4_state->dlist_count; 1569 vc4_dlist_write(vc4_state, 1570 VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) | 1571 VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT)); 1572 1573 /* Position Word 3: Context. Written by the HVS. */ 1574 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1575 } 1576 1577 1578 /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers 1579 * 1580 * The pointers may be any byte address. 
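	 * Each pointer written below is the plane's GEM DMA address plus the
	 * framebuffer offset plus the per-plane offsets[] computed above for
	 * the tiling mode and src_x/src_y.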
1581 */ 1582 vc4_state->ptr0_offset[0] = vc4_state->dlist_count; 1583 1584 for (i = 0; i < num_planes; i++) { 1585 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i); 1586 1587 vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]); 1588 } 1589 1590 /* Pointer Context Word 0/1/2: Written by the HVS */ 1591 for (i = 0; i < num_planes; i++) 1592 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1593 1594 /* Pitch word 0 */ 1595 vc4_dlist_write(vc4_state, pitch0); 1596 1597 /* Pitch word 1/2 */ 1598 for (i = 1; i < num_planes; i++) { 1599 if (hvs_format != HVS_PIXEL_FORMAT_H264 && 1600 hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) { 1601 vc4_dlist_write(vc4_state, 1602 VC4_SET_FIELD(fb->pitches[i], 1603 SCALER_SRC_PITCH)); 1604 } else { 1605 vc4_dlist_write(vc4_state, pitch0); 1606 } 1607 } 1608 1609 /* Colorspace conversion words */ 1610 if (vc4_state->is_yuv) { 1611 enum drm_color_encoding color_encoding = state->color_encoding; 1612 enum drm_color_range color_range = state->color_range; 1613 const u32 *ccm; 1614 1615 if (color_encoding >= DRM_COLOR_ENCODING_MAX) 1616 color_encoding = DRM_COLOR_YCBCR_BT601; 1617 if (color_range >= DRM_COLOR_RANGE_MAX) 1618 color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; 1619 1620 ccm = colorspace_coeffs[color_range][color_encoding]; 1621 1622 vc4_dlist_write(vc4_state, ccm[0]); 1623 vc4_dlist_write(vc4_state, ccm[1]); 1624 vc4_dlist_write(vc4_state, ccm[2]); 1625 } 1626 1627 vc4_state->lbm_offset = 0; 1628 1629 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 1630 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 1631 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 1632 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 1633 /* Reserve a slot for the LBM Base Address. The real value will 1634 * be set when calling vc4_plane_allocate_lbm(). 1635 */ 1636 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || 1637 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 1638 vc4_state->lbm_offset = vc4_state->dlist_count; 1639 vc4_dlist_counter_increment(vc4_state); 1640 } 1641 1642 if (num_planes > 1) { 1643 /* Emit Cb/Cr as channel 0 and Y as channel 1644 * 1. This matches how we set up scl0/scl1 1645 * above. 1646 */ 1647 vc4_write_scaling_parameters(state, 1); 1648 } 1649 vc4_write_scaling_parameters(state, 0); 1650 1651 /* If any PPF setup was done, then all the kernel 1652 * pointers get uploaded. 1653 */ 1654 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || 1655 vc4_state->y_scaling[0] == VC4_SCALING_PPF || 1656 vc4_state->x_scaling[1] == VC4_SCALING_PPF || 1657 vc4_state->y_scaling[1] == VC4_SCALING_PPF) { 1658 u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, 1659 SCALER_PPF_KERNEL_OFFSET); 1660 1661 /* HPPF plane 0 */ 1662 vc4_dlist_write(vc4_state, kernel); 1663 /* VPPF plane 0 */ 1664 vc4_dlist_write(vc4_state, kernel); 1665 /* HPPF plane 1 */ 1666 vc4_dlist_write(vc4_state, kernel); 1667 /* VPPF plane 1 */ 1668 vc4_dlist_write(vc4_state, kernel); 1669 } 1670 } 1671 1672 vc4_state->dlist[ctl0_offset] |= 1673 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); 1674 1675 /* crtc_* are already clipped coordinates. */ 1676 covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && 1677 vc4_state->crtc_w == state->crtc->mode.hdisplay && 1678 vc4_state->crtc_h == state->crtc->mode.vdisplay; 1679 /* Background fill might be necessary when the plane has per-pixel 1680 * alpha content or a non-opaque plane alpha and could blend from the 1681 * background or does not cover the entire screen. 
1682 */ 1683 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || 1684 state->alpha != DRM_BLEND_ALPHA_OPAQUE; 1685 1686 /* Flag the dlist as initialized to avoid checking it twice in case 1687 * the async update check already called vc4_plane_mode_set() and 1688 * decided to fallback to sync update because async update was not 1689 * possible. 1690 */ 1691 vc4_state->dlist_initialized = 1; 1692 1693 vc4_plane_calc_load(state); 1694 1695 return 0; 1696 } 1697 1698 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state) 1699 { 1700 struct drm_plane_state *state = &vc4_state->base; 1701 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 1702 u32 ret = 0; 1703 1704 if (vc4_state->is_yuv) { 1705 enum drm_color_encoding color_encoding = state->color_encoding; 1706 enum drm_color_range color_range = state->color_range; 1707 1708 /* CSC pre-loaded with: 1709 * 0 = BT601 limited range 1710 * 1 = BT709 limited range 1711 * 2 = BT2020 limited range 1712 * 3 = BT601 full range 1713 * 4 = BT709 full range 1714 * 5 = BT2020 full range 1715 */ 1716 if (color_encoding > DRM_COLOR_YCBCR_BT2020) 1717 color_encoding = DRM_COLOR_YCBCR_BT601; 1718 if (color_range > DRM_COLOR_YCBCR_FULL_RANGE) 1719 color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; 1720 1721 if (vc4->gen == VC4_GEN_6_C) { 1722 ret |= SCALER6C_CTL2_CSC_ENABLE; 1723 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3), 1724 SCALER6C_CTL2_BRCM_CFC_CONTROL); 1725 } else { 1726 ret |= SCALER6D_CTL2_CSC_ENABLE; 1727 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3), 1728 SCALER6D_CTL2_BRCM_CFC_CONTROL); 1729 } 1730 } 1731 1732 return ret; 1733 } 1734 1735 static int vc6_plane_mode_set(struct drm_plane *plane, 1736 struct drm_plane_state *state) 1737 { 1738 struct drm_device *drm = plane->dev; 1739 struct vc4_dev *vc4 = to_vc4_dev(drm); 1740 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 1741 struct drm_framebuffer *fb = state->fb; 1742 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 1743 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 1744 int num_planes = fb->format->num_planes; 1745 u32 h_subsample = fb->format->hsub; 1746 u32 v_subsample = fb->format->vsub; 1747 bool mix_plane_alpha; 1748 bool covers_screen; 1749 u32 scl0, scl1, pitch0; 1750 u32 tiling, src_x, src_y; 1751 u32 width, height; 1752 u32 hvs_format = format->hvs; 1753 u32 offsets[3] = { 0 }; 1754 unsigned int rotation; 1755 int ret, i; 1756 1757 if (vc4_state->dlist_initialized) 1758 return 0; 1759 1760 ret = vc4_plane_setup_clipping_and_scaling(state); 1761 if (ret) 1762 return ret; 1763 1764 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || 1765 !vc4_state->crtc_w || !vc4_state->crtc_h) { 1766 /* 0 source size probably means the plane is offscreen. 1767 * 0 destination size is a redundant plane. 1768 */ 1769 vc4_state->dlist_initialized = 1; 1770 return 0; 1771 } 1772 1773 width = vc4_state->src_w[0] >> 16; 1774 height = vc4_state->src_h[0] >> 16; 1775 1776 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 1777 * and 4:4:4, scl1 should be set to scl0 so both channels of 1778 * the scaler do the same thing. For YUV, the Y plane needs 1779 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 1780 * the scl fields here. 
1781 */ 1782 if (num_planes == 1) { 1783 scl0 = vc4_get_scl_field(state, 0); 1784 scl1 = scl0; 1785 } else { 1786 scl0 = vc4_get_scl_field(state, 1); 1787 scl1 = vc4_get_scl_field(state, 0); 1788 } 1789 1790 rotation = drm_rotation_simplify(state->rotation, 1791 DRM_MODE_ROTATE_0 | 1792 DRM_MODE_REFLECT_X | 1793 DRM_MODE_REFLECT_Y); 1794 1795 /* We must point to the last line when Y reflection is enabled. */ 1796 src_y = vc4_state->src_y >> 16; 1797 if (rotation & DRM_MODE_REFLECT_Y) 1798 src_y += height - 1; 1799 1800 src_x = vc4_state->src_x >> 16; 1801 1802 switch (base_format_mod) { 1803 case DRM_FORMAT_MOD_LINEAR: 1804 tiling = SCALER6_CTL0_ADDR_MODE_LINEAR; 1805 1806 /* Adjust the base pointer to the first pixel to be scanned 1807 * out. 1808 */ 1809 for (i = 0; i < num_planes; i++) { 1810 offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i]; 1811 offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i]; 1812 } 1813 1814 break; 1815 1816 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1817 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 1818 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 1819 u32 components_per_word; 1820 u32 starting_offset; 1821 u32 fetch_count; 1822 1823 if (param > SCALER_TILE_HEIGHT_MASK) { 1824 DRM_DEBUG_KMS("SAND height too large (%d)\n", 1825 param); 1826 return -EINVAL; 1827 } 1828 1829 if (fb->format->format == DRM_FORMAT_P030) { 1830 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; 1831 tiling = SCALER6_CTL0_ADDR_MODE_128B; 1832 } else { 1833 hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE; 1834 1835 switch (base_format_mod) { 1836 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1837 tiling = SCALER6_CTL0_ADDR_MODE_128B; 1838 break; 1839 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1840 tiling = SCALER6_CTL0_ADDR_MODE_256B; 1841 break; 1842 default: 1843 return -EINVAL; 1844 } 1845 } 1846 1847 /* Adjust the base pointer to the first pixel to be scanned 1848 * out. 1849 * 1850 * For P030, y_ptr [31:4] is the 128bit word for the start pixel 1851 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit 1852 * word that should be taken as the first pixel. 1853 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the 1854 * element within the 128bit word, eg for pixel 3 the value 1855 * should be 6. 1856 */ 1857 for (i = 0; i < num_planes; i++) { 1858 u32 tile_w, tile, x_off, pix_per_tile; 1859 1860 if (fb->format->format == DRM_FORMAT_P030) { 1861 /* 1862 * Spec says: bits [31:4] of the given address 1863 * should point to the 128-bit word containing 1864 * the desired starting pixel, and bits[3:0] 1865 * should be between 0 and 11, indicating which 1866 * of the 12-pixels in that 128-bit word is the 1867 * first pixel to be used 1868 */ 1869 u32 remaining_pixels = src_x % 96; 1870 u32 aligned = remaining_pixels / 12; 1871 u32 last_bits = remaining_pixels % 12; 1872 1873 x_off = aligned * 16 + last_bits; 1874 tile_w = 128; 1875 pix_per_tile = 96; 1876 } else { 1877 switch (base_format_mod) { 1878 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1879 tile_w = 128; 1880 break; 1881 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1882 tile_w = 256; 1883 break; 1884 default: 1885 return -EINVAL; 1886 } 1887 pix_per_tile = tile_w / fb->format->cpp[0]; 1888 x_off = (src_x % pix_per_tile) / 1889 (i ? h_subsample : 1) * 1890 fb->format->cpp[i]; 1891 } 1892 1893 tile = src_x / pix_per_tile; 1894 1895 offsets[i] += param * tile_w * tile; 1896 offsets[i] += src_y / (i ? v_subsample : 1) * tile_w; 1897 offsets[i] += x_off & ~(i ? 
		components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
		starting_offset = src_x % components_per_word;
		fetch_count = (width + starting_offset + components_per_word - 1) /
			      components_per_word;

		pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
			 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* fetch an extra pixel if we don't actually line up with the left edge. */
	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
		width++;

	/* same for the right side */
	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
		width++;

	/* now for the top */
	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
		height++;

	/* and the bottom */
	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
		height++;

	/* For YUV444, the hardware wants double the width, otherwise it
	 * doesn't fetch the full width of chroma.
	 */
	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
		width <<= 1;

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	/* Control Word 0: Scaling Configuration & Element Validity */
	vc4_dlist_write(vc4_state,
			SCALER6_CTL0_VALID |
			VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
			vc4_hvs6_get_alpha_mask_mode(state) |
			(vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
			VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
			VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
			VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
			VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));

	/* Position Word 0: Image Position */
	vc4_state->pos0_offset = vc4_state->dlist_count;
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
			(rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
			VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));

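	/*
	 * Note on the alpha field below: state->alpha is the 16-bit DRM
	 * plane alpha property, while the CTL2 alpha field is narrower,
	 * presumably 12 bits, so the low four bits are dropped by the
	 * ">> 4" when packing it into SCALER5_CTL2_ALPHA.
	 */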
	/* Control Word 2: Alpha Value & CSC */
	vc4_dlist_write(vc4_state,
			vc6_plane_get_csc_mode(vc4_state) |
			vc4_hvs5_get_alpha_blend_mode(state) |
			(mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
			VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));

	/* Position Word 1: Scaled Image Dimensions */
	if (!vc4_state->is_unity)
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->crtc_h - 1,
					      SCALER6_POS1_SCL_LINES) |
				VC4_SET_FIELD(vc4_state->crtc_w - 1,
					      SCALER6_POS1_SCL_WIDTH));

	/* Position Word 2: Source Image Size */
	vc4_state->pos2_offset = vc4_state->dlist_count;
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(height - 1,
				      SCALER6_POS2_SRC_LINES) |
			VC4_SET_FIELD(width - 1,
				      SCALER6_POS2_SRC_WIDTH));

	/* Position Word 3: Context */
	vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/*
	 * TODO: This only covers Raster Scan Order planes
	 */
	for (i = 0; i < num_planes; i++) {
		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
		dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];

		/* Pointer Word 0 */
		vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
				/*
				 * The UPM buffer will be allocated in
				 * vc6_plane_allocate_upm().
				 */
				VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
					      SCALER6_PTR0_UPPER_ADDR));

		/* Pointer Word 1 */
		vc4_dlist_write(vc4_state, lower_32_bits(paddr));

		/* Pointer Word 2 */
		if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
		    base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER6_PTR2_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

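	/*
	 * ptr0_offset[] is recorded above so that vc4_plane_async_set_fb()
	 * and vc4_plane_atomic_async_update() can later patch just the
	 * address words in place instead of re-emitting the whole element.
	 */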
	/*
	 * Palette Word 0
	 * TODO: We're not using the palette mode
	 */

	/*
	 * Trans Word 0
	 * TODO: It's only relevant if we set the trans_rgb bit in the
	 * control word 0, and we don't at the moment.
	 */

	vc4_state->lbm_offset = 0;

	if (!vc4_state->is_unity || fb->format->is_yuv) {
		/*
		 * Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			if (num_planes > 1)
				/*
				 * Emit Cb/Cr as channel 0 and Y as channel
				 * 1. This matches how we set up scl0/scl1
				 * above.
				 */
				vc4_write_scaling_parameters(state, 1);

			vc4_write_scaling_parameters(state, 0);
		}

		/*
		 * If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel =
				VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
					      SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	vc4_dlist_write(vc4_state, SCALER6_CTL0_END);

	vc4_state->dlist[0] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;

	/*
	 * Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/*
	 * Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fall back to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
		       plane->base.id, plane->name, vc4_state->dlist_count);

	return 0;
}

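/*
 * For reference, the element emitted by vc6_plane_mode_set() above is laid
 * out roughly as: CTL0, POS0, CTL2, POS1 (only when scaling), POS2, POS3
 * (context), then PTR0/PTR1/PTR2 per colour plane, optionally an LBM slot
 * and PPF scaling parameters, terminated by a SCALER6_CTL0_END word.
 */
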
/* If a modeset involves changing the setup of a plane, the atomic
 * infrastructure will call this to validate a proposed plane setup.
 * However, if a plane isn't getting updated, this (and the
 * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
 * compute the dlist here and have all active plane dlists get updated
 * in the CRTC's flush.
 */
static int vc4_plane_atomic_check(struct drm_plane *plane,
				  struct drm_atomic_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
	int ret;

	vc4_state->dlist_count = 0;

	if (!plane_enabled(new_plane_state)) {
		struct drm_plane_state *old_plane_state =
			drm_atomic_get_old_plane_state(state, plane);

		if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
		    plane_enabled(old_plane_state)) {
			vc6_plane_free_upm(new_plane_state);
		}
		return 0;
	}

	if (vc4->gen >= VC4_GEN_6_C)
		ret = vc6_plane_mode_set(plane, new_plane_state);
	else
		ret = vc4_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
	    !vc4_state->crtc_w || !vc4_state->crtc_h)
		return 0;

	ret = vc4_plane_allocate_lbm(new_plane_state);
	if (ret)
		return ret;

	if (vc4->gen >= VC4_GEN_6_C) {
		ret = vc6_plane_allocate_upm(new_plane_state);
		if (ret)
			return ret;
	}

	return 0;
}

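/*
 * Note that vc4_plane_mode_set()/vc6_plane_mode_set() may already have run
 * for this state from vc4_plane_atomic_async_check(); the dlist_initialized
 * flag they set is what keeps them from rebuilding the dlist when
 * vc4_plane_atomic_check() above calls them again.
 */
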
static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
	/* No contents here.  Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}

u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	int i;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		goto out;

	vc4_state->hw_dlist = dlist;

	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
	for (i = 0; i < vc4_state->dlist_count; i++)
		writel(vc4_state->dlist[i], &dlist[i]);

	drm_dev_exit(idx);

out:
	return vc4_state->dlist_count;
}

u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	return vc4_state->dlist_count;
}

/* Updates the plane to immediately (well, once the FIFO needs
 * refilling) scan out from a new framebuffer.
 */
void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	/* We're skipping the address adjustment for negative origin,
	 * because this is only called on the primary plane.
	 */
	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);

	if (vc4->gen == VC4_GEN_6_C) {
		u32 value;

		value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
			~SCALER6_PTR0_UPPER_ADDR_MASK;
		value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
				       SCALER6_PTR0_UPPER_ADDR);

		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
		vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;

		value = lower_32_bits(dma_addr);
		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
		vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
	} else {
		u32 addr;

		addr = (u32)dma_addr;

		/* Write the new address into the hardware immediately.  The
		 * scanout will start from this address as soon as the FIFO
		 * needs to refill with pixels.
		 */
		writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);

		/* Also update the CPU-side dlist copy, so that any later
		 * atomic updates that don't do a new modeset on our plane
		 * also use our updated address.
		 */
		vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
	}

	drm_dev_exit(idx);
}

static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_atomic_state *state)
{
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										  plane);
	struct vc4_plane_state *vc4_state, *new_vc4_state;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	swap(plane->state->fb, new_plane_state->fb);
	plane->state->crtc_x = new_plane_state->crtc_x;
	plane->state->crtc_y = new_plane_state->crtc_y;
	plane->state->crtc_w = new_plane_state->crtc_w;
	plane->state->crtc_h = new_plane_state->crtc_h;
	plane->state->src_x = new_plane_state->src_x;
	plane->state->src_y = new_plane_state->src_y;
	plane->state->src_w = new_plane_state->src_w;
	plane->state->src_h = new_plane_state->src_h;
	plane->state->alpha = new_plane_state->alpha;
	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
	plane->state->rotation = new_plane_state->rotation;
	plane->state->zpos = new_plane_state->zpos;
	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
	plane->state->color_encoding = new_plane_state->color_encoding;
	plane->state->color_range = new_plane_state->color_range;
	plane->state->src = new_plane_state->src;
	plane->state->dst = new_plane_state->dst;
	plane->state->visible = new_plane_state->visible;

	new_vc4_state = to_vc4_plane_state(new_plane_state);
	vc4_state = to_vc4_plane_state(plane->state);

	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

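	/*
	 * Only the pos0, pos2 and ptr0 dlist words are patched below;
	 * vc4_plane_atomic_async_check() has already verified that the
	 * remaining words (apart from the LBM slot) are unchanged, so
	 * updating these in place is safe while the HVS keeps scanning out.
	 */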
	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset[0]] =
		new_vc4_state->dlist[vc4_state->ptr0_offset[0]];

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);

	drm_dev_exit(idx);
}

static int vc4_plane_atomic_async_check(struct drm_plane *plane,
					struct drm_atomic_state *state, bool flip)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										  plane);
	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
	int ret;
	u32 i;

	if (vc4->gen <= VC4_GEN_5)
		ret = vc4_plane_mode_set(plane, new_plane_state);
	else
		ret = vc6_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	old_vc4_state = to_vc4_plane_state(plane->state);
	new_vc4_state = to_vc4_plane_state(new_plane_state);

	if (!new_vc4_state->hw_dlist)
		return -EINVAL;

	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
	    old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
		return -EINVAL;

	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update;
	 * if anything else has changed, fall back to a sync update.
	 */
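	/*
	 * The LBM slot is also skipped in the comparison below: the LBM base
	 * address may differ between the old and new state even when the
	 * plane setup itself is unchanged, since the allocation is per-state.
	 */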
	for (i = 0; i < new_vc4_state->dlist_count; i++) {
		if (i == new_vc4_state->pos0_offset ||
		    i == new_vc4_state->pos2_offset ||
		    i == new_vc4_state->ptr0_offset[0] ||
		    (new_vc4_state->lbm_offset &&
		     i == new_vc4_state->lbm_offset))
			continue;

		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
			return -EINVAL;
	}

	return 0;
}

static int vc4_prepare_fb(struct drm_plane *plane,
			  struct drm_plane_state *state)
{
	struct vc4_bo *bo;
	int ret;

	if (!state->fb)
		return 0;

	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);

	ret = drm_gem_plane_helper_prepare_fb(plane, state);
	if (ret)
		return ret;

	return vc4_bo_inc_usecnt(bo);
}

static void vc4_cleanup_fb(struct drm_plane *plane,
			   struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (!state->fb)
		return;

	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
	vc4_bo_dec_usecnt(bo);
}

static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.prepare_fb = vc4_prepare_fb,
	.cleanup_fb = vc4_cleanup_fb,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

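/*
 * The VC5+ variant has no prepare_fb/cleanup_fb: vc4_prepare_fb()'s BO
 * usecnt tracking only applies to the VC4 generation, and with prepare_fb
 * left unset the atomic helpers presumably fall back to
 * drm_gem_plane_helper_prepare_fb() for implicit fencing.
 */
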
static bool vc4_format_mod_supported(struct drm_plane *plane,
				     uint32_t format,
				     uint64_t modifier)
{
	/* Support T_TILING for RGB formats only. */
	switch (format) {
	case DRM_FORMAT_XRGB8888:
	case DRM_FORMAT_ARGB8888:
	case DRM_FORMAT_ABGR8888:
	case DRM_FORMAT_XBGR8888:
	case DRM_FORMAT_RGB565:
	case DRM_FORMAT_BGR565:
	case DRM_FORMAT_ARGB1555:
	case DRM_FORMAT_XRGB1555:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_NV21:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_P030:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_RGBX1010102:
	case DRM_FORMAT_BGRX1010102:
	case DRM_FORMAT_RGBA1010102:
	case DRM_FORMAT_BGRA1010102:
	case DRM_FORMAT_XRGB4444:
	case DRM_FORMAT_ARGB4444:
	case DRM_FORMAT_XBGR4444:
	case DRM_FORMAT_ABGR4444:
	case DRM_FORMAT_RGBX4444:
	case DRM_FORMAT_RGBA4444:
	case DRM_FORMAT_BGRX4444:
	case DRM_FORMAT_BGRA4444:
	case DRM_FORMAT_RGB332:
	case DRM_FORMAT_BGR233:
	case DRM_FORMAT_YUV422:
	case DRM_FORMAT_YVU422:
	case DRM_FORMAT_YUV420:
	case DRM_FORMAT_YVU420:
	case DRM_FORMAT_NV16:
	case DRM_FORMAT_NV61:
	default:
		return (modifier == DRM_FORMAT_MOD_LINEAR);
	}
}

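/*
 * Note that the modifiers[] array passed to drmm_universal_plane_alloc()
 * below is advertised wholesale; the per-format combinations actually
 * exposed to userspace (the IN_FORMATS blob) are filtered through
 * vc4_format_mod_supported() above.
 */
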
static const struct drm_plane_funcs vc4_plane_funcs = {
	.update_plane = drm_atomic_helper_update_plane,
	.disable_plane = drm_atomic_helper_disable_plane,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
	.format_mod_supported = vc4_format_mod_supported,
};

struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_plane *plane;
	struct vc4_plane *vc4_plane;
	u32 formats[ARRAY_SIZE(hvs_formats)];
	int num_formats = 0;
	unsigned i;
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
			formats[num_formats] = hvs_formats[i].drm;
			num_formats++;
		}
	}

	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
					       possible_crtcs,
					       &vc4_plane_funcs,
					       formats, num_formats,
					       modifiers, type, NULL);
	if (IS_ERR(vc4_plane))
		return ERR_CAST(vc4_plane);
	plane = &vc4_plane->base;

	if (vc4->gen >= VC4_GEN_5)
		drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
	else
		drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_blend_mode_property(plane,
					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
					     BIT(DRM_MODE_BLEND_PREMULTI) |
					     BIT(DRM_MODE_BLEND_COVERAGE));
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	drm_plane_create_color_properties(plane,
					  BIT(DRM_COLOR_YCBCR_BT601) |
					  BIT(DRM_COLOR_YCBCR_BT709) |
					  BIT(DRM_COLOR_YCBCR_BT2020),
					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
					  DRM_COLOR_YCBCR_BT709,
					  DRM_COLOR_YCBCR_LIMITED_RANGE);

	if (type == DRM_PLANE_TYPE_PRIMARY)
		drm_plane_create_zpos_immutable_property(plane, 0);

	return plane;
}

#define VC4_NUM_OVERLAY_PLANES	16

int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes.  We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small a plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit.  As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
				       GENMASK(drm->mode_config.num_crtc - 1, 0));

		if (IS_ERR(plane))
			continue;

		/* Create zpos property. Max of all the overlays + 1 primary +
		 * 1 cursor plane on a crtc.
		 */
		drm_plane_create_zpos_property(plane, i + 1, 1,
					       VC4_NUM_OVERLAY_PLANES + 1);
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since the zpos fallback is that planes are rendered by plane
		 * ID order, and that then puts the cursor on top.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
					      drm_crtc_mask(crtc));
		if (!IS_ERR(cursor_plane)) {
			crtc->cursor = cursor_plane;

			drm_plane_create_zpos_property(cursor_plane,
						       VC4_NUM_OVERLAY_PLANES + 1,
						       1,
						       VC4_NUM_OVERLAY_PLANES + 1);
		}
	}

	return 0;
}