1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Broadcom 4 */ 5 6 /** 7 * DOC: VC4 plane module 8 * 9 * Each DRM plane is a layer of pixels being scanned out by the HVS. 10 * 11 * At atomic modeset check time, we compute the HVS display element 12 * state that would be necessary for displaying the plane (giving us a 13 * chance to figure out if a plane configuration is invalid), then at 14 * atomic flush time the CRTC will ask us to write our element state 15 * into the region of the HVS that it has allocated for us. 16 */ 17 18 #include <drm/drm_atomic.h> 19 #include <drm/drm_atomic_helper.h> 20 #include <drm/drm_atomic_uapi.h> 21 #include <drm/drm_blend.h> 22 #include <drm/drm_drv.h> 23 #include <drm/drm_fb_dma_helper.h> 24 #include <drm/drm_fourcc.h> 25 #include <drm/drm_framebuffer.h> 26 #include <drm/drm_gem_atomic_helper.h> 27 #include <drm/drm_print.h> 28 29 #include "vc4_drv.h" 30 #include "vc4_regs.h" 31 32 static const struct hvs_format { 33 u32 drm; /* DRM_FORMAT_* */ 34 u32 hvs; /* HVS_FORMAT_* */ 35 u32 pixel_order; 36 u32 pixel_order_hvs5; 37 bool hvs5_only; 38 } hvs_formats[] = { 39 { 40 .drm = DRM_FORMAT_XRGB8888, 41 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 42 .pixel_order = HVS_PIXEL_ORDER_ABGR, 43 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 44 }, 45 { 46 .drm = DRM_FORMAT_ARGB8888, 47 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 48 .pixel_order = HVS_PIXEL_ORDER_ABGR, 49 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 50 }, 51 { 52 .drm = DRM_FORMAT_ABGR8888, 53 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 54 .pixel_order = HVS_PIXEL_ORDER_ARGB, 55 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 56 }, 57 { 58 .drm = DRM_FORMAT_XBGR8888, 59 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 60 .pixel_order = HVS_PIXEL_ORDER_ARGB, 61 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 62 }, 63 { 64 .drm = DRM_FORMAT_RGB565, 65 .hvs = HVS_PIXEL_FORMAT_RGB565, 66 .pixel_order = HVS_PIXEL_ORDER_XRGB, 67 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB, 68 }, 69 { 70 .drm = DRM_FORMAT_BGR565, 71 .hvs = HVS_PIXEL_FORMAT_RGB565, 72 .pixel_order = HVS_PIXEL_ORDER_XBGR, 73 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR, 74 }, 75 { 76 .drm = DRM_FORMAT_ARGB1555, 77 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 78 .pixel_order = HVS_PIXEL_ORDER_ABGR, 79 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 80 }, 81 { 82 .drm = DRM_FORMAT_XRGB1555, 83 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 84 .pixel_order = HVS_PIXEL_ORDER_ABGR, 85 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 86 }, 87 { 88 .drm = DRM_FORMAT_RGB888, 89 .hvs = HVS_PIXEL_FORMAT_RGB888, 90 .pixel_order = HVS_PIXEL_ORDER_XRGB, 91 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB, 92 }, 93 { 94 .drm = DRM_FORMAT_BGR888, 95 .hvs = HVS_PIXEL_FORMAT_RGB888, 96 .pixel_order = HVS_PIXEL_ORDER_XBGR, 97 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR, 98 }, 99 { 100 .drm = DRM_FORMAT_YUV422, 101 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 102 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 103 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 104 }, 105 { 106 .drm = DRM_FORMAT_YVU422, 107 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 108 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 109 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 110 }, 111 { 112 .drm = DRM_FORMAT_YUV444, 113 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 114 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 115 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 116 }, 117 { 118 .drm = DRM_FORMAT_YVU444, 119 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 120 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 121 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 122 }, 123 { 124 .drm = DRM_FORMAT_YUV420, 125 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 126 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 127 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 128 }, 129 { 130 .drm = DRM_FORMAT_YVU420, 131 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 132 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 133 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 134 }, 135 { 136 .drm = DRM_FORMAT_NV12, 137 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 138 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 139 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 140 }, 141 { 142 .drm = DRM_FORMAT_NV21, 143 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 144 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 145 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 146 }, 147 { 148 .drm = DRM_FORMAT_NV16, 149 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 150 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 151 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 152 }, 153 { 154 .drm = DRM_FORMAT_NV61, 155 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 156 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 157 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB, 158 }, 159 { 160 .drm = DRM_FORMAT_P030, 161 .hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT, 162 .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, 163 .hvs5_only = true, 164 }, 165 { 166 .drm = DRM_FORMAT_XRGB2101010, 167 .hvs = HVS_PIXEL_FORMAT_RGBA1010102, 168 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 169 .hvs5_only = true, 170 }, 171 { 172 .drm = DRM_FORMAT_ARGB2101010, 173 .hvs = HVS_PIXEL_FORMAT_RGBA1010102, 174 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 175 .hvs5_only = true, 176 }, 177 { 178 .drm = DRM_FORMAT_ABGR2101010, 179 .hvs = HVS_PIXEL_FORMAT_RGBA1010102, 180 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 181 .hvs5_only = true, 182 }, 183 { 184 .drm = DRM_FORMAT_XBGR2101010, 185 .hvs = HVS_PIXEL_FORMAT_RGBA1010102, 186 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 187 .hvs5_only = true, 188 }, 189 { 190 .drm = DRM_FORMAT_RGB332, 191 .hvs = HVS_PIXEL_FORMAT_RGB332, 192 .pixel_order = HVS_PIXEL_ORDER_ARGB, 193 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 194 }, 195 { 196 .drm = DRM_FORMAT_BGR233, 197 .hvs = HVS_PIXEL_FORMAT_RGB332, 198 .pixel_order = HVS_PIXEL_ORDER_ABGR, 199 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 200 }, 201 { 202 .drm = DRM_FORMAT_XRGB4444, 203 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 204 .pixel_order = HVS_PIXEL_ORDER_ABGR, 205 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 206 }, 207 { 208 .drm = DRM_FORMAT_ARGB4444, 209 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 210 .pixel_order = HVS_PIXEL_ORDER_ABGR, 211 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 212 }, 213 { 214 .drm = DRM_FORMAT_XBGR4444, 215 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 216 .pixel_order = HVS_PIXEL_ORDER_ARGB, 217 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 218 }, 219 { 220 .drm = DRM_FORMAT_ABGR4444, 221 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 222 .pixel_order = HVS_PIXEL_ORDER_ARGB, 223 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 224 }, 225 { 226 .drm = DRM_FORMAT_BGRX4444, 227 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 228 .pixel_order = HVS_PIXEL_ORDER_RGBA, 229 .pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA, 230 }, 231 { 232 .drm = DRM_FORMAT_BGRA4444, 233 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 234 .pixel_order = HVS_PIXEL_ORDER_RGBA, 235 .pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA, 236 }, 237 { 238 .drm = DRM_FORMAT_RGBX4444, 239 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 240 .pixel_order = HVS_PIXEL_ORDER_BGRA, 241 .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA, 242 }, 243 { 244 .drm = DRM_FORMAT_RGBA4444, 245 .hvs = HVS_PIXEL_FORMAT_RGBA4444, 246 .pixel_order = HVS_PIXEL_ORDER_BGRA, 247 .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA, 248 }, 249 }; 250 251 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) 252 { 253 unsigned i; 254 255 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 256 if (hvs_formats[i].drm == drm_format) 257 return &hvs_formats[i]; 258 } 259 260 return NULL; 261 } 262 263 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) 264 { 265 if (dst == src >> 16) 266 return VC4_SCALING_NONE; 267 if (3 * dst >= 2 * (src >> 16)) 268 return VC4_SCALING_PPF; 269 else 270 return VC4_SCALING_TPZ; 271 } 272 273 static bool plane_enabled(struct drm_plane_state *state) 274 { 275 return state->fb && !WARN_ON(!state->crtc); 276 } 277 278 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) 279 { 280 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 281 struct vc4_hvs *hvs = vc4->hvs; 282 struct vc4_plane_state *vc4_state; 283 unsigned int i; 284 285 if (WARN_ON(!plane->state)) 286 return NULL; 287 288 vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL); 289 if (!vc4_state) 290 return NULL; 291 292 memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); 293 294 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { 295 if (vc4_state->upm_handle[i]) 296 refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount); 297 } 298 299 vc4_state->dlist_initialized = 0; 300 301 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); 302 303 if (vc4_state->dlist) { 304 vc4_state->dlist = kmemdup(vc4_state->dlist, 305 vc4_state->dlist_count * 4, 306 GFP_KERNEL); 307 if (!vc4_state->dlist) { 308 kfree(vc4_state); 309 return NULL; 310 } 311 vc4_state->dlist_size = vc4_state->dlist_count; 312 } 313 314 return &vc4_state->base; 315 } 316 317 static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle) 318 { 319 struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle]; 320 unsigned long irqflags; 321 322 spin_lock_irqsave(&hvs->mm_lock, irqflags); 323 drm_mm_remove_node(&refcount->upm); 324 spin_unlock_irqrestore(&hvs->mm_lock, irqflags); 325 refcount->upm.start = 0; 326 refcount->upm.size = 0; 327 refcount->size = 0; 328 329 ida_free(&hvs->upm_handles, upm_handle); 330 } 331 332 static void vc4_plane_destroy_state(struct drm_plane *plane, 333 struct drm_plane_state *state) 334 { 335 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 336 struct vc4_hvs *hvs = vc4->hvs; 337 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 338 unsigned int i; 339 340 if (drm_mm_node_allocated(&vc4_state->lbm)) { 341 unsigned long irqflags; 342 343 spin_lock_irqsave(&hvs->mm_lock, irqflags); 344 drm_mm_remove_node(&vc4_state->lbm); 345 spin_unlock_irqrestore(&hvs->mm_lock, irqflags); 346 } 347 348 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { 349 struct vc4_upm_refcounts *refcount; 350 351 if (!vc4_state->upm_handle[i]) 352 continue; 353 354 refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]]; 355 356 if (refcount_dec_and_test(&refcount->refcount)) 357 vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]); 358 } 359 360 kfree(vc4_state->dlist); 361 __drm_atomic_helper_plane_destroy_state(&vc4_state->base); 362 kfree(state); 363 } 364 365 /* Called during init to allocate the plane's atomic state. */ 366 static void vc4_plane_reset(struct drm_plane *plane) 367 { 368 struct vc4_plane_state *vc4_state; 369 370 if (plane->state) 371 __drm_atomic_helper_plane_destroy_state(plane->state); 372 373 kfree(plane->state); 374 375 vc4_state = kzalloc_obj(*vc4_state); 376 if (!vc4_state) 377 return; 378 379 __drm_atomic_helper_plane_reset(plane, &vc4_state->base); 380 } 381 382 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) 383 { 384 if (vc4_state->dlist_count == vc4_state->dlist_size) { 385 u32 new_size = max(4u, vc4_state->dlist_count * 2); 386 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL); 387 388 if (!new_dlist) 389 return; 390 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4); 391 392 kfree(vc4_state->dlist); 393 vc4_state->dlist = new_dlist; 394 vc4_state->dlist_size = new_size; 395 } 396 397 vc4_state->dlist_count++; 398 } 399 400 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) 401 { 402 unsigned int idx = vc4_state->dlist_count; 403 404 vc4_dlist_counter_increment(vc4_state); 405 vc4_state->dlist[idx] = val; 406 } 407 408 /* Returns the scl0/scl1 field based on whether the dimensions need to 409 * be up/down/non-scaled. 410 * 411 * This is a replication of a table from the spec. 412 */ 413 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) 414 { 415 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 416 417 switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { 418 case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: 419 return SCALER_CTL0_SCL_H_PPF_V_PPF; 420 case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: 421 return SCALER_CTL0_SCL_H_TPZ_V_PPF; 422 case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: 423 return SCALER_CTL0_SCL_H_PPF_V_TPZ; 424 case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: 425 return SCALER_CTL0_SCL_H_TPZ_V_TPZ; 426 case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: 427 return SCALER_CTL0_SCL_H_PPF_V_NONE; 428 case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: 429 return SCALER_CTL0_SCL_H_NONE_V_PPF; 430 case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: 431 return SCALER_CTL0_SCL_H_NONE_V_TPZ; 432 case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: 433 return SCALER_CTL0_SCL_H_TPZ_V_NONE; 434 default: 435 case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: 436 /* The unity case is independently handled by 437 * SCALER_CTL0_UNITY. 438 */ 439 return 0; 440 } 441 } 442 443 static int vc4_plane_margins_adj(struct drm_plane_state *pstate) 444 { 445 struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate); 446 unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay; 447 struct drm_crtc_state *crtc_state; 448 449 crtc_state = drm_atomic_get_new_crtc_state(pstate->state, 450 pstate->crtc); 451 452 vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom); 453 if (!left && !right && !top && !bottom) 454 return 0; 455 456 if (left + right >= crtc_state->mode.hdisplay || 457 top + bottom >= crtc_state->mode.vdisplay) 458 return -EINVAL; 459 460 adjhdisplay = crtc_state->mode.hdisplay - (left + right); 461 vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x * 462 adjhdisplay, 463 crtc_state->mode.hdisplay); 464 vc4_pstate->crtc_x += left; 465 if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right) 466 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right; 467 468 adjvdisplay = crtc_state->mode.vdisplay - (top + bottom); 469 vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y * 470 adjvdisplay, 471 crtc_state->mode.vdisplay); 472 vc4_pstate->crtc_y += top; 473 if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom) 474 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom; 475 476 vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w * 477 adjhdisplay, 478 crtc_state->mode.hdisplay); 479 vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h * 480 adjvdisplay, 481 crtc_state->mode.vdisplay); 482 483 if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h) 484 return -EINVAL; 485 486 return 0; 487 } 488 489 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) 490 { 491 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 492 struct drm_framebuffer *fb = state->fb; 493 int num_planes = fb->format->num_planes; 494 struct drm_crtc_state *crtc_state; 495 u32 h_subsample = fb->format->hsub; 496 u32 v_subsample = fb->format->vsub; 497 int ret; 498 499 crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc); 500 if (!crtc_state) { 501 DRM_DEBUG_KMS("Invalid crtc state\n"); 502 return -EINVAL; 503 } 504 505 ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1, 506 INT_MAX, true, true); 507 if (ret) 508 return ret; 509 510 vc4_state->src_x = state->src.x1; 511 vc4_state->src_y = state->src.y1; 512 vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x; 513 vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y; 514 515 vc4_state->crtc_x = state->dst.x1; 516 vc4_state->crtc_y = state->dst.y1; 517 vc4_state->crtc_w = state->dst.x2 - state->dst.x1; 518 vc4_state->crtc_h = state->dst.y2 - state->dst.y1; 519 520 ret = vc4_plane_margins_adj(state); 521 if (ret) 522 return ret; 523 524 vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], 525 vc4_state->crtc_w); 526 vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], 527 vc4_state->crtc_h); 528 529 vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && 530 vc4_state->y_scaling[0] == VC4_SCALING_NONE); 531 532 if (num_planes > 1) { 533 vc4_state->is_yuv = true; 534 535 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; 536 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; 537 538 vc4_state->x_scaling[1] = 539 vc4_get_scaling_mode(vc4_state->src_w[1], 540 vc4_state->crtc_w); 541 vc4_state->y_scaling[1] = 542 vc4_get_scaling_mode(vc4_state->src_h[1], 543 vc4_state->crtc_h); 544 545 /* YUV conversion requires that horizontal scaling be enabled 546 * on the UV plane even if vc4_get_scaling_mode() returned 547 * VC4_SCALING_NONE (which can happen when the down-scaling 548 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this 549 * case. 550 */ 551 if (vc4_state->x_scaling[1] == VC4_SCALING_NONE) 552 vc4_state->x_scaling[1] = VC4_SCALING_PPF; 553 554 /* Similarly UV needs vertical scaling to be enabled. 555 * Without this a 1:1 scaled YUV422 plane isn't rendered. 556 */ 557 if (vc4_state->y_scaling[1] == VC4_SCALING_NONE) 558 vc4_state->y_scaling[1] = VC4_SCALING_PPF; 559 } else { 560 vc4_state->is_yuv = false; 561 vc4_state->x_scaling[1] = VC4_SCALING_NONE; 562 vc4_state->y_scaling[1] = VC4_SCALING_NONE; 563 } 564 565 return 0; 566 } 567 568 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 569 { 570 struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev); 571 u32 scale, recip; 572 573 WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); 574 575 scale = src / dst; 576 577 /* The specs note that while the reciprocal would be defined 578 * as (1<<32)/scale, ~0 is close enough. 579 */ 580 recip = ~0 / scale; 581 582 vc4_dlist_write(vc4_state, 583 /* 584 * The BCM2712 is lacking BIT(31) compared to 585 * the previous generations, but we don't use 586 * it. 587 */ 588 VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | 589 VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); 590 vc4_dlist_write(vc4_state, 591 VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); 592 } 593 594 /* phase magnitude bits */ 595 #define PHASE_BITS 6 596 597 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst, 598 u32 xy, int channel) 599 { 600 struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev); 601 u32 scale = src / dst; 602 s32 offset, offset2; 603 s32 phase; 604 605 WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); 606 607 /* 608 * Start the phase at 1/2 pixel from the 1st pixel at src_x. 609 * 1/4 pixel for YUV. 610 */ 611 if (channel) { 612 /* 613 * The phase is relative to scale_src->x, so shift it for 614 * display list's x value 615 */ 616 offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1; 617 offset += -(1 << PHASE_BITS >> 2); 618 } else { 619 /* 620 * The phase is relative to scale_src->x, so shift it for 621 * display list's x value 622 */ 623 offset = (xy & 0xffff) >> (16 - PHASE_BITS); 624 offset += -(1 << PHASE_BITS >> 1); 625 626 /* 627 * This is a kludge to make sure the scaling factors are 628 * consistent with YUV's luma scaling. We lose 1-bit precision 629 * because of this. 630 */ 631 scale &= ~1; 632 } 633 634 /* 635 * There may be a also small error introduced by precision of scale. 636 * Add half of that as a compromise 637 */ 638 offset2 = src - dst * scale; 639 offset2 >>= 16 - PHASE_BITS; 640 phase = offset + (offset2 >> 1); 641 642 /* Ensure +ve values don't touch the sign bit, then truncate negative values */ 643 if (phase >= 1 << PHASE_BITS) 644 phase = (1 << PHASE_BITS) - 1; 645 646 phase &= SCALER_PPF_IPHASE_MASK; 647 648 vc4_dlist_write(vc4_state, 649 SCALER_PPF_AGC | 650 VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | 651 /* 652 * The register layout documentation is slightly 653 * different to setup the phase in the BCM2712, 654 * but they seem equivalent. 655 */ 656 VC4_SET_FIELD(phase, SCALER_PPF_IPHASE)); 657 } 658 659 static u32 __vc4_lbm_size(struct drm_plane_state *state) 660 { 661 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 662 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 663 u32 pix_per_line; 664 u32 lbm; 665 666 /* LBM is not needed when there's no vertical scaling. */ 667 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE && 668 vc4_state->y_scaling[1] == VC4_SCALING_NONE) 669 return 0; 670 671 /* 672 * This can be further optimized in the RGB/YUV444 case if the PPF 673 * decimation factor is between 0.5 and 1.0 by using crtc_w. 674 * 675 * It's not an issue though, since in that case since src_w[0] is going 676 * to be greater than or equal to crtc_w. 677 */ 678 if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ) 679 pix_per_line = vc4_state->crtc_w; 680 else 681 pix_per_line = vc4_state->src_w[0] >> 16; 682 683 if (!vc4_state->is_yuv) { 684 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) 685 lbm = pix_per_line * 8; 686 else { 687 /* In special cases, this multiplier might be 12. */ 688 lbm = pix_per_line * 16; 689 } 690 } else { 691 /* There are cases for this going down to a multiplier 692 * of 2, but according to the firmware source, the 693 * table in the docs is somewhat wrong. 694 */ 695 lbm = pix_per_line * 16; 696 } 697 698 /* Align it to 64 or 128 (hvs5) bytes */ 699 lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64); 700 701 /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ 702 lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2; 703 704 return lbm; 705 } 706 707 static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state, 708 unsigned int channel) 709 { 710 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 711 712 switch (vc4_state->y_scaling[channel]) { 713 case VC4_SCALING_PPF: 714 return 4; 715 716 case VC4_SCALING_TPZ: 717 return 2; 718 719 default: 720 return 0; 721 } 722 } 723 724 static unsigned int vc4_lbm_components(const struct drm_plane_state *state, 725 unsigned int channel) 726 { 727 const struct drm_format_info *info = state->fb->format; 728 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 729 730 if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE) 731 return 0; 732 733 if (info->is_yuv) 734 return channel ? 2 : 1; 735 736 if (info->has_alpha) 737 return 4; 738 739 return 3; 740 } 741 742 static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state, 743 unsigned int channel) 744 { 745 const struct drm_format_info *info = state->fb->format; 746 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 747 unsigned int channels_scaled = 0; 748 unsigned int components, words, wpc; 749 unsigned int width, lines; 750 unsigned int i; 751 752 /* LBM is meant to use the smaller of source or dest width, but there 753 * is a issue with UV scaling that the size required for the second 754 * channel is based on the source width only. 755 */ 756 if (info->hsub > 1 && channel == 1) 757 width = state->src_w >> 16; 758 else 759 width = min(state->src_w >> 16, state->crtc_w); 760 width = round_up(width / info->hsub, 4); 761 762 wpc = vc4_lbm_words_per_component(state, channel); 763 if (!wpc) 764 return 0; 765 766 components = vc4_lbm_components(state, channel); 767 if (!components) 768 return 0; 769 770 if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha) 771 components -= 1; 772 773 words = width * wpc * components; 774 775 lines = DIV_ROUND_UP(words, 128 / info->hsub); 776 777 for (i = 0; i < 2; i++) 778 if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE) 779 channels_scaled++; 780 781 if (channels_scaled == 1) 782 lines = lines / 2; 783 784 return lines; 785 } 786 787 static unsigned int __vc6_lbm_size(const struct drm_plane_state *state) 788 { 789 const struct drm_format_info *info = state->fb->format; 790 791 if (info->hsub > 1) 792 return max(vc4_lbm_channel_size(state, 0), 793 vc4_lbm_channel_size(state, 1)); 794 else 795 return vc4_lbm_channel_size(state, 0); 796 } 797 798 static u32 vc4_lbm_size(struct drm_plane_state *state) 799 { 800 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 801 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 802 803 /* LBM is not needed when there's no vertical scaling. */ 804 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE && 805 vc4_state->y_scaling[1] == VC4_SCALING_NONE) 806 return 0; 807 808 if (vc4->gen >= VC4_GEN_6_C) 809 return __vc6_lbm_size(state); 810 else 811 return __vc4_lbm_size(state); 812 } 813 814 static size_t vc6_upm_size(const struct drm_plane_state *state, 815 unsigned int plane) 816 { 817 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 818 unsigned int stride = state->fb->pitches[plane]; 819 820 /* 821 * TODO: This only works for raster formats, and is sub-optimal 822 * for buffers with a stride aligned on 32 bytes. 823 */ 824 unsigned int words_per_line = (stride + 62) / 32; 825 unsigned int fetch_region_size = words_per_line * 32; 826 unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines; 827 unsigned int buffer_size = fetch_region_size * buffer_lines; 828 829 return ALIGN(buffer_size, HVS_UBM_WORD_SIZE); 830 } 831 832 static void vc4_write_scaling_parameters(struct drm_plane_state *state, 833 int channel) 834 { 835 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 836 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 837 838 WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); 839 840 /* Ch0 H-PPF Word 0: Scaling Parameters */ 841 if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { 842 vc4_write_ppf(vc4_state, vc4_state->src_w[channel], 843 vc4_state->crtc_w, vc4_state->src_x, channel); 844 } 845 846 /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ 847 if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { 848 vc4_write_ppf(vc4_state, vc4_state->src_h[channel], 849 vc4_state->crtc_h, vc4_state->src_y, channel); 850 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 851 } 852 853 /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ 854 if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { 855 vc4_write_tpz(vc4_state, vc4_state->src_w[channel], 856 vc4_state->crtc_w); 857 } 858 859 /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ 860 if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { 861 vc4_write_tpz(vc4_state, vc4_state->src_h[channel], 862 vc4_state->crtc_h); 863 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 864 } 865 } 866 867 static void vc4_plane_calc_load(struct drm_plane_state *state) 868 { 869 unsigned int hvs_load_shift, vrefresh, i; 870 struct drm_framebuffer *fb = state->fb; 871 struct vc4_plane_state *vc4_state; 872 struct drm_crtc_state *crtc_state; 873 unsigned int vscale_factor; 874 875 vc4_state = to_vc4_plane_state(state); 876 crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc); 877 vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode); 878 879 /* The HVS is able to process 2 pixels/cycle when scaling the source, 880 * 4 pixels/cycle otherwise. 881 * Alpha blending step seems to be pipelined and it's always operating 882 * at 4 pixels/cycle, so the limiting aspect here seems to be the 883 * scaler block. 884 * HVS load is expressed in clk-cycles/sec (AKA Hz). 885 */ 886 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 887 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 888 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 889 vc4_state->y_scaling[1] != VC4_SCALING_NONE) 890 hvs_load_shift = 1; 891 else 892 hvs_load_shift = 2; 893 894 vc4_state->membus_load = 0; 895 vc4_state->hvs_load = 0; 896 for (i = 0; i < fb->format->num_planes; i++) { 897 /* Even if the bandwidth/plane required for a single frame is 898 * 899 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) * 900 * cpp * vrefresh 901 * 902 * when downscaling, we have to read more pixels per line in 903 * the time frame reserved for a single line, so the bandwidth 904 * demand can be punctually higher. To account for that, we 905 * calculate the down-scaling factor and multiply the plane 906 * load by this number. We're likely over-estimating the read 907 * demand, but that's better than under-estimating it. 908 */ 909 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16, 910 vc4_state->crtc_h); 911 vc4_state->membus_load += (vc4_state->src_w[i] >> 16) * 912 (vc4_state->src_h[i] >> 16) * 913 vscale_factor * fb->format->cpp[i]; 914 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w; 915 } 916 917 vc4_state->hvs_load *= vrefresh; 918 vc4_state->hvs_load >>= hvs_load_shift; 919 vc4_state->membus_load *= vrefresh; 920 } 921 922 static int vc4_plane_allocate_lbm(struct drm_plane_state *state) 923 { 924 struct drm_device *drm = state->plane->dev; 925 struct vc4_dev *vc4 = to_vc4_dev(drm); 926 struct drm_plane *plane = state->plane; 927 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 928 unsigned long irqflags; 929 u32 lbm_size; 930 931 lbm_size = vc4_lbm_size(state); 932 if (!lbm_size) 933 return 0; 934 935 /* 936 * NOTE: BCM2712 doesn't need to be aligned, since the size 937 * returned by vc4_lbm_size() is in words already. 938 */ 939 if (vc4->gen == VC4_GEN_5) 940 lbm_size = ALIGN(lbm_size, 64); 941 else if (vc4->gen == VC4_GEN_4) 942 lbm_size = ALIGN(lbm_size, 32); 943 944 drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n", 945 plane->base.id, plane->name, lbm_size); 946 947 if (WARN_ON(!vc4_state->lbm_offset)) 948 return -EINVAL; 949 950 /* Allocate the LBM memory that the HVS will use for temporary 951 * storage due to our scaling/format conversion. 952 */ 953 if (!drm_mm_node_allocated(&vc4_state->lbm)) { 954 int ret; 955 956 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 957 ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, 958 &vc4_state->lbm, 959 lbm_size, 1, 960 0, 0); 961 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 962 963 if (ret) { 964 drm_err(drm, "Failed to allocate LBM entry: %d\n", ret); 965 return ret; 966 } 967 } else { 968 WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); 969 } 970 971 vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start; 972 973 return 0; 974 } 975 976 static int vc6_plane_allocate_upm(struct drm_plane_state *state) 977 { 978 const struct drm_format_info *info = state->fb->format; 979 struct drm_device *drm = state->plane->dev; 980 struct vc4_dev *vc4 = to_vc4_dev(drm); 981 struct vc4_hvs *hvs = vc4->hvs; 982 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 983 unsigned int i; 984 int ret; 985 986 WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C); 987 988 vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES; 989 990 for (i = 0; i < info->num_planes; i++) { 991 struct vc4_upm_refcounts *refcount; 992 int upm_handle; 993 unsigned long irqflags; 994 size_t upm_size; 995 996 upm_size = vc6_upm_size(state, i); 997 if (!upm_size) 998 return -EINVAL; 999 upm_handle = vc4_state->upm_handle[i]; 1000 1001 if (upm_handle && 1002 hvs->upm_refcounts[upm_handle].size == upm_size) { 1003 /* Allocation is the same size as the previous user of 1004 * the plane. Keep the allocation. 1005 */ 1006 vc4_state->upm_handle[i] = upm_handle; 1007 } else { 1008 if (upm_handle && 1009 refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) { 1010 vc4_plane_release_upm_ida(hvs, upm_handle); 1011 vc4_state->upm_handle[i] = 0; 1012 } 1013 1014 upm_handle = ida_alloc_range(&hvs->upm_handles, 1, 1015 VC4_NUM_UPM_HANDLES, 1016 GFP_KERNEL); 1017 if (upm_handle < 0) { 1018 drm_dbg(drm, "Out of upm_handles\n"); 1019 return upm_handle; 1020 } 1021 vc4_state->upm_handle[i] = upm_handle; 1022 1023 refcount = &hvs->upm_refcounts[upm_handle]; 1024 refcount_set(&refcount->refcount, 1); 1025 refcount->size = upm_size; 1026 1027 spin_lock_irqsave(&hvs->mm_lock, irqflags); 1028 ret = drm_mm_insert_node_generic(&hvs->upm_mm, 1029 &refcount->upm, 1030 upm_size, HVS_UBM_WORD_SIZE, 1031 0, 0); 1032 spin_unlock_irqrestore(&hvs->mm_lock, irqflags); 1033 if (ret) { 1034 drm_err(drm, "Failed to allocate UPM entry: %d\n", ret); 1035 refcount_set(&refcount->refcount, 0); 1036 ida_free(&hvs->upm_handles, upm_handle); 1037 vc4_state->upm_handle[i] = 0; 1038 return ret; 1039 } 1040 } 1041 1042 refcount = &hvs->upm_refcounts[upm_handle]; 1043 vc4_state->dlist[vc4_state->ptr0_offset[i]] |= 1044 VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE, 1045 SCALER6_PTR0_UPM_BASE) | 1046 VC4_SET_FIELD(vc4_state->upm_handle[i] - 1, 1047 SCALER6_PTR0_UPM_HANDLE) | 1048 VC4_SET_FIELD(vc4_state->upm_buffer_lines, 1049 SCALER6_PTR0_UPM_BUFF_SIZE); 1050 } 1051 1052 return 0; 1053 } 1054 1055 static void vc6_plane_free_upm(struct drm_plane_state *state) 1056 { 1057 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 1058 struct drm_device *drm = state->plane->dev; 1059 struct vc4_dev *vc4 = to_vc4_dev(drm); 1060 struct vc4_hvs *hvs = vc4->hvs; 1061 unsigned int i; 1062 1063 WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C); 1064 1065 for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { 1066 unsigned int upm_handle; 1067 1068 upm_handle = vc4_state->upm_handle[i]; 1069 if (!upm_handle) 1070 continue; 1071 1072 if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) 1073 vc4_plane_release_upm_ida(hvs, upm_handle); 1074 vc4_state->upm_handle[i] = 0; 1075 } 1076 } 1077 1078 /* 1079 * The colorspace conversion matrices are held in 3 entries in the dlist. 1080 * Create an array of them, with entries for each full and limited mode, and 1081 * each supported colorspace. 1082 */ 1083 static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = { 1084 { 1085 /* Limited range */ 1086 { 1087 /* BT601 */ 1088 SCALER_CSC0_ITR_R_601_5, 1089 SCALER_CSC1_ITR_R_601_5, 1090 SCALER_CSC2_ITR_R_601_5, 1091 }, { 1092 /* BT709 */ 1093 SCALER_CSC0_ITR_R_709_3, 1094 SCALER_CSC1_ITR_R_709_3, 1095 SCALER_CSC2_ITR_R_709_3, 1096 }, { 1097 /* BT2020 */ 1098 SCALER_CSC0_ITR_R_2020, 1099 SCALER_CSC1_ITR_R_2020, 1100 SCALER_CSC2_ITR_R_2020, 1101 } 1102 }, { 1103 /* Full range */ 1104 { 1105 /* JFIF */ 1106 SCALER_CSC0_JPEG_JFIF, 1107 SCALER_CSC1_JPEG_JFIF, 1108 SCALER_CSC2_JPEG_JFIF, 1109 }, { 1110 /* BT709 */ 1111 SCALER_CSC0_ITR_R_709_3_FR, 1112 SCALER_CSC1_ITR_R_709_3_FR, 1113 SCALER_CSC2_ITR_R_709_3_FR, 1114 }, { 1115 /* BT2020 */ 1116 SCALER_CSC0_ITR_R_2020_FR, 1117 SCALER_CSC1_ITR_R_2020_FR, 1118 SCALER_CSC2_ITR_R_2020_FR, 1119 } 1120 } 1121 }; 1122 1123 static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state) 1124 { 1125 struct drm_device *dev = state->state->dev; 1126 struct vc4_dev *vc4 = to_vc4_dev(dev); 1127 1128 WARN_ON_ONCE(vc4->gen != VC4_GEN_4); 1129 1130 if (!state->fb->format->has_alpha) 1131 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED, 1132 SCALER_POS2_ALPHA_MODE); 1133 1134 switch (state->pixel_blend_mode) { 1135 case DRM_MODE_BLEND_PIXEL_NONE: 1136 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED, 1137 SCALER_POS2_ALPHA_MODE); 1138 default: 1139 case DRM_MODE_BLEND_PREMULTI: 1140 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE, 1141 SCALER_POS2_ALPHA_MODE) | 1142 SCALER_POS2_ALPHA_PREMULT; 1143 case DRM_MODE_BLEND_COVERAGE: 1144 return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE, 1145 SCALER_POS2_ALPHA_MODE); 1146 } 1147 } 1148 1149 static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state) 1150 { 1151 struct drm_device *dev = state->state->dev; 1152 struct vc4_dev *vc4 = to_vc4_dev(dev); 1153 1154 WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C && 1155 vc4->gen != VC4_GEN_6_D); 1156 1157 switch (vc4->gen) { 1158 default: 1159 case VC4_GEN_5: 1160 case VC4_GEN_6_C: 1161 if (!state->fb->format->has_alpha) 1162 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, 1163 SCALER5_CTL2_ALPHA_MODE); 1164 1165 switch (state->pixel_blend_mode) { 1166 case DRM_MODE_BLEND_PIXEL_NONE: 1167 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, 1168 SCALER5_CTL2_ALPHA_MODE); 1169 default: 1170 case DRM_MODE_BLEND_PREMULTI: 1171 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, 1172 SCALER5_CTL2_ALPHA_MODE) | 1173 SCALER5_CTL2_ALPHA_PREMULT; 1174 case DRM_MODE_BLEND_COVERAGE: 1175 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, 1176 SCALER5_CTL2_ALPHA_MODE); 1177 } 1178 case VC4_GEN_6_D: 1179 /* 2712-D configures fixed alpha mode in CTL0 */ 1180 return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ? 1181 SCALER5_CTL2_ALPHA_PREMULT : 0; 1182 } 1183 } 1184 1185 static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state) 1186 { 1187 struct drm_device *dev = state->state->dev; 1188 struct vc4_dev *vc4 = to_vc4_dev(dev); 1189 1190 WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D); 1191 1192 if (vc4->gen == VC4_GEN_6_D && 1193 (!state->fb->format->has_alpha || 1194 state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE)) 1195 return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED, 1196 SCALER6_CTL0_ALPHA_MASK); 1197 1198 return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK); 1199 } 1200 1201 /* Writes out a full display list for an active plane to the plane's 1202 * private dlist state. 1203 */ 1204 static int vc4_plane_mode_set(struct drm_plane *plane, 1205 struct drm_plane_state *state) 1206 { 1207 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 1208 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 1209 struct drm_framebuffer *fb = state->fb; 1210 u32 ctl0_offset = vc4_state->dlist_count; 1211 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 1212 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 1213 int num_planes = fb->format->num_planes; 1214 u32 h_subsample = fb->format->hsub; 1215 u32 v_subsample = fb->format->vsub; 1216 bool mix_plane_alpha; 1217 bool covers_screen; 1218 u32 scl0, scl1, pitch0; 1219 u32 tiling, src_x, src_y; 1220 u32 width, height; 1221 u32 hvs_format = format->hvs; 1222 unsigned int rotation; 1223 u32 offsets[3] = { 0 }; 1224 int ret, i; 1225 1226 if (vc4_state->dlist_initialized) 1227 return 0; 1228 1229 ret = vc4_plane_setup_clipping_and_scaling(state); 1230 if (ret) 1231 return ret; 1232 1233 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || 1234 !vc4_state->crtc_w || !vc4_state->crtc_h) { 1235 /* 0 source size probably means the plane is offscreen */ 1236 vc4_state->dlist_initialized = 1; 1237 return 0; 1238 } 1239 1240 width = vc4_state->src_w[0] >> 16; 1241 height = vc4_state->src_h[0] >> 16; 1242 1243 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 1244 * and 4:4:4, scl1 should be set to scl0 so both channels of 1245 * the scaler do the same thing. For YUV, the Y plane needs 1246 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 1247 * the scl fields here. 1248 */ 1249 if (num_planes == 1) { 1250 scl0 = vc4_get_scl_field(state, 0); 1251 scl1 = scl0; 1252 } else { 1253 scl0 = vc4_get_scl_field(state, 1); 1254 scl1 = vc4_get_scl_field(state, 0); 1255 } 1256 1257 rotation = drm_rotation_simplify(state->rotation, 1258 DRM_MODE_ROTATE_0 | 1259 DRM_MODE_REFLECT_X | 1260 DRM_MODE_REFLECT_Y); 1261 1262 /* We must point to the last line when Y reflection is enabled. */ 1263 src_y = vc4_state->src_y >> 16; 1264 if (rotation & DRM_MODE_REFLECT_Y) 1265 src_y += height - 1; 1266 1267 src_x = vc4_state->src_x >> 16; 1268 1269 switch (base_format_mod) { 1270 case DRM_FORMAT_MOD_LINEAR: 1271 tiling = SCALER_CTL0_TILING_LINEAR; 1272 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); 1273 1274 /* Adjust the base pointer to the first pixel to be scanned 1275 * out. 1276 */ 1277 for (i = 0; i < num_planes; i++) { 1278 offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i]; 1279 offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i]; 1280 } 1281 1282 break; 1283 1284 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { 1285 u32 tile_size_shift = 12; /* T tiles are 4kb */ 1286 /* Whole-tile offsets, mostly for setting the pitch. */ 1287 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5; 1288 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */ 1289 u32 tile_w_mask = (1 << tile_w_shift) - 1; 1290 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice 1291 * the height (in pixels) of a 4k tile. 1292 */ 1293 u32 tile_h_mask = (2 << tile_h_shift) - 1; 1294 /* For T-tiled, the FB pitch is "how many bytes from one row to 1295 * the next, such that 1296 * 1297 * pitch * tile_h == tile_size * tiles_per_row 1298 */ 1299 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); 1300 u32 tiles_l = src_x >> tile_w_shift; 1301 u32 tiles_r = tiles_w - tiles_l; 1302 u32 tiles_t = src_y >> tile_h_shift; 1303 /* Intra-tile offsets, which modify the base address (the 1304 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that 1305 * base address). 1306 */ 1307 u32 tile_y = (src_y >> 4) & 1; 1308 u32 subtile_y = (src_y >> 2) & 3; 1309 u32 utile_y = src_y & 3; 1310 u32 x_off = src_x & tile_w_mask; 1311 u32 y_off = src_y & tile_h_mask; 1312 1313 /* When Y reflection is requested we must set the 1314 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines 1315 * after the initial one should be fetched in descending order, 1316 * which makes sense since we start from the last line and go 1317 * backward. 1318 * Don't know why we need y_off = max_y_off - y_off, but it's 1319 * definitely required (I guess it's also related to the "going 1320 * backward" situation). 1321 */ 1322 if (rotation & DRM_MODE_REFLECT_Y) { 1323 y_off = tile_h_mask - y_off; 1324 pitch0 = SCALER_PITCH0_TILE_LINE_DIR; 1325 } else { 1326 pitch0 = 0; 1327 } 1328 1329 tiling = SCALER_CTL0_TILING_256B_OR_T; 1330 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) | 1331 VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) | 1332 VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) | 1333 VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R)); 1334 offsets[0] += tiles_t * (tiles_w << tile_size_shift); 1335 offsets[0] += subtile_y << 8; 1336 offsets[0] += utile_y << 4; 1337 1338 /* Rows of tiles alternate left-to-right and right-to-left. */ 1339 if (tiles_t & 1) { 1340 pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR; 1341 offsets[0] += (tiles_w - tiles_l) << tile_size_shift; 1342 offsets[0] -= (1 + !tile_y) << 10; 1343 } else { 1344 offsets[0] += tiles_l << tile_size_shift; 1345 offsets[0] += tile_y << 10; 1346 } 1347 1348 break; 1349 } 1350 1351 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1352 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1353 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 1354 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 1355 1356 if (param > SCALER_TILE_HEIGHT_MASK) { 1357 DRM_DEBUG_KMS("SAND height too large (%d)\n", 1358 param); 1359 return -EINVAL; 1360 } 1361 1362 if (fb->format->format == DRM_FORMAT_P030) { 1363 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; 1364 tiling = SCALER_CTL0_TILING_128B; 1365 } else { 1366 hvs_format = HVS_PIXEL_FORMAT_H264; 1367 1368 switch (base_format_mod) { 1369 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1370 tiling = SCALER_CTL0_TILING_64B; 1371 break; 1372 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1373 tiling = SCALER_CTL0_TILING_128B; 1374 break; 1375 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1376 tiling = SCALER_CTL0_TILING_256B_OR_T; 1377 break; 1378 default: 1379 return -EINVAL; 1380 } 1381 } 1382 1383 /* Adjust the base pointer to the first pixel to be scanned 1384 * out. 1385 * 1386 * For P030, y_ptr [31:4] is the 128bit word for the start pixel 1387 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit 1388 * word that should be taken as the first pixel. 1389 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the 1390 * element within the 128bit word, eg for pixel 3 the value 1391 * should be 6. 1392 */ 1393 for (i = 0; i < num_planes; i++) { 1394 u32 tile_w, tile, x_off, pix_per_tile; 1395 1396 if (fb->format->format == DRM_FORMAT_P030) { 1397 /* 1398 * Spec says: bits [31:4] of the given address 1399 * should point to the 128-bit word containing 1400 * the desired starting pixel, and bits[3:0] 1401 * should be between 0 and 11, indicating which 1402 * of the 12-pixels in that 128-bit word is the 1403 * first pixel to be used 1404 */ 1405 u32 remaining_pixels = src_x % 96; 1406 u32 aligned = remaining_pixels / 12; 1407 u32 last_bits = remaining_pixels % 12; 1408 1409 x_off = aligned * 16 + last_bits; 1410 tile_w = 128; 1411 pix_per_tile = 96; 1412 } else { 1413 switch (base_format_mod) { 1414 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1415 tile_w = 64; 1416 break; 1417 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1418 tile_w = 128; 1419 break; 1420 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1421 tile_w = 256; 1422 break; 1423 default: 1424 return -EINVAL; 1425 } 1426 pix_per_tile = tile_w / fb->format->cpp[0]; 1427 x_off = (src_x % pix_per_tile) / 1428 (i ? h_subsample : 1) * 1429 fb->format->cpp[i]; 1430 } 1431 1432 tile = src_x / pix_per_tile; 1433 1434 offsets[i] += param * tile_w * tile; 1435 offsets[i] += src_y / (i ? v_subsample : 1) * tile_w; 1436 offsets[i] += x_off & ~(i ? 1 : 0); 1437 } 1438 1439 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT); 1440 break; 1441 } 1442 1443 default: 1444 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 1445 (long long)fb->modifier); 1446 return -EINVAL; 1447 } 1448 1449 /* fetch an extra pixel if we don't actually line up with the left edge. */ 1450 if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16)) 1451 width++; 1452 1453 /* same for the right side */ 1454 if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) && 1455 vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16)) 1456 width++; 1457 1458 /* now for the top */ 1459 if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16)) 1460 height++; 1461 1462 /* and the bottom */ 1463 if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) && 1464 vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16)) 1465 height++; 1466 1467 /* For YUV444 the hardware wants double the width, otherwise it doesn't 1468 * fetch full width of chroma 1469 */ 1470 if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444) 1471 width <<= 1; 1472 1473 /* Don't waste cycles mixing with plane alpha if the set alpha 1474 * is opaque or there is no per-pixel alpha information. 1475 * In any case we use the alpha property value as the fixed alpha. 1476 */ 1477 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 1478 fb->format->has_alpha; 1479 1480 if (vc4->gen == VC4_GEN_4) { 1481 /* Control word */ 1482 vc4_dlist_write(vc4_state, 1483 SCALER_CTL0_VALID | 1484 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) | 1485 (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) | 1486 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) | 1487 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | 1488 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 1489 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 1490 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | 1491 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 1492 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); 1493 1494 /* Position Word 0: Image Positions and Alpha Value */ 1495 vc4_state->pos0_offset = vc4_state->dlist_count; 1496 vc4_dlist_write(vc4_state, 1497 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | 1498 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | 1499 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); 1500 1501 /* Position Word 1: Scaled Image Dimensions. */ 1502 if (!vc4_state->is_unity) { 1503 vc4_dlist_write(vc4_state, 1504 VC4_SET_FIELD(vc4_state->crtc_w, 1505 SCALER_POS1_SCL_WIDTH) | 1506 VC4_SET_FIELD(vc4_state->crtc_h, 1507 SCALER_POS1_SCL_HEIGHT)); 1508 } 1509 1510 /* Position Word 2: Source Image Size, Alpha */ 1511 vc4_state->pos2_offset = vc4_state->dlist_count; 1512 vc4_dlist_write(vc4_state, 1513 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | 1514 vc4_hvs4_get_alpha_blend_mode(state) | 1515 VC4_SET_FIELD(width, SCALER_POS2_WIDTH) | 1516 VC4_SET_FIELD(height, SCALER_POS2_HEIGHT)); 1517 1518 /* Position Word 3: Context. Written by the HVS. */ 1519 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1520 1521 } else { 1522 /* Control word */ 1523 vc4_dlist_write(vc4_state, 1524 SCALER_CTL0_VALID | 1525 (format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) | 1526 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 1527 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 1528 (vc4_state->is_unity ? 1529 SCALER5_CTL0_UNITY : 0) | 1530 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 1531 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) | 1532 SCALER5_CTL0_ALPHA_EXPAND | 1533 SCALER5_CTL0_RGB_EXPAND); 1534 1535 /* Position Word 0: Image Positions and Alpha Value */ 1536 vc4_state->pos0_offset = vc4_state->dlist_count; 1537 vc4_dlist_write(vc4_state, 1538 (rotation & DRM_MODE_REFLECT_Y ? 1539 SCALER5_POS0_VFLIP : 0) | 1540 VC4_SET_FIELD(vc4_state->crtc_x, 1541 SCALER_POS0_START_X) | 1542 (rotation & DRM_MODE_REFLECT_X ? 1543 SCALER5_POS0_HFLIP : 0) | 1544 VC4_SET_FIELD(vc4_state->crtc_y, 1545 SCALER5_POS0_START_Y) 1546 ); 1547 1548 /* Control Word 2 */ 1549 vc4_dlist_write(vc4_state, 1550 VC4_SET_FIELD(state->alpha >> 4, 1551 SCALER5_CTL2_ALPHA) | 1552 vc4_hvs5_get_alpha_blend_mode(state) | 1553 (mix_plane_alpha ? 1554 SCALER5_CTL2_ALPHA_MIX : 0) 1555 ); 1556 1557 /* Position Word 1: Scaled Image Dimensions. */ 1558 if (!vc4_state->is_unity) { 1559 vc4_dlist_write(vc4_state, 1560 VC4_SET_FIELD(vc4_state->crtc_w, 1561 SCALER5_POS1_SCL_WIDTH) | 1562 VC4_SET_FIELD(vc4_state->crtc_h, 1563 SCALER5_POS1_SCL_HEIGHT)); 1564 } 1565 1566 /* Position Word 2: Source Image Size */ 1567 vc4_state->pos2_offset = vc4_state->dlist_count; 1568 vc4_dlist_write(vc4_state, 1569 VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) | 1570 VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT)); 1571 1572 /* Position Word 3: Context. Written by the HVS. */ 1573 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1574 } 1575 1576 1577 /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers 1578 * 1579 * The pointers may be any byte address. 1580 */ 1581 vc4_state->ptr0_offset[0] = vc4_state->dlist_count; 1582 1583 for (i = 0; i < num_planes; i++) { 1584 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i); 1585 1586 vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]); 1587 } 1588 1589 /* Pointer Context Word 0/1/2: Written by the HVS */ 1590 for (i = 0; i < num_planes; i++) 1591 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1592 1593 /* Pitch word 0 */ 1594 vc4_dlist_write(vc4_state, pitch0); 1595 1596 /* Pitch word 1/2 */ 1597 for (i = 1; i < num_planes; i++) { 1598 if (hvs_format != HVS_PIXEL_FORMAT_H264 && 1599 hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) { 1600 vc4_dlist_write(vc4_state, 1601 VC4_SET_FIELD(fb->pitches[i], 1602 SCALER_SRC_PITCH)); 1603 } else { 1604 vc4_dlist_write(vc4_state, pitch0); 1605 } 1606 } 1607 1608 /* Colorspace conversion words */ 1609 if (vc4_state->is_yuv) { 1610 enum drm_color_encoding color_encoding = state->color_encoding; 1611 enum drm_color_range color_range = state->color_range; 1612 const u32 *ccm; 1613 1614 if (color_encoding >= DRM_COLOR_ENCODING_MAX) 1615 color_encoding = DRM_COLOR_YCBCR_BT601; 1616 if (color_range >= DRM_COLOR_RANGE_MAX) 1617 color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; 1618 1619 ccm = colorspace_coeffs[color_range][color_encoding]; 1620 1621 vc4_dlist_write(vc4_state, ccm[0]); 1622 vc4_dlist_write(vc4_state, ccm[1]); 1623 vc4_dlist_write(vc4_state, ccm[2]); 1624 } 1625 1626 vc4_state->lbm_offset = 0; 1627 1628 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 1629 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 1630 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 1631 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 1632 /* Reserve a slot for the LBM Base Address. The real value will 1633 * be set when calling vc4_plane_allocate_lbm(). 1634 */ 1635 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || 1636 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 1637 vc4_state->lbm_offset = vc4_state->dlist_count; 1638 vc4_dlist_counter_increment(vc4_state); 1639 } 1640 1641 if (num_planes > 1) { 1642 /* Emit Cb/Cr as channel 0 and Y as channel 1643 * 1. This matches how we set up scl0/scl1 1644 * above. 1645 */ 1646 vc4_write_scaling_parameters(state, 1); 1647 } 1648 vc4_write_scaling_parameters(state, 0); 1649 1650 /* If any PPF setup was done, then all the kernel 1651 * pointers get uploaded. 1652 */ 1653 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || 1654 vc4_state->y_scaling[0] == VC4_SCALING_PPF || 1655 vc4_state->x_scaling[1] == VC4_SCALING_PPF || 1656 vc4_state->y_scaling[1] == VC4_SCALING_PPF) { 1657 u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, 1658 SCALER_PPF_KERNEL_OFFSET); 1659 1660 /* HPPF plane 0 */ 1661 vc4_dlist_write(vc4_state, kernel); 1662 /* VPPF plane 0 */ 1663 vc4_dlist_write(vc4_state, kernel); 1664 /* HPPF plane 1 */ 1665 vc4_dlist_write(vc4_state, kernel); 1666 /* VPPF plane 1 */ 1667 vc4_dlist_write(vc4_state, kernel); 1668 } 1669 } 1670 1671 vc4_state->dlist[ctl0_offset] |= 1672 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); 1673 1674 /* crtc_* are already clipped coordinates. */ 1675 covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && 1676 vc4_state->crtc_w == state->crtc->mode.hdisplay && 1677 vc4_state->crtc_h == state->crtc->mode.vdisplay; 1678 /* Background fill might be necessary when the plane has per-pixel 1679 * alpha content or a non-opaque plane alpha and could blend from the 1680 * background or does not cover the entire screen. 1681 */ 1682 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || 1683 state->alpha != DRM_BLEND_ALPHA_OPAQUE; 1684 1685 /* Flag the dlist as initialized to avoid checking it twice in case 1686 * the async update check already called vc4_plane_mode_set() and 1687 * decided to fallback to sync update because async update was not 1688 * possible. 1689 */ 1690 vc4_state->dlist_initialized = 1; 1691 1692 vc4_plane_calc_load(state); 1693 1694 return 0; 1695 } 1696 1697 static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state) 1698 { 1699 struct drm_plane_state *state = &vc4_state->base; 1700 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 1701 u32 ret = 0; 1702 1703 if (vc4_state->is_yuv) { 1704 enum drm_color_encoding color_encoding = state->color_encoding; 1705 enum drm_color_range color_range = state->color_range; 1706 1707 /* CSC pre-loaded with: 1708 * 0 = BT601 limited range 1709 * 1 = BT709 limited range 1710 * 2 = BT2020 limited range 1711 * 3 = BT601 full range 1712 * 4 = BT709 full range 1713 * 5 = BT2020 full range 1714 */ 1715 if (color_encoding > DRM_COLOR_YCBCR_BT2020) 1716 color_encoding = DRM_COLOR_YCBCR_BT601; 1717 if (color_range > DRM_COLOR_YCBCR_FULL_RANGE) 1718 color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; 1719 1720 if (vc4->gen == VC4_GEN_6_C) { 1721 ret |= SCALER6C_CTL2_CSC_ENABLE; 1722 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3), 1723 SCALER6C_CTL2_BRCM_CFC_CONTROL); 1724 } else { 1725 ret |= SCALER6D_CTL2_CSC_ENABLE; 1726 ret |= VC4_SET_FIELD(color_encoding + (color_range * 3), 1727 SCALER6D_CTL2_BRCM_CFC_CONTROL); 1728 } 1729 } 1730 1731 return ret; 1732 } 1733 1734 static int vc6_plane_mode_set(struct drm_plane *plane, 1735 struct drm_plane_state *state) 1736 { 1737 struct drm_device *drm = plane->dev; 1738 struct vc4_dev *vc4 = to_vc4_dev(drm); 1739 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 1740 struct drm_framebuffer *fb = state->fb; 1741 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 1742 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 1743 int num_planes = fb->format->num_planes; 1744 u32 h_subsample = fb->format->hsub; 1745 u32 v_subsample = fb->format->vsub; 1746 bool mix_plane_alpha; 1747 bool covers_screen; 1748 u32 scl0, scl1, pitch0; 1749 u32 tiling, src_x, src_y; 1750 u32 width, height; 1751 u32 hvs_format = format->hvs; 1752 u32 offsets[3] = { 0 }; 1753 unsigned int rotation; 1754 int ret, i; 1755 1756 if (vc4_state->dlist_initialized) 1757 return 0; 1758 1759 ret = vc4_plane_setup_clipping_and_scaling(state); 1760 if (ret) 1761 return ret; 1762 1763 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || 1764 !vc4_state->crtc_w || !vc4_state->crtc_h) { 1765 /* 0 source size probably means the plane is offscreen. 1766 * 0 destination size is a redundant plane. 1767 */ 1768 vc4_state->dlist_initialized = 1; 1769 return 0; 1770 } 1771 1772 width = vc4_state->src_w[0] >> 16; 1773 height = vc4_state->src_h[0] >> 16; 1774 1775 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 1776 * and 4:4:4, scl1 should be set to scl0 so both channels of 1777 * the scaler do the same thing. For YUV, the Y plane needs 1778 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 1779 * the scl fields here. 1780 */ 1781 if (num_planes == 1) { 1782 scl0 = vc4_get_scl_field(state, 0); 1783 scl1 = scl0; 1784 } else { 1785 scl0 = vc4_get_scl_field(state, 1); 1786 scl1 = vc4_get_scl_field(state, 0); 1787 } 1788 1789 rotation = drm_rotation_simplify(state->rotation, 1790 DRM_MODE_ROTATE_0 | 1791 DRM_MODE_REFLECT_X | 1792 DRM_MODE_REFLECT_Y); 1793 1794 /* We must point to the last line when Y reflection is enabled. */ 1795 src_y = vc4_state->src_y >> 16; 1796 if (rotation & DRM_MODE_REFLECT_Y) 1797 src_y += height - 1; 1798 1799 src_x = vc4_state->src_x >> 16; 1800 1801 switch (base_format_mod) { 1802 case DRM_FORMAT_MOD_LINEAR: 1803 tiling = SCALER6_CTL0_ADDR_MODE_LINEAR; 1804 1805 /* Adjust the base pointer to the first pixel to be scanned 1806 * out. 1807 */ 1808 for (i = 0; i < num_planes; i++) { 1809 offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i]; 1810 offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i]; 1811 } 1812 1813 break; 1814 1815 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1816 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 1817 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 1818 u32 components_per_word; 1819 u32 starting_offset; 1820 u32 fetch_count; 1821 1822 if (param > SCALER_TILE_HEIGHT_MASK) { 1823 DRM_DEBUG_KMS("SAND height too large (%d)\n", 1824 param); 1825 return -EINVAL; 1826 } 1827 1828 if (fb->format->format == DRM_FORMAT_P030) { 1829 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; 1830 tiling = SCALER6_CTL0_ADDR_MODE_128B; 1831 } else { 1832 hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE; 1833 1834 switch (base_format_mod) { 1835 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1836 tiling = SCALER6_CTL0_ADDR_MODE_128B; 1837 break; 1838 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1839 tiling = SCALER6_CTL0_ADDR_MODE_256B; 1840 break; 1841 default: 1842 return -EINVAL; 1843 } 1844 } 1845 1846 /* Adjust the base pointer to the first pixel to be scanned 1847 * out. 1848 * 1849 * For P030, y_ptr [31:4] is the 128bit word for the start pixel 1850 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit 1851 * word that should be taken as the first pixel. 1852 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the 1853 * element within the 128bit word, eg for pixel 3 the value 1854 * should be 6. 1855 */ 1856 for (i = 0; i < num_planes; i++) { 1857 u32 tile_w, tile, x_off, pix_per_tile; 1858 1859 if (fb->format->format == DRM_FORMAT_P030) { 1860 /* 1861 * Spec says: bits [31:4] of the given address 1862 * should point to the 128-bit word containing 1863 * the desired starting pixel, and bits[3:0] 1864 * should be between 0 and 11, indicating which 1865 * of the 12-pixels in that 128-bit word is the 1866 * first pixel to be used 1867 */ 1868 u32 remaining_pixels = src_x % 96; 1869 u32 aligned = remaining_pixels / 12; 1870 u32 last_bits = remaining_pixels % 12; 1871 1872 x_off = aligned * 16 + last_bits; 1873 tile_w = 128; 1874 pix_per_tile = 96; 1875 } else { 1876 switch (base_format_mod) { 1877 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1878 tile_w = 128; 1879 break; 1880 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1881 tile_w = 256; 1882 break; 1883 default: 1884 return -EINVAL; 1885 } 1886 pix_per_tile = tile_w / fb->format->cpp[0]; 1887 x_off = (src_x % pix_per_tile) / 1888 (i ? h_subsample : 1) * 1889 fb->format->cpp[i]; 1890 } 1891 1892 tile = src_x / pix_per_tile; 1893 1894 offsets[i] += param * tile_w * tile; 1895 offsets[i] += src_y / (i ? v_subsample : 1) * tile_w; 1896 offsets[i] += x_off & ~(i ? 1 : 0); 1897 } 1898 1899 components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32; 1900 starting_offset = src_x % components_per_word; 1901 fetch_count = (width + starting_offset + components_per_word - 1) / 1902 components_per_word; 1903 1904 pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) | 1905 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT); 1906 break; 1907 } 1908 1909 default: 1910 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 1911 (long long)fb->modifier); 1912 return -EINVAL; 1913 } 1914 1915 /* fetch an extra pixel if we don't actually line up with the left edge. */ 1916 if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16)) 1917 width++; 1918 1919 /* same for the right side */ 1920 if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) && 1921 vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16)) 1922 width++; 1923 1924 /* now for the top */ 1925 if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16)) 1926 height++; 1927 1928 /* and the bottom */ 1929 if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) && 1930 vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16)) 1931 height++; 1932 1933 /* for YUV444 hardware wants double the width, otherwise it doesn't 1934 * fetch full width of chroma 1935 */ 1936 if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444) 1937 width <<= 1; 1938 1939 /* Don't waste cycles mixing with plane alpha if the set alpha 1940 * is opaque or there is no per-pixel alpha information. 1941 * In any case we use the alpha property value as the fixed alpha. 1942 */ 1943 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 1944 fb->format->has_alpha; 1945 1946 /* Control Word 0: Scaling Configuration & Element Validity*/ 1947 vc4_dlist_write(vc4_state, 1948 SCALER6_CTL0_VALID | 1949 VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) | 1950 vc4_hvs6_get_alpha_mask_mode(state) | 1951 (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) | 1952 VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) | 1953 VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) | 1954 VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) | 1955 VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT)); 1956 1957 /* Position Word 0: Image Position */ 1958 vc4_state->pos0_offset = vc4_state->dlist_count; 1959 vc4_dlist_write(vc4_state, 1960 VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) | 1961 (rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) | 1962 VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X)); 1963 1964 /* Control Word 2: Alpha Value & CSC */ 1965 vc4_dlist_write(vc4_state, 1966 vc6_plane_get_csc_mode(vc4_state) | 1967 vc4_hvs5_get_alpha_blend_mode(state) | 1968 (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) | 1969 VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA)); 1970 1971 /* Position Word 1: Scaled Image Dimensions */ 1972 if (!vc4_state->is_unity) 1973 vc4_dlist_write(vc4_state, 1974 VC4_SET_FIELD(vc4_state->crtc_h - 1, 1975 SCALER6_POS1_SCL_LINES) | 1976 VC4_SET_FIELD(vc4_state->crtc_w - 1, 1977 SCALER6_POS1_SCL_WIDTH)); 1978 1979 /* Position Word 2: Source Image Size */ 1980 vc4_state->pos2_offset = vc4_state->dlist_count; 1981 vc4_dlist_write(vc4_state, 1982 VC4_SET_FIELD(height - 1, 1983 SCALER6_POS2_SRC_LINES) | 1984 VC4_SET_FIELD(width - 1, 1985 SCALER6_POS2_SRC_WIDTH)); 1986 1987 /* Position Word 3: Context */ 1988 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1989 1990 /* 1991 * TODO: This only covers Raster Scan Order planes 1992 */ 1993 for (i = 0; i < num_planes; i++) { 1994 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i); 1995 dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i]; 1996 1997 /* Pointer Word 0 */ 1998 vc4_state->ptr0_offset[i] = vc4_state->dlist_count; 1999 vc4_dlist_write(vc4_state, 2000 (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) | 2001 /* 2002 * The UPM buffer will be allocated in 2003 * vc6_plane_allocate_upm(). 2004 */ 2005 VC4_SET_FIELD(upper_32_bits(paddr) & 0xff, 2006 SCALER6_PTR0_UPPER_ADDR)); 2007 2008 /* Pointer Word 1 */ 2009 vc4_dlist_write(vc4_state, lower_32_bits(paddr)); 2010 2011 /* Pointer Word 2 */ 2012 if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 && 2013 base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) { 2014 vc4_dlist_write(vc4_state, 2015 VC4_SET_FIELD(fb->pitches[i], 2016 SCALER6_PTR2_PITCH)); 2017 } else { 2018 vc4_dlist_write(vc4_state, pitch0); 2019 } 2020 } 2021 2022 /* 2023 * Palette Word 0 2024 * TODO: We're not using the palette mode 2025 */ 2026 2027 /* 2028 * Trans Word 0 2029 * TODO: It's only relevant if we set the trans_rgb bit in the 2030 * control word 0, and we don't at the moment. 2031 */ 2032 2033 vc4_state->lbm_offset = 0; 2034 2035 if (!vc4_state->is_unity || fb->format->is_yuv) { 2036 /* 2037 * Reserve a slot for the LBM Base Address. The real value will 2038 * be set when calling vc4_plane_allocate_lbm(). 2039 */ 2040 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || 2041 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 2042 vc4_state->lbm_offset = vc4_state->dlist_count; 2043 vc4_dlist_counter_increment(vc4_state); 2044 } 2045 2046 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 2047 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 2048 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 2049 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 2050 if (num_planes > 1) 2051 /* 2052 * Emit Cb/Cr as channel 0 and Y as channel 2053 * 1. This matches how we set up scl0/scl1 2054 * above. 2055 */ 2056 vc4_write_scaling_parameters(state, 1); 2057 2058 vc4_write_scaling_parameters(state, 0); 2059 } 2060 2061 /* 2062 * If any PPF setup was done, then all the kernel 2063 * pointers get uploaded. 2064 */ 2065 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || 2066 vc4_state->y_scaling[0] == VC4_SCALING_PPF || 2067 vc4_state->x_scaling[1] == VC4_SCALING_PPF || 2068 vc4_state->y_scaling[1] == VC4_SCALING_PPF) { 2069 u32 kernel = 2070 VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, 2071 SCALER_PPF_KERNEL_OFFSET); 2072 2073 /* HPPF plane 0 */ 2074 vc4_dlist_write(vc4_state, kernel); 2075 /* VPPF plane 0 */ 2076 vc4_dlist_write(vc4_state, kernel); 2077 /* HPPF plane 1 */ 2078 vc4_dlist_write(vc4_state, kernel); 2079 /* VPPF plane 1 */ 2080 vc4_dlist_write(vc4_state, kernel); 2081 } 2082 } 2083 2084 vc4_dlist_write(vc4_state, SCALER6_CTL0_END); 2085 2086 vc4_state->dlist[0] |= 2087 VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT); 2088 2089 /* crtc_* are already clipped coordinates. */ 2090 covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && 2091 vc4_state->crtc_w == state->crtc->mode.hdisplay && 2092 vc4_state->crtc_h == state->crtc->mode.vdisplay; 2093 2094 /* 2095 * Background fill might be necessary when the plane has per-pixel 2096 * alpha content or a non-opaque plane alpha and could blend from the 2097 * background or does not cover the entire screen. 2098 */ 2099 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || 2100 state->alpha != DRM_BLEND_ALPHA_OPAQUE; 2101 2102 /* 2103 * Flag the dlist as initialized to avoid checking it twice in case 2104 * the async update check already called vc4_plane_mode_set() and 2105 * decided to fallback to sync update because async update was not 2106 * possible. 2107 */ 2108 vc4_state->dlist_initialized = 1; 2109 2110 vc4_plane_calc_load(state); 2111 2112 drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n", 2113 plane->base.id, plane->name, vc4_state->dlist_count); 2114 2115 return 0; 2116 } 2117 2118 /* If a modeset involves changing the setup of a plane, the atomic 2119 * infrastructure will call this to validate a proposed plane setup. 2120 * However, if a plane isn't getting updated, this (and the 2121 * corresponding vc4_plane_atomic_update) won't get called. Thus, we 2122 * compute the dlist here and have all active plane dlists get updated 2123 * in the CRTC's flush. 2124 */ 2125 static int vc4_plane_atomic_check(struct drm_plane *plane, 2126 struct drm_atomic_commit *state) 2127 { 2128 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 2129 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 2130 plane); 2131 struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state); 2132 int ret; 2133 2134 vc4_state->dlist_count = 0; 2135 2136 if (!plane_enabled(new_plane_state)) { 2137 struct drm_plane_state *old_plane_state = 2138 drm_atomic_get_old_plane_state(state, plane); 2139 2140 if (vc4->gen >= VC4_GEN_6_C && old_plane_state && 2141 plane_enabled(old_plane_state)) { 2142 vc6_plane_free_upm(new_plane_state); 2143 } 2144 return 0; 2145 } 2146 2147 if (vc4->gen >= VC4_GEN_6_C) 2148 ret = vc6_plane_mode_set(plane, new_plane_state); 2149 else 2150 ret = vc4_plane_mode_set(plane, new_plane_state); 2151 if (ret) 2152 return ret; 2153 2154 if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || 2155 !vc4_state->crtc_w || !vc4_state->crtc_h) 2156 return 0; 2157 2158 ret = vc4_plane_allocate_lbm(new_plane_state); 2159 if (ret) 2160 return ret; 2161 2162 if (vc4->gen >= VC4_GEN_6_C) { 2163 ret = vc6_plane_allocate_upm(new_plane_state); 2164 if (ret) 2165 return ret; 2166 } 2167 2168 return 0; 2169 } 2170 2171 static void vc4_plane_atomic_update(struct drm_plane *plane, 2172 struct drm_atomic_commit *state) 2173 { 2174 /* No contents here. Since we don't know where in the CRTC's 2175 * dlist we should be stored, our dlist is uploaded to the 2176 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush 2177 * time. 2178 */ 2179 } 2180 2181 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) 2182 { 2183 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 2184 int i; 2185 int idx; 2186 2187 if (!drm_dev_enter(plane->dev, &idx)) 2188 goto out; 2189 2190 vc4_state->hw_dlist = dlist; 2191 2192 /* Can't memcpy_toio() because it needs to be 32-bit writes. */ 2193 for (i = 0; i < vc4_state->dlist_count; i++) 2194 writel(vc4_state->dlist[i], &dlist[i]); 2195 2196 drm_dev_exit(idx); 2197 2198 out: 2199 return vc4_state->dlist_count; 2200 } 2201 2202 u32 vc4_plane_dlist_size(const struct drm_plane_state *state) 2203 { 2204 const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 2205 2206 return vc4_state->dlist_count; 2207 } 2208 2209 /* Updates the plane to immediately (well, once the FIFO needs 2210 * refilling) scan out from at a new framebuffer. 2211 */ 2212 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) 2213 { 2214 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 2215 struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0); 2216 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 2217 dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0]; 2218 int idx; 2219 2220 if (!drm_dev_enter(plane->dev, &idx)) 2221 return; 2222 2223 /* We're skipping the address adjustment for negative origin, 2224 * because this is only called on the primary plane. 2225 */ 2226 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); 2227 2228 if (vc4->gen == VC4_GEN_6_C) { 2229 u32 value; 2230 2231 value = vc4_state->dlist[vc4_state->ptr0_offset[0]] & 2232 ~SCALER6_PTR0_UPPER_ADDR_MASK; 2233 value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff, 2234 SCALER6_PTR0_UPPER_ADDR); 2235 2236 writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]); 2237 vc4_state->dlist[vc4_state->ptr0_offset[0]] = value; 2238 2239 value = lower_32_bits(dma_addr); 2240 writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]); 2241 vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value; 2242 } else { 2243 u32 addr; 2244 2245 addr = (u32)dma_addr; 2246 2247 /* Write the new address into the hardware immediately. The 2248 * scanout will start from this address as soon as the FIFO 2249 * needs to refill with pixels. 2250 */ 2251 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]); 2252 2253 /* Also update the CPU-side dlist copy, so that any later 2254 * atomic updates that don't do a new modeset on our plane 2255 * also use our updated address. 2256 */ 2257 vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr; 2258 } 2259 2260 drm_dev_exit(idx); 2261 } 2262 2263 static void vc4_plane_atomic_async_update(struct drm_plane *plane, 2264 struct drm_atomic_commit *state) 2265 { 2266 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 2267 plane); 2268 struct vc4_plane_state *vc4_state, *new_vc4_state; 2269 int idx; 2270 2271 if (!drm_dev_enter(plane->dev, &idx)) 2272 return; 2273 2274 swap(plane->state->fb, new_plane_state->fb); 2275 plane->state->crtc_x = new_plane_state->crtc_x; 2276 plane->state->crtc_y = new_plane_state->crtc_y; 2277 plane->state->crtc_w = new_plane_state->crtc_w; 2278 plane->state->crtc_h = new_plane_state->crtc_h; 2279 plane->state->src_x = new_plane_state->src_x; 2280 plane->state->src_y = new_plane_state->src_y; 2281 plane->state->src_w = new_plane_state->src_w; 2282 plane->state->src_h = new_plane_state->src_h; 2283 plane->state->alpha = new_plane_state->alpha; 2284 plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode; 2285 plane->state->rotation = new_plane_state->rotation; 2286 plane->state->zpos = new_plane_state->zpos; 2287 plane->state->normalized_zpos = new_plane_state->normalized_zpos; 2288 plane->state->color_encoding = new_plane_state->color_encoding; 2289 plane->state->color_range = new_plane_state->color_range; 2290 plane->state->src = new_plane_state->src; 2291 plane->state->dst = new_plane_state->dst; 2292 plane->state->visible = new_plane_state->visible; 2293 2294 new_vc4_state = to_vc4_plane_state(new_plane_state); 2295 vc4_state = to_vc4_plane_state(plane->state); 2296 2297 vc4_state->crtc_x = new_vc4_state->crtc_x; 2298 vc4_state->crtc_y = new_vc4_state->crtc_y; 2299 vc4_state->crtc_h = new_vc4_state->crtc_h; 2300 vc4_state->crtc_w = new_vc4_state->crtc_w; 2301 vc4_state->src_x = new_vc4_state->src_x; 2302 vc4_state->src_y = new_vc4_state->src_y; 2303 memcpy(vc4_state->src_w, new_vc4_state->src_w, 2304 sizeof(vc4_state->src_w)); 2305 memcpy(vc4_state->src_h, new_vc4_state->src_h, 2306 sizeof(vc4_state->src_h)); 2307 memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling, 2308 sizeof(vc4_state->x_scaling)); 2309 memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling, 2310 sizeof(vc4_state->y_scaling)); 2311 vc4_state->is_unity = new_vc4_state->is_unity; 2312 vc4_state->is_yuv = new_vc4_state->is_yuv; 2313 vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill; 2314 2315 /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */ 2316 vc4_state->dlist[vc4_state->pos0_offset] = 2317 new_vc4_state->dlist[vc4_state->pos0_offset]; 2318 vc4_state->dlist[vc4_state->pos2_offset] = 2319 new_vc4_state->dlist[vc4_state->pos2_offset]; 2320 vc4_state->dlist[vc4_state->ptr0_offset[0]] = 2321 new_vc4_state->dlist[vc4_state->ptr0_offset[0]]; 2322 2323 /* Note that we can't just call vc4_plane_write_dlist() 2324 * because that would smash the context data that the HVS is 2325 * currently using. 2326 */ 2327 writel(vc4_state->dlist[vc4_state->pos0_offset], 2328 &vc4_state->hw_dlist[vc4_state->pos0_offset]); 2329 writel(vc4_state->dlist[vc4_state->pos2_offset], 2330 &vc4_state->hw_dlist[vc4_state->pos2_offset]); 2331 writel(vc4_state->dlist[vc4_state->ptr0_offset[0]], 2332 &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]); 2333 2334 drm_dev_exit(idx); 2335 } 2336 2337 static int vc4_plane_atomic_async_check(struct drm_plane *plane, 2338 struct drm_atomic_commit *state, bool flip) 2339 { 2340 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 2341 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 2342 plane); 2343 struct vc4_plane_state *old_vc4_state, *new_vc4_state; 2344 int ret; 2345 u32 i; 2346 2347 if (vc4->gen <= VC4_GEN_5) 2348 ret = vc4_plane_mode_set(plane, new_plane_state); 2349 else 2350 ret = vc6_plane_mode_set(plane, new_plane_state); 2351 if (ret) 2352 return ret; 2353 2354 old_vc4_state = to_vc4_plane_state(plane->state); 2355 new_vc4_state = to_vc4_plane_state(new_plane_state); 2356 2357 if (!new_vc4_state->hw_dlist) 2358 return -EINVAL; 2359 2360 if (old_vc4_state->dlist_count != new_vc4_state->dlist_count || 2361 old_vc4_state->pos0_offset != new_vc4_state->pos0_offset || 2362 old_vc4_state->pos2_offset != new_vc4_state->pos2_offset || 2363 old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] || 2364 vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state)) 2365 return -EINVAL; 2366 2367 /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update 2368 * if anything else has changed, fallback to a sync update. 2369 */ 2370 for (i = 0; i < new_vc4_state->dlist_count; i++) { 2371 if (i == new_vc4_state->pos0_offset || 2372 i == new_vc4_state->pos2_offset || 2373 i == new_vc4_state->ptr0_offset[0] || 2374 (new_vc4_state->lbm_offset && 2375 i == new_vc4_state->lbm_offset)) 2376 continue; 2377 2378 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i]) 2379 return -EINVAL; 2380 } 2381 2382 return 0; 2383 } 2384 2385 static int vc4_prepare_fb(struct drm_plane *plane, 2386 struct drm_plane_state *state) 2387 { 2388 struct vc4_bo *bo; 2389 int ret; 2390 2391 if (!state->fb) 2392 return 0; 2393 2394 bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base); 2395 2396 ret = drm_gem_plane_helper_prepare_fb(plane, state); 2397 if (ret) 2398 return ret; 2399 2400 return vc4_bo_inc_usecnt(bo); 2401 } 2402 2403 static void vc4_cleanup_fb(struct drm_plane *plane, 2404 struct drm_plane_state *state) 2405 { 2406 struct vc4_bo *bo; 2407 2408 if (!state->fb) 2409 return; 2410 2411 bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base); 2412 vc4_bo_dec_usecnt(bo); 2413 } 2414 2415 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { 2416 .atomic_check = vc4_plane_atomic_check, 2417 .atomic_update = vc4_plane_atomic_update, 2418 .prepare_fb = vc4_prepare_fb, 2419 .cleanup_fb = vc4_cleanup_fb, 2420 .atomic_async_check = vc4_plane_atomic_async_check, 2421 .atomic_async_update = vc4_plane_atomic_async_update, 2422 }; 2423 2424 static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = { 2425 .atomic_check = vc4_plane_atomic_check, 2426 .atomic_update = vc4_plane_atomic_update, 2427 .atomic_async_check = vc4_plane_atomic_async_check, 2428 .atomic_async_update = vc4_plane_atomic_async_update, 2429 }; 2430 2431 static bool vc4_format_mod_supported(struct drm_plane *plane, 2432 uint32_t format, 2433 uint64_t modifier) 2434 { 2435 /* Support T_TILING for RGB formats only. */ 2436 switch (format) { 2437 case DRM_FORMAT_XRGB8888: 2438 case DRM_FORMAT_ARGB8888: 2439 case DRM_FORMAT_ABGR8888: 2440 case DRM_FORMAT_XBGR8888: 2441 case DRM_FORMAT_RGB565: 2442 case DRM_FORMAT_BGR565: 2443 case DRM_FORMAT_ARGB1555: 2444 case DRM_FORMAT_XRGB1555: 2445 switch (fourcc_mod_broadcom_mod(modifier)) { 2446 case DRM_FORMAT_MOD_LINEAR: 2447 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: 2448 return true; 2449 default: 2450 return false; 2451 } 2452 case DRM_FORMAT_NV12: 2453 case DRM_FORMAT_NV21: 2454 switch (fourcc_mod_broadcom_mod(modifier)) { 2455 case DRM_FORMAT_MOD_LINEAR: 2456 case DRM_FORMAT_MOD_BROADCOM_SAND64: 2457 case DRM_FORMAT_MOD_BROADCOM_SAND128: 2458 case DRM_FORMAT_MOD_BROADCOM_SAND256: 2459 return true; 2460 default: 2461 return false; 2462 } 2463 case DRM_FORMAT_P030: 2464 switch (fourcc_mod_broadcom_mod(modifier)) { 2465 case DRM_FORMAT_MOD_BROADCOM_SAND128: 2466 return true; 2467 default: 2468 return false; 2469 } 2470 case DRM_FORMAT_RGBX1010102: 2471 case DRM_FORMAT_BGRX1010102: 2472 case DRM_FORMAT_RGBA1010102: 2473 case DRM_FORMAT_BGRA1010102: 2474 case DRM_FORMAT_XRGB4444: 2475 case DRM_FORMAT_ARGB4444: 2476 case DRM_FORMAT_XBGR4444: 2477 case DRM_FORMAT_ABGR4444: 2478 case DRM_FORMAT_RGBX4444: 2479 case DRM_FORMAT_RGBA4444: 2480 case DRM_FORMAT_BGRX4444: 2481 case DRM_FORMAT_BGRA4444: 2482 case DRM_FORMAT_RGB332: 2483 case DRM_FORMAT_BGR233: 2484 case DRM_FORMAT_YUV422: 2485 case DRM_FORMAT_YVU422: 2486 case DRM_FORMAT_YUV420: 2487 case DRM_FORMAT_YVU420: 2488 case DRM_FORMAT_NV16: 2489 case DRM_FORMAT_NV61: 2490 default: 2491 return (modifier == DRM_FORMAT_MOD_LINEAR); 2492 } 2493 } 2494 2495 static const struct drm_plane_funcs vc4_plane_funcs = { 2496 .update_plane = drm_atomic_helper_update_plane, 2497 .disable_plane = drm_atomic_helper_disable_plane, 2498 .reset = vc4_plane_reset, 2499 .atomic_duplicate_state = vc4_plane_duplicate_state, 2500 .atomic_destroy_state = vc4_plane_destroy_state, 2501 .format_mod_supported = vc4_format_mod_supported, 2502 }; 2503 2504 struct drm_plane *vc4_plane_init(struct drm_device *dev, 2505 enum drm_plane_type type, 2506 uint32_t possible_crtcs) 2507 { 2508 struct vc4_dev *vc4 = to_vc4_dev(dev); 2509 struct drm_plane *plane; 2510 struct vc4_plane *vc4_plane; 2511 u32 formats[ARRAY_SIZE(hvs_formats)]; 2512 int num_formats = 0; 2513 unsigned i; 2514 static const uint64_t modifiers[] = { 2515 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, 2516 DRM_FORMAT_MOD_BROADCOM_SAND128, 2517 DRM_FORMAT_MOD_BROADCOM_SAND64, 2518 DRM_FORMAT_MOD_BROADCOM_SAND256, 2519 DRM_FORMAT_MOD_LINEAR, 2520 DRM_FORMAT_MOD_INVALID 2521 }; 2522 2523 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 2524 if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) { 2525 formats[num_formats] = hvs_formats[i].drm; 2526 num_formats++; 2527 } 2528 } 2529 2530 vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base, 2531 possible_crtcs, 2532 &vc4_plane_funcs, 2533 formats, num_formats, 2534 modifiers, type, NULL); 2535 if (IS_ERR(vc4_plane)) 2536 return ERR_CAST(vc4_plane); 2537 plane = &vc4_plane->base; 2538 2539 if (vc4->gen >= VC4_GEN_5) 2540 drm_plane_helper_add(plane, &vc5_plane_helper_funcs); 2541 else 2542 drm_plane_helper_add(plane, &vc4_plane_helper_funcs); 2543 2544 drm_plane_create_alpha_property(plane); 2545 drm_plane_create_blend_mode_property(plane, 2546 BIT(DRM_MODE_BLEND_PIXEL_NONE) | 2547 BIT(DRM_MODE_BLEND_PREMULTI) | 2548 BIT(DRM_MODE_BLEND_COVERAGE)); 2549 drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, 2550 DRM_MODE_ROTATE_0 | 2551 DRM_MODE_ROTATE_180 | 2552 DRM_MODE_REFLECT_X | 2553 DRM_MODE_REFLECT_Y); 2554 2555 drm_plane_create_color_properties(plane, 2556 BIT(DRM_COLOR_YCBCR_BT601) | 2557 BIT(DRM_COLOR_YCBCR_BT709) | 2558 BIT(DRM_COLOR_YCBCR_BT2020), 2559 BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | 2560 BIT(DRM_COLOR_YCBCR_FULL_RANGE), 2561 DRM_COLOR_YCBCR_BT709, 2562 DRM_COLOR_YCBCR_LIMITED_RANGE); 2563 2564 if (type == DRM_PLANE_TYPE_PRIMARY) 2565 drm_plane_create_zpos_immutable_property(plane, 0); 2566 2567 return plane; 2568 } 2569 2570 #define VC4_NUM_OVERLAY_PLANES 16 2571 2572 int vc4_plane_create_additional_planes(struct drm_device *drm) 2573 { 2574 struct drm_plane *cursor_plane; 2575 struct drm_crtc *crtc; 2576 unsigned int i; 2577 2578 /* Set up some arbitrary number of planes. We're not limited 2579 * by a set number of physical registers, just the space in 2580 * the HVS (16k) and how small an plane can be (28 bytes). 2581 * However, each plane we set up takes up some memory, and 2582 * increases the cost of looping over planes, which atomic 2583 * modesetting does quite a bit. As a result, we pick a 2584 * modest number of planes to expose, that should hopefully 2585 * still cover any sane usecase. 2586 */ 2587 for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) { 2588 struct drm_plane *plane = 2589 vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY, 2590 GENMASK(drm->mode_config.num_crtc - 1, 0)); 2591 2592 if (IS_ERR(plane)) 2593 continue; 2594 2595 /* Create zpos property. Max of all the overlays + 1 primary + 2596 * 1 cursor plane on a crtc. 2597 */ 2598 drm_plane_create_zpos_property(plane, i + 1, 1, 2599 VC4_NUM_OVERLAY_PLANES + 1); 2600 } 2601 2602 drm_for_each_crtc(crtc, drm) { 2603 /* Set up the legacy cursor after overlay initialization, 2604 * since the zpos fallback is that planes are rendered by plane 2605 * ID order, and that then puts the cursor on top. 2606 */ 2607 cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR, 2608 drm_crtc_mask(crtc)); 2609 if (!IS_ERR(cursor_plane)) { 2610 crtc->cursor = cursor_plane; 2611 2612 drm_plane_create_zpos_property(cursor_plane, 2613 VC4_NUM_OVERLAY_PLANES + 1, 2614 1, 2615 VC4_NUM_OVERLAY_PLANES + 1); 2616 } 2617 } 2618 2619 return 0; 2620 } 2621