// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Broadcom
 */

/**
 * DOC: VC4 plane module
 *
 * Each DRM plane is a layer of pixels being scanned out by the HVS.
 *
 * At atomic modeset check time, we compute the HVS display element
 * state that would be necessary for displaying the plane (giving us a
 * chance to figure out if a plane configuration is invalid), then at
 * atomic flush time the CRTC will ask us to write our element state
 * into the region of the HVS that it has allocated for us.
 */

#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_atomic_uapi.h>
#include <drm/drm_blend.h>
#include <drm/drm_drv.h>
#include <drm/drm_fb_dma_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_framebuffer.h>
#include <drm/drm_gem_atomic_helper.h>

#include "uapi/drm/vc4_drm.h"

#include "vc4_drv.h"
#include "vc4_regs.h"

static const struct hvs_format {
	u32 drm; /* DRM_FORMAT_* */
	u32 hvs; /* HVS_FORMAT_* */
	u32 pixel_order;
	u32 pixel_order_hvs5;
	bool hvs5_only;
} hvs_formats[] = {
	{
		.drm = DRM_FORMAT_XRGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ABGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XBGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_ARGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XRGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_RGB888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_YUV422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV444,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU444,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV12,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV21,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV16,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV61,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_P030,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_XRGB2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_ARGB2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_ABGR2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_XBGR2101010,
		.hvs = HVS_PIXEL_FORMAT_RGBA1010102,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
		.hvs5_only = true,
	},
	{
		.drm = DRM_FORMAT_RGB332,
		.hvs = HVS_PIXEL_FORMAT_RGB332,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_BGR233,
		.hvs = HVS_PIXEL_FORMAT_RGB332,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XRGB4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XBGR4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_ABGR4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_BGRX4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_RGBA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
	},
	{
		.drm = DRM_FORMAT_BGRA4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_RGBA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_BGRA,
	},
	{
		.drm = DRM_FORMAT_RGBX4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_BGRA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
	},
	{
		.drm = DRM_FORMAT_RGBA4444,
		.hvs = HVS_PIXEL_FORMAT_RGBA4444,
		.pixel_order = HVS_PIXEL_ORDER_BGRA,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA,
	},
};
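
/*
 * Note: .pixel_order is consumed by the HVS4 display list setup, while
 * .pixel_order_hvs5 is used on HVS5 and later (the non-GEN_4 branch of
 * vc4_plane_mode_set() and vc6_plane_mode_set() below).
 */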

static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (hvs_formats[i].drm == drm_format)
			return &hvs_formats[i];
	}

	return NULL;
}

static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
	if (dst == src >> 16)
		return VC4_SCALING_NONE;
	if (3 * dst >= 2 * (src >> 16))
		return VC4_SCALING_PPF;
	else
		return VC4_SCALING_TPZ;
}
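
/*
 * For example, with a 1920 pixel wide source (src = 1920 << 16, since src
 * is 16.16 fixed point), vc4_get_scaling_mode() returns VC4_SCALING_NONE
 * for dst = 1920, VC4_SCALING_PPF for dst = 1280 (3 * 1280 >= 2 * 1920,
 * i.e. at most 1.5x downscaling) and VC4_SCALING_TPZ for dst = 640.
 */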

static bool plane_enabled(struct drm_plane_state *state)
{
	return state->fb && !WARN_ON(!state->crtc);
}

static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state;
	unsigned int i;

	if (WARN_ON(!plane->state))
		return NULL;

	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return NULL;

	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));

	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		if (vc4_state->upm_handle[i])
			refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
	}

	vc4_state->dlist_initialized = 0;

	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);

	if (vc4_state->dlist) {
		vc4_state->dlist = kmemdup(vc4_state->dlist,
					   vc4_state->dlist_count * 4,
					   GFP_KERNEL);
		if (!vc4_state->dlist) {
			kfree(vc4_state);
			return NULL;
		}
		vc4_state->dlist_size = vc4_state->dlist_count;
	}

	return &vc4_state->base;
}

static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
{
	struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
	unsigned long irqflags;

	spin_lock_irqsave(&hvs->mm_lock, irqflags);
	drm_mm_remove_node(&refcount->upm);
	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	refcount->upm.start = 0;
	refcount->upm.size = 0;
	refcount->size = 0;

	ida_free(&hvs->upm_handles, upm_handle);
}

static void vc4_plane_destroy_state(struct drm_plane *plane,
				    struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int i;

	if (drm_mm_node_allocated(&vc4_state->lbm)) {
		unsigned long irqflags;

		spin_lock_irqsave(&hvs->mm_lock, irqflags);
		drm_mm_remove_node(&vc4_state->lbm);
		spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
	}

	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		struct vc4_upm_refcounts *refcount;

		if (!vc4_state->upm_handle[i])
			continue;

		refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];

		if (refcount_dec_and_test(&refcount->refcount))
			vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
	}

	kfree(vc4_state->dlist);
	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
	kfree(state);
}

/* Called during init to allocate the plane's atomic state. */
static void vc4_plane_reset(struct drm_plane *plane)
{
	struct vc4_plane_state *vc4_state;

	if (plane->state)
		__drm_atomic_helper_plane_destroy_state(plane->state);

	kfree(plane->state);

	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return;

	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
}

static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
{
	if (vc4_state->dlist_count == vc4_state->dlist_size) {
		u32 new_size = max(4u, vc4_state->dlist_count * 2);
		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);

		if (!new_dlist)
			return;
		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);

		kfree(vc4_state->dlist);
		vc4_state->dlist = new_dlist;
		vc4_state->dlist_size = new_size;
	}

	vc4_state->dlist_count++;
}

static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
{
	unsigned int idx = vc4_state->dlist_count;

	vc4_dlist_counter_increment(vc4_state);
	vc4_state->dlist[idx] = val;
}

/* Returns the scl0/scl1 field based on whether the dimensions need to
 * be up/down/non-scaled.
 *
 * This is a replication of a table from the spec.
 */
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_PPF_V_PPF;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_PPF_V_NONE;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_NONE_V_PPF;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
	default:
	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
		/* The unity case is independently handled by
		 * SCALER_CTL0_UNITY.
		 */
		return 0;
	}
}

static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
{
	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
	struct drm_crtc_state *crtc_state;

	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
						   pstate->crtc);

	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
	if (!left && !right && !top && !bottom)
		return 0;

	if (left + right >= crtc_state->mode.hdisplay ||
	    top + bottom >= crtc_state->mode.vdisplay)
		return -EINVAL;

	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_x += left;
	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;

	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);
	vc4_pstate->crtc_y += top;
	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;

	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);

	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
		return -EINVAL;

	return 0;
}

static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	int num_planes = fb->format->num_planes;
	struct drm_crtc_state *crtc_state;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	int ret;

	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
	if (ret)
		return ret;

	vc4_state->src_x = state->src.x1;
	vc4_state->src_y = state->src.y1;
	vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
	vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;

	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;

	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
					     vc4_state->crtc_h);

		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;

		/* Similarly UV needs vertical scaling to be enabled.
		 * Without this a 1:1 scaled YUV422 plane isn't rendered.
		 */
		if (vc4_state->y_scaling[1] == VC4_SCALING_NONE)
			vc4_state->y_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}

static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale, recip;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	scale = src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			/*
			 * The BCM2712 is lacking BIT(31) compared to
			 * the previous generations, but we don't use
			 * it.
			 */
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

/* phase magnitude bits */
#define PHASE_BITS 6

static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
			  u32 xy, int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev);
	u32 scale = src / dst;
	s32 offset, offset2;
	s32 phase;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/*
	 * Start the phase at 1/2 pixel from the 1st pixel at src_x.
	 * 1/4 pixel for YUV.
	 */
	if (channel) {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
		offset += -(1 << PHASE_BITS >> 2);
	} else {
		/*
		 * The phase is relative to scale_src->x, so shift it for
		 * display list's x value
		 */
		offset = (xy & 0xffff) >> (16 - PHASE_BITS);
		offset += -(1 << PHASE_BITS >> 1);

		/*
		 * This is a kludge to make sure the scaling factors are
		 * consistent with YUV's luma scaling. We lose 1-bit precision
		 * because of this.
		 */
		scale &= ~1;
	}

	/*
	 * There may also be a small error introduced by the precision of
	 * scale. Add half of that as a compromise.
	 */
	offset2 = src - dst * scale;
	offset2 >>= 16 - PHASE_BITS;
	phase = offset + (offset2 >> 1);

	/* Ensure +ve values don't touch the sign bit, then truncate negative values */
	if (phase >= 1 << PHASE_BITS)
		phase = (1 << PHASE_BITS) - 1;

	phase &= SCALER_PPF_IPHASE_MASK;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			/*
			 * The register layout documentation is slightly
			 * different to setup the phase in the BCM2712,
			 * but they seem equivalent.
			 */
			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
}
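
/*
 * For example, a 3x TPZ downscale of a 1920 pixel wide source to 640
 * pixels gives scale = (1920 << 16) / 640 = 0x30000 in 16.16 fixed
 * point, and recip = ~0 / scale = 0x5555, which closely approximates
 * the exact (1 << 32) / scale value mentioned in vc4_write_tpz().
 */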

static u32 __vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 pix_per_line;
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0] >> 16;

	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->gen == VC4_GEN_5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->gen == VC4_GEN_5 ? 4 : 2;

	return lbm;
}
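
/*
 * As an illustration of the sizing above: a 1920 pixel wide RGB plane
 * with PPF vertical scaling gives lbm = 1920 * 16 = 30720, which is
 * already a multiple of 128, and ends up as 30720 / 4 = 7680 on HVS5.
 */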

static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
						unsigned int channel)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	switch (vc4_state->y_scaling[channel]) {
	case VC4_SCALING_PPF:
		return 4;

	case VC4_SCALING_TPZ:
		return 2;

	default:
		return 0;
	}
}

static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
				       unsigned int channel)
{
	const struct drm_format_info *info = state->fb->format;
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
		return 0;

	if (info->is_yuv)
		return channel ? 2 : 1;

	if (info->has_alpha)
		return 4;

	return 3;
}

static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
					 unsigned int channel)
{
	const struct drm_format_info *info = state->fb->format;
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int channels_scaled = 0;
	unsigned int components, words, wpc;
	unsigned int width, lines;
	unsigned int i;

	/* LBM is meant to use the smaller of source or dest width, but there
	 * is an issue with UV scaling that the size required for the second
	 * channel is based on the source width only.
	 */
	if (info->hsub > 1 && channel == 1)
		width = state->src_w >> 16;
	else
		width = min(state->src_w >> 16, state->crtc_w);
	width = round_up(width / info->hsub, 4);

	wpc = vc4_lbm_words_per_component(state, channel);
	if (!wpc)
		return 0;

	components = vc4_lbm_components(state, channel);
	if (!components)
		return 0;

	if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha)
		components -= 1;

	words = width * wpc * components;

	lines = DIV_ROUND_UP(words, 128 / info->hsub);

	for (i = 0; i < 2; i++)
		if (vc4_state->y_scaling[i] != VC4_SCALING_NONE)
			channels_scaled++;

	if (channels_scaled == 1)
		lines = lines / 2;

	return lines;
}

static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
{
	const struct drm_format_info *info = state->fb->format;

	if (info->hsub > 1)
		return max(vc4_lbm_channel_size(state, 0),
			   vc4_lbm_channel_size(state, 1));
	else
		return vc4_lbm_channel_size(state, 0);
}

static u32 vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	if (vc4->gen >= VC4_GEN_6_C)
		return __vc6_lbm_size(state);
	else
		return __vc4_lbm_size(state);
}

static size_t vc6_upm_size(const struct drm_plane_state *state,
			   unsigned int plane)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int stride = state->fb->pitches[plane];

	/*
	 * TODO: This only works for raster formats, and is sub-optimal
	 * for buffers with a stride aligned on 32 bytes.
	 */
	unsigned int words_per_line = (stride + 62) / 32;
	unsigned int fetch_region_size = words_per_line * 32;
	unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
	unsigned int buffer_size = fetch_region_size * buffer_lines;

	return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
}

static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state, vc4_state->src_w[channel],
			      vc4_state->crtc_w, vc4_state->src_x, channel);
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state, vc4_state->src_h[channel],
			      vc4_state->crtc_h, vc4_state->src_y, channel);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state, vc4_state->src_w[channel],
			      vc4_state->crtc_w);
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state, vc4_state->src_h[channel],
			      vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}

static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * Alpha blending step seems to be pipelined and it's always operating
	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even if the bandwidth/plane required for a single frame is
		 *
		 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
		 * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be punctually higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
					     vc4_state->crtc_h);
		vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
					  (vc4_state->src_h[i] >> 16) *
					  vscale_factor * fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}
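
/*
 * As a rough illustration of these load figures: a full-screen unity
 * 1920x1080 ARGB8888 plane refreshed at 60 Hz contributes
 * 1920 * 1080 * 4 * 60 bytes/sec (about 498 MB/s) to membus_load and
 * (1920 * 1080 * 60) >> 2 cycles/sec (about 31 MHz) to hvs_load.
 */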

static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct drm_plane *plane = state->plane;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	/*
	 * NOTE: BCM2712 doesn't need to be aligned, since the size
	 * returned by vc4_lbm_size() is in words already.
	 */
	if (vc4->gen == VC4_GEN_5)
		lbm_size = ALIGN(lbm_size, 64);
	else if (vc4->gen == VC4_GEN_4)
		lbm_size = ALIGN(lbm_size, 32);

	drm_dbg_driver(drm, "[PLANE:%d:%s] LBM Allocation Size: %u\n",
		       plane->base.id, plane->name, lbm_size);

	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
	 */
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
		int ret;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
						 lbm_size, 1,
						 0, 0);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret) {
			drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
			return ret;
		}
	} else {
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}

static int vc6_plane_allocate_upm(struct drm_plane_state *state)
{
	const struct drm_format_info *info = state->fb->format;
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned int i;
	int ret;

	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);

	vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;

	for (i = 0; i < info->num_planes; i++) {
		struct vc4_upm_refcounts *refcount;
		int upm_handle;
		unsigned long irqflags;
		size_t upm_size;

		upm_size = vc6_upm_size(state, i);
		if (!upm_size)
			return -EINVAL;
		upm_handle = vc4_state->upm_handle[i];

		if (upm_handle &&
		    hvs->upm_refcounts[upm_handle].size == upm_size) {
			/* Allocation is the same size as the previous user of
			 * the plane. Keep the allocation.
			 */
			vc4_state->upm_handle[i] = upm_handle;
		} else {
			if (upm_handle &&
			    refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
				vc4_plane_release_upm_ida(hvs, upm_handle);
				vc4_state->upm_handle[i] = 0;
			}

			upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
						     VC4_NUM_UPM_HANDLES,
						     GFP_KERNEL);
			if (upm_handle < 0) {
				drm_dbg(drm, "Out of upm_handles\n");
				return upm_handle;
			}
			vc4_state->upm_handle[i] = upm_handle;

			refcount = &hvs->upm_refcounts[upm_handle];
			refcount_set(&refcount->refcount, 1);
			refcount->size = upm_size;

			spin_lock_irqsave(&hvs->mm_lock, irqflags);
			ret = drm_mm_insert_node_generic(&hvs->upm_mm,
							 &refcount->upm,
							 upm_size, HVS_UBM_WORD_SIZE,
							 0, 0);
			spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
			if (ret) {
				drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
				refcount_set(&refcount->refcount, 0);
				ida_free(&hvs->upm_handles, upm_handle);
				vc4_state->upm_handle[i] = 0;
				return ret;
			}
		}

		refcount = &hvs->upm_refcounts[upm_handle];
		vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
			VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
				      SCALER6_PTR0_UPM_BASE) |
			VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
				      SCALER6_PTR0_UPM_HANDLE) |
			VC4_SET_FIELD(vc4_state->upm_buffer_lines,
				      SCALER6_PTR0_UPM_BUFF_SIZE);
	}

	return 0;
}

static void vc6_plane_free_upm(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_device *drm = state->plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = vc4->hvs;
	unsigned int i;

	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);

	for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
		unsigned int upm_handle;

		upm_handle = vc4_state->upm_handle[i];
		if (!upm_handle)
			continue;

		if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
			vc4_plane_release_upm_ida(hvs, upm_handle);
		vc4_state->upm_handle[i] = 0;
	}
}

/*
 * The colorspace conversion matrices are held in 3 entries in the dlist.
 * Create an array of them, with entries for each full and limited mode, and
 * each supported colorspace.
 */
static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
	{
		/* Limited range */
		{
			/* BT601 */
			SCALER_CSC0_ITR_R_601_5,
			SCALER_CSC1_ITR_R_601_5,
			SCALER_CSC2_ITR_R_601_5,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3,
			SCALER_CSC1_ITR_R_709_3,
			SCALER_CSC2_ITR_R_709_3,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020,
			SCALER_CSC1_ITR_R_2020,
			SCALER_CSC2_ITR_R_2020,
		}
	}, {
		/* Full range */
		{
			/* JFIF */
			SCALER_CSC0_JPEG_JFIF,
			SCALER_CSC1_JPEG_JFIF,
			SCALER_CSC2_JPEG_JFIF,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3_FR,
			SCALER_CSC1_ITR_R_709_3_FR,
			SCALER_CSC2_ITR_R_709_3_FR,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020_FR,
			SCALER_CSC1_ITR_R_2020_FR,
			SCALER_CSC2_ITR_R_2020_FR,
		}
	}
};

static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
{
	struct drm_device *dev = state->state->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	WARN_ON_ONCE(vc4->gen != VC4_GEN_4);

	if (!state->fb->format->has_alpha)
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
				     SCALER_POS2_ALPHA_MODE);

	switch (state->pixel_blend_mode) {
	case DRM_MODE_BLEND_PIXEL_NONE:
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
				     SCALER_POS2_ALPHA_MODE);
	default:
	case DRM_MODE_BLEND_PREMULTI:
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
				     SCALER_POS2_ALPHA_MODE) |
			SCALER_POS2_ALPHA_PREMULT;
	case DRM_MODE_BLEND_COVERAGE:
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
				     SCALER_POS2_ALPHA_MODE);
	}
}

static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
{
	struct drm_device *dev = state->state->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C &&
		     vc4->gen != VC4_GEN_6_D);

	switch (vc4->gen) {
	default:
	case VC4_GEN_5:
	case VC4_GEN_6_C:
		if (!state->fb->format->has_alpha)
			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
					     SCALER5_CTL2_ALPHA_MODE);

		switch (state->pixel_blend_mode) {
		case DRM_MODE_BLEND_PIXEL_NONE:
			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
					     SCALER5_CTL2_ALPHA_MODE);
		default:
		case DRM_MODE_BLEND_PREMULTI:
			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
					     SCALER5_CTL2_ALPHA_MODE) |
				SCALER5_CTL2_ALPHA_PREMULT;
		case DRM_MODE_BLEND_COVERAGE:
			return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
					     SCALER5_CTL2_ALPHA_MODE);
		}
	case VC4_GEN_6_D:
		/* 2712-D configures fixed alpha mode in CTL0 */
		return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ?
			SCALER5_CTL2_ALPHA_PREMULT : 0;
	}
}

static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state)
{
	struct drm_device *dev = state->state->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D);

	if (vc4->gen == VC4_GEN_6_D &&
	    (!state->fb->format->has_alpha ||
	     state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE))
		return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED,
				     SCALER6_CTL0_ALPHA_MASK);

	return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK);
}

/* Writes out a full display list for an active plane to the plane's
 * private dlist state.
 */
static int vc4_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	u32 ctl0_offset = vc4_state->dlist_count;
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
	int num_planes = fb->format->num_planes;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	bool mix_plane_alpha;
	bool covers_screen;
	u32 scl0, scl1, pitch0;
	u32 tiling, src_x, src_y;
	u32 width, height;
	u32 hvs_format = format->hvs;
	unsigned int rotation;
	u32 offsets[3] = { 0 };
	int ret, i;

	if (vc4_state->dlist_initialized)
		return 0;

	ret = vc4_plane_setup_clipping_and_scaling(state);
	if (ret)
		return ret;

	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
		/* 0 source size probably means the plane is offscreen */
		vc4_state->dlist_initialized = 1;
		return 0;
	}

	width = vc4_state->src_w[0] >> 16;
	height = vc4_state->src_h[0] >> 16;

	/* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing. For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
		scl0 = vc4_get_scl_field(state, 0);
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}

	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y >> 16;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += height - 1;

	src_x = vc4_state->src_x >> 16;

	switch (base_format_mod) {
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER_CTL0_TILING_LINEAR;
		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
		}

		break;

	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
		u32 tile_size_shift = 12; /* T tiles are 4kb */
		/* Whole-tile offsets, mostly for setting the pitch. */
		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
		u32 tile_w_mask = (1 << tile_w_shift) - 1;
		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
		 * the height (in pixels) of a 4k tile.
		 */
		u32 tile_h_mask = (2 << tile_h_shift) - 1;
		/* For T-tiled, the FB pitch is "how many bytes from one row to
		 * the next, such that
		 *
		 *	pitch * tile_h == tile_size * tiles_per_row
		 */
		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
		u32 tiles_l = src_x >> tile_w_shift;
		u32 tiles_r = tiles_w - tiles_l;
		u32 tiles_t = src_y >> tile_h_shift;
		/* Intra-tile offsets, which modify the base address (the
		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
		 * base address).
		 */
		u32 tile_y = (src_y >> 4) & 1;
		u32 subtile_y = (src_y >> 2) & 3;
		u32 utile_y = src_y & 3;
		u32 x_off = src_x & tile_w_mask;
		u32 y_off = src_y & tile_h_mask;

		/* When Y reflection is requested we must set the
		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
		 * after the initial one should be fetched in descending order,
		 * which makes sense since we start from the last line and go
		 * backward.
		 * Don't know why we need y_off = max_y_off - y_off, but it's
		 * definitely required (I guess it's also related to the "going
		 * backward" situation).
		 */
		if (rotation & DRM_MODE_REFLECT_Y) {
			y_off = tile_h_mask - y_off;
			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
		} else {
			pitch0 = 0;
		}

		tiling = SCALER_CTL0_TILING_256B_OR_T;
		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
		offsets[0] += tiles_t * (tiles_w << tile_size_shift);
		offsets[0] += subtile_y << 8;
		offsets[0] += utile_y << 4;

		/* Rows of tiles alternate left-to-right and right-to-left. */
		if (tiles_t & 1) {
			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
			offsets[0] += (tiles_w - tiles_l) << tile_size_shift;
			offsets[0] -= (1 + !tile_y) << 10;
		} else {
			offsets[0] += tiles_l << tile_size_shift;
			offsets[0] += tile_y << 10;
		}

		break;
	}

	case DRM_FORMAT_MOD_BROADCOM_SAND64:
	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n",
				      param);
			return -EINVAL;
		}

		if (fb->format->format == DRM_FORMAT_P030) {
			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
			tiling = SCALER_CTL0_TILING_128B;
		} else {
			hvs_format = HVS_PIXEL_FORMAT_H264;

			switch (base_format_mod) {
			case DRM_FORMAT_MOD_BROADCOM_SAND64:
				tiling = SCALER_CTL0_TILING_64B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND128:
				tiling = SCALER_CTL0_TILING_128B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND256:
				tiling = SCALER_CTL0_TILING_256B_OR_T;
				break;
			default:
				return -EINVAL;
			}
		}

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 *
		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
		 * word that should be taken as the first pixel.
		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
		 * element within the 128bit word, eg for pixel 3 the value
		 * should be 6.
		 */
		for (i = 0; i < num_planes; i++) {
			u32 tile_w, tile, x_off, pix_per_tile;

			if (fb->format->format == DRM_FORMAT_P030) {
				/*
				 * Spec says: bits [31:4] of the given address
				 * should point to the 128-bit word containing
				 * the desired starting pixel, and bits[3:0]
				 * should be between 0 and 11, indicating which
				 * of the 12-pixels in that 128-bit word is the
				 * first pixel to be used
				 */
				u32 remaining_pixels = src_x % 96;
				u32 aligned = remaining_pixels / 12;
				u32 last_bits = remaining_pixels % 12;

				x_off = aligned * 16 + last_bits;
				tile_w = 128;
				pix_per_tile = 96;
			} else {
				switch (base_format_mod) {
				case DRM_FORMAT_MOD_BROADCOM_SAND64:
					tile_w = 64;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND128:
					tile_w = 128;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND256:
					tile_w = 256;
					break;
				default:
					return -EINVAL;
				}
				pix_per_tile = tile_w / fb->format->cpp[0];
				x_off = (src_x % pix_per_tile) /
					(i ? h_subsample : 1) *
					fb->format->cpp[i];
			}

			tile = src_x / pix_per_tile;

			offsets[i] += param * tile_w * tile;
			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
			offsets[i] += x_off & ~(i ? 1 : 0);
		}

		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* fetch an extra pixel if we don't actually line up with the left edge. */
	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
		width++;

	/* same for the right side */
	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
		width++;

	/* now for the top */
	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
		height++;

	/* and the bottom */
	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
		height++;

	/* For YUV444 the hardware wants double the width, otherwise it doesn't
	 * fetch full width of chroma
	 */
	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
		width <<= 1;

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	if (vc4->gen == VC4_GEN_4) {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				vc4_hvs4_get_alpha_blend_mode(state) |
				VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));

		/* Position Word 3: Context. Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(format->pixel_order_hvs5 << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
						SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
				vc4_hvs5_get_alpha_blend_mode(state) |
				(mix_plane_alpha ?
					SCALER5_CTL2_ALPHA_MIX : 0)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER5_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER5_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context. Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}


	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset[0] = vc4_state->dlist_count;

	for (i = 0; i < num_planes; i++) {
		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);

		vc4_dlist_write(vc4_state, bo->dma_addr + fb->offsets[i] + offsets[i]);
	}

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words */
	if (vc4_state->is_yuv) {
		enum drm_color_encoding color_encoding = state->color_encoding;
		enum drm_color_range color_range = state->color_range;
		const u32 *ccm;

		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
			color_encoding = DRM_COLOR_YCBCR_BT601;
		if (color_range >= DRM_COLOR_RANGE_MAX)
			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;

		ccm = colorspace_coeffs[color_range][color_encoding];

		vc4_dlist_write(vc4_state, ccm[0]);
		vc4_dlist_write(vc4_state, ccm[1]);
		vc4_dlist_write(vc4_state, ccm[2]);
	}

	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	return 0;
}

static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
{
	struct drm_plane_state *state = &vc4_state->base;
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 ret = 0;

	if (vc4_state->is_yuv) {
		enum drm_color_encoding color_encoding = state->color_encoding;
		enum drm_color_range color_range = state->color_range;

		/* CSC pre-loaded with:
		 * 0 = BT601 limited range
		 * 1 = BT709 limited range
		 * 2 = BT2020 limited range
		 * 3 = BT601 full range
		 * 4 = BT709 full range
		 * 5 = BT2020 full range
		 */
		if (color_encoding > DRM_COLOR_YCBCR_BT2020)
			color_encoding = DRM_COLOR_YCBCR_BT601;
		if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;

		if (vc4->gen == VC4_GEN_6_C) {
			ret |= SCALER6C_CTL2_CSC_ENABLE;
			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
					     SCALER6C_CTL2_BRCM_CFC_CONTROL);
		} else {
			ret |= SCALER6D_CTL2_CSC_ENABLE;
			ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
					     SCALER6D_CTL2_BRCM_CFC_CONTROL);
		}
	}

	return ret;
}
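
/*
 * The CFC index computed above is color_encoding + color_range * 3, which
 * selects one of the six pre-loaded matrices listed in the comment: for
 * example BT709 full range gives 1 + 1 * 3 = 4.
 */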

static int vc6_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct drm_device *drm = plane->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
	int num_planes = fb->format->num_planes;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	bool mix_plane_alpha;
	bool covers_screen;
	u32 scl0, scl1, pitch0;
	u32 tiling, src_x, src_y;
	u32 width, height;
	u32 hvs_format = format->hvs;
	u32 offsets[3] = { 0 };
	unsigned int rotation;
	int ret, i;

	if (vc4_state->dlist_initialized)
		return 0;

	ret = vc4_plane_setup_clipping_and_scaling(state);
	if (ret)
		return ret;

	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
	    !vc4_state->crtc_w || !vc4_state->crtc_h) {
		/* 0 source size probably means the plane is offscreen.
		 * 0 destination size is a redundant plane.
		 */
		vc4_state->dlist_initialized = 1;
		return 0;
	}

	width = vc4_state->src_w[0] >> 16;
	height = vc4_state->src_h[0] >> 16;

	/* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing. For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
		scl0 = vc4_get_scl_field(state, 0);
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}

	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y >> 16;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += height - 1;

	src_x = vc4_state->src_x >> 16;

	switch (base_format_mod) {
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
			offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
		}

		break;

	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
		u32 components_per_word;
		u32 starting_offset;
		u32 fetch_count;

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n",
				      param);
			return -EINVAL;
		}

		if (fb->format->format == DRM_FORMAT_P030) {
			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
			tiling = SCALER6_CTL0_ADDR_MODE_128B;
		} else {
			hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;

			switch (base_format_mod) {
			case DRM_FORMAT_MOD_BROADCOM_SAND128:
				tiling = SCALER6_CTL0_ADDR_MODE_128B;
				break;
			case DRM_FORMAT_MOD_BROADCOM_SAND256:
				tiling = SCALER6_CTL0_ADDR_MODE_256B;
				break;
			default:
				return -EINVAL;
			}
		}

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 *
		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
		 * word that should be taken as the first pixel.
		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
		 * element within the 128bit word, eg for pixel 3 the value
		 * should be 6.
		 */
		for (i = 0; i < num_planes; i++) {
			u32 tile_w, tile, x_off, pix_per_tile;

			if (fb->format->format == DRM_FORMAT_P030) {
				/*
				 * Spec says: bits [31:4] of the given address
				 * should point to the 128-bit word containing
				 * the desired starting pixel, and bits[3:0]
				 * should be between 0 and 11, indicating which
				 * of the 12-pixels in that 128-bit word is the
				 * first pixel to be used
				 */
				u32 remaining_pixels = src_x % 96;
				u32 aligned = remaining_pixels / 12;
				u32 last_bits = remaining_pixels % 12;

				x_off = aligned * 16 + last_bits;
				tile_w = 128;
				pix_per_tile = 96;
			} else {
				switch (base_format_mod) {
				case DRM_FORMAT_MOD_BROADCOM_SAND128:
					tile_w = 128;
					break;
				case DRM_FORMAT_MOD_BROADCOM_SAND256:
					tile_w = 256;
					break;
				default:
					return -EINVAL;
				}
				pix_per_tile = tile_w / fb->format->cpp[0];
				x_off = (src_x % pix_per_tile) /
					(i ? h_subsample : 1) *
					fb->format->cpp[i];
			}

			tile = src_x / pix_per_tile;

			offsets[i] += param * tile_w * tile;
			offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
			offsets[i] += x_off & ~(i ? 1 : 0);

		components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
		starting_offset = src_x % components_per_word;
		fetch_count = (width + starting_offset + components_per_word - 1) /
			components_per_word;

		pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
			 VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* fetch an extra pixel if we don't actually line up with the left edge. */
	if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
		width++;

	/* same for the right side */
	if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
	    vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
		width++;

	/* now for the top */
	if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
		height++;

	/* and the bottom */
	if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
	    vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
		height++;

	/* For YUV444 the hardware wants double the width, otherwise it
	 * doesn't fetch the full width of chroma.
	 */
	if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
		width <<= 1;

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	/* Control Word 0: Scaling Configuration & Element Validity */
	vc4_dlist_write(vc4_state,
			SCALER6_CTL0_VALID |
			VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
			vc4_hvs6_get_alpha_mask_mode(state) |
			(vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
			VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
			VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
			VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
			VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));

	/* Position Word 0: Image Position */
	vc4_state->pos0_offset = vc4_state->dlist_count;
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
			(rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
			VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));

	/* Control Word 2: Alpha Value & CSC */
	vc4_dlist_write(vc4_state,
			vc6_plane_get_csc_mode(vc4_state) |
			vc4_hvs5_get_alpha_blend_mode(state) |
			(mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
			VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));

	/* Position Word 1: Scaled Image Dimensions */
	if (!vc4_state->is_unity)
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->crtc_h - 1,
					      SCALER6_POS1_SCL_LINES) |
				VC4_SET_FIELD(vc4_state->crtc_w - 1,
					      SCALER6_POS1_SCL_WIDTH));

	/* Position Word 2: Source Image Size */
	vc4_state->pos2_offset = vc4_state->dlist_count;
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(height - 1,
				      SCALER6_POS2_SRC_LINES) |
			VC4_SET_FIELD(width - 1,
				      SCALER6_POS2_SRC_WIDTH));

	/* Position Word 3: Context */
	vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/*
	 * TODO: This only covers Raster Scan Order planes
	 */
	for (i = 0; i < num_planes; i++) {
		struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
		dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];

		/* Pointer Word 0 */
		vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
				/*
				 * The UPM buffer will be allocated in
				 * vc6_plane_allocate_upm().
				 */
				VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
					      SCALER6_PTR0_UPPER_ADDR));

		/* Pointer Word 1 */
		vc4_dlist_write(vc4_state, lower_32_bits(paddr));

		/* Pointer Word 2 */
		if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
		    base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER6_PTR2_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/*
	 * Palette Word 0
	 * TODO: We're not using the palette mode
	 */

	/*
	 * Trans Word 0
	 * TODO: It's only relevant if we set the trans_rgb bit in the
	 * control word 0, and we don't at the moment.
	 */

	vc4_state->lbm_offset = 0;

	if (!vc4_state->is_unity || fb->format->is_yuv) {
		/*
		 * Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			if (num_planes > 1)
				/*
				 * Emit Cb/Cr as channel 0 and Y as channel
				 * 1. This matches how we set up scl0/scl1
				 * above.
				 */
				vc4_write_scaling_parameters(state, 1);

			vc4_write_scaling_parameters(state, 0);
		}

		/*
		 * If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel =
				VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
					      SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	vc4_dlist_write(vc4_state, SCALER6_CTL0_END);

	vc4_state->dlist[0] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;

	/*
	 * Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/*
	 * Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc6_plane_mode_set() and
	 * decided to fall back to a sync update because an async update was
	 * not possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
		       plane->base.id, plane->name, vc4_state->dlist_count);

	return 0;
}

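/*
 * Summary of the HVS6 display-element layout emitted above (one entry per
 * vc4_dlist_write(), in order): Control Word 0, Position Word 0, Control
 * Word 2, Position Word 1 (only when scaling), Position Word 2, Position
 * Word 3 (context), then Pointer Words 0/1/2 per colour plane, optionally a
 * reserved LBM slot plus scaling parameters and PPF kernel pointers, and
 * finally SCALER6_CTL0_END. The CTL0_NEXT field of word 0 is patched with
 * the final dlist_count once everything has been written.
 */
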
/* If a modeset involves changing the setup of a plane, the atomic
 * infrastructure will call this to validate a proposed plane setup.
 * However, if a plane isn't getting updated, this (and the
 * corresponding vc4_plane_atomic_update) won't get called. Thus, we
 * compute the dlist here and have all active plane dlists get updated
 * in the CRTC's flush.
 */
static int vc4_plane_atomic_check(struct drm_plane *plane,
				  struct drm_atomic_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
	int ret;

	vc4_state->dlist_count = 0;

	if (!plane_enabled(new_plane_state)) {
		struct drm_plane_state *old_plane_state =
			drm_atomic_get_old_plane_state(state, plane);

		if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
		    plane_enabled(old_plane_state)) {
			vc6_plane_free_upm(new_plane_state);
		}
		return 0;
	}

	if (vc4->gen >= VC4_GEN_6_C)
		ret = vc6_plane_mode_set(plane, new_plane_state);
	else
		ret = vc4_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	if (!vc4_state->src_w[0] || !vc4_state->src_h[0] ||
	    !vc4_state->crtc_w || !vc4_state->crtc_h)
		return 0;

	ret = vc4_plane_allocate_lbm(new_plane_state);
	if (ret)
		return ret;

	if (vc4->gen >= VC4_GEN_6_C) {
		ret = vc6_plane_allocate_upm(new_plane_state);
		if (ret)
			return ret;
	}

	return 0;
}

static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
	/* No contents here. Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}

u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	int i;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		goto out;

	vc4_state->hw_dlist = dlist;

	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
	for (i = 0; i < vc4_state->dlist_count; i++)
		writel(vc4_state->dlist[i], &dlist[i]);

	drm_dev_exit(idx);

out:
	return vc4_state->dlist_count;
}

u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
{
	const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	return vc4_state->dlist_count;
}

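/*
 * Rough usage sketch for the two helpers above (illustrative only, the real
 * consumer lives in the CRTC/HVS code): at atomic_flush time the CRTC is
 * expected to size its dlist allocation with vc4_plane_dlist_size() for each
 * active plane state and then copy each plane's element with
 * vc4_plane_write_dlist(), e.g.:
 *
 *	u32 __iomem *dlist = hvs_dlist_start;
 *
 *	drm_atomic_crtc_for_each_plane(plane, crtc)
 *		dlist += vc4_plane_write_dlist(plane, dlist);
 *
 * vc4_plane_write_dlist() returns the number of words written, so the
 * pointer advances to where the next plane's element should go.
 */
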
/* Updates the plane to immediately (well, once the FIFO needs
 * refilling) scan out from a new framebuffer.
 */
void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	/* We're skipping the address adjustment for negative origin,
	 * because this is only called on the primary plane.
	 */
	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);

	if (vc4->gen == VC4_GEN_6_C) {
		u32 value;

		value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
			~SCALER6_PTR0_UPPER_ADDR_MASK;
		value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
				       SCALER6_PTR0_UPPER_ADDR);

		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
		vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;

		value = lower_32_bits(dma_addr);
		writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
		vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
	} else {
		u32 addr;

		addr = (u32)dma_addr;

		/* Write the new address into the hardware immediately. The
		 * scanout will start from this address as soon as the FIFO
		 * needs to refill with pixels.
		 */
		writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);

		/* Also update the CPU-side dlist copy, so that any later
		 * atomic updates that don't do a new modeset on our plane
		 * also use our updated address.
		 */
		vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
	}

	drm_dev_exit(idx);
}

static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_atomic_state *state)
{
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state, *new_vc4_state;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	swap(plane->state->fb, new_plane_state->fb);
	plane->state->crtc_x = new_plane_state->crtc_x;
	plane->state->crtc_y = new_plane_state->crtc_y;
	plane->state->crtc_w = new_plane_state->crtc_w;
	plane->state->crtc_h = new_plane_state->crtc_h;
	plane->state->src_x = new_plane_state->src_x;
	plane->state->src_y = new_plane_state->src_y;
	plane->state->src_w = new_plane_state->src_w;
	plane->state->src_h = new_plane_state->src_h;
	plane->state->alpha = new_plane_state->alpha;
	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
	plane->state->rotation = new_plane_state->rotation;
	plane->state->zpos = new_plane_state->zpos;
	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
	plane->state->color_encoding = new_plane_state->color_encoding;
	plane->state->color_range = new_plane_state->color_range;
	plane->state->src = new_plane_state->src;
	plane->state->dst = new_plane_state->dst;
	plane->state->visible = new_plane_state->visible;

	new_vc4_state = to_vc4_plane_state(new_plane_state);
	vc4_state = to_vc4_plane_state(plane->state);

	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset[0]] =
		new_vc4_state->dlist[vc4_state->ptr0_offset[0]];

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset[0]],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);

	drm_dev_exit(idx);
}
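
/*
 * The async (cursor / page-flip) path above deliberately patches only three
 * dlist words in place: Position Word 0 (crtc x/y), Position Word 2 (source
 * size) and Pointer Word 0 (framebuffer address), mirroring the checks in
 * vc4_plane_atomic_async_check() below. Anything that would change other
 * words forces a synchronous update instead.
 */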

static int vc4_plane_atomic_async_check(struct drm_plane *plane,
					struct drm_atomic_state *state, bool flip)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
	int ret;
	u32 i;

	if (vc4->gen <= VC4_GEN_5)
		ret = vc4_plane_mode_set(plane, new_plane_state);
	else
		ret = vc6_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	old_vc4_state = to_vc4_plane_state(plane->state);
	new_vc4_state = to_vc4_plane_state(new_plane_state);

	if (!new_vc4_state->hw_dlist)
		return -EINVAL;

	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
	    old_vc4_state->ptr0_offset[0] != new_vc4_state->ptr0_offset[0] ||
	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
		return -EINVAL;

	/* Only the pos0, pos2 and ptr0 dwords can be updated in an async
	 * update; if anything else has changed, fall back to a sync update.
	 */
	for (i = 0; i < new_vc4_state->dlist_count; i++) {
		if (i == new_vc4_state->pos0_offset ||
		    i == new_vc4_state->pos2_offset ||
		    i == new_vc4_state->ptr0_offset[0] ||
		    (new_vc4_state->lbm_offset &&
		     i == new_vc4_state->lbm_offset))
			continue;

		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
			return -EINVAL;
	}

	return 0;
}

static int vc4_prepare_fb(struct drm_plane *plane,
			  struct drm_plane_state *state)
{
	struct vc4_bo *bo;
	int ret;

	if (!state->fb)
		return 0;

	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);

	ret = drm_gem_plane_helper_prepare_fb(plane, state);
	if (ret)
		return ret;

	return vc4_bo_inc_usecnt(bo);
}

static void vc4_cleanup_fb(struct drm_plane *plane,
			   struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (!state->fb)
		return;

	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
	vc4_bo_dec_usecnt(bo);
}

static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.prepare_fb = vc4_prepare_fb,
	.cleanup_fb = vc4_cleanup_fb,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

static bool vc4_format_mod_supported(struct drm_plane *plane,
				     uint32_t format,
				     uint64_t modifier)
{
	/* Support T_TILING for RGB formats only. */
	switch (format) {
	case DRM_FORMAT_XRGB8888:
	case DRM_FORMAT_ARGB8888:
	case DRM_FORMAT_ABGR8888:
	case DRM_FORMAT_XBGR8888:
	case DRM_FORMAT_RGB565:
	case DRM_FORMAT_BGR565:
	case DRM_FORMAT_ARGB1555:
	case DRM_FORMAT_XRGB1555:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_NV21:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_P030:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_RGBX1010102:
	case DRM_FORMAT_BGRX1010102:
	case DRM_FORMAT_RGBA1010102:
	case DRM_FORMAT_BGRA1010102:
	case DRM_FORMAT_XRGB4444:
	case DRM_FORMAT_ARGB4444:
	case DRM_FORMAT_XBGR4444:
	case DRM_FORMAT_ABGR4444:
	case DRM_FORMAT_RGBX4444:
	case DRM_FORMAT_RGBA4444:
	case DRM_FORMAT_BGRX4444:
	case DRM_FORMAT_BGRA4444:
	case DRM_FORMAT_RGB332:
	case DRM_FORMAT_BGR233:
	case DRM_FORMAT_YUV422:
	case DRM_FORMAT_YVU422:
	case DRM_FORMAT_YUV420:
	case DRM_FORMAT_YVU420:
	case DRM_FORMAT_NV16:
	case DRM_FORMAT_NV61:
	default:
		return (modifier == DRM_FORMAT_MOD_LINEAR);
	}
}

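/*
 * Illustrative example (values are hypothetical): userspace advertising an
 * NV12 buffer laid out in SAND128 columns of 64 lines would typically pass a
 * modifier built with DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(64).
 * fourcc_mod_broadcom_mod() strips the column-height parameter, so the
 * switch above matches it against DRM_FORMAT_MOD_BROADCOM_SAND128, while
 * fourcc_mod_broadcom_param() later recovers the 64 for the pitch and offset
 * maths in the mode_set paths.
 */
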
static const struct drm_plane_funcs vc4_plane_funcs = {
	.update_plane = drm_atomic_helper_update_plane,
	.disable_plane = drm_atomic_helper_disable_plane,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
	.format_mod_supported = vc4_format_mod_supported,
};

struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_plane *plane;
	struct vc4_plane *vc4_plane;
	u32 formats[ARRAY_SIZE(hvs_formats)];
	int num_formats = 0;
	unsigned i;
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
			formats[num_formats] = hvs_formats[i].drm;
			num_formats++;
		}
	}

	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
					       possible_crtcs,
					       &vc4_plane_funcs,
					       formats, num_formats,
					       modifiers, type, NULL);
	if (IS_ERR(vc4_plane))
		return ERR_CAST(vc4_plane);
	plane = &vc4_plane->base;

	if (vc4->gen >= VC4_GEN_5)
		drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
	else
		drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_blend_mode_property(plane,
					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
					     BIT(DRM_MODE_BLEND_PREMULTI) |
					     BIT(DRM_MODE_BLEND_COVERAGE));
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	drm_plane_create_color_properties(plane,
					  BIT(DRM_COLOR_YCBCR_BT601) |
					  BIT(DRM_COLOR_YCBCR_BT709) |
					  BIT(DRM_COLOR_YCBCR_BT2020),
					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
					  DRM_COLOR_YCBCR_BT709,
					  DRM_COLOR_YCBCR_LIMITED_RANGE);

	if (type == DRM_PLANE_TYPE_PRIMARY)
		drm_plane_create_zpos_immutable_property(plane, 0);

	return plane;
}

#define VC4_NUM_OVERLAY_PLANES	16

int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes. We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small a plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit. As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < VC4_NUM_OVERLAY_PLANES; i++) {
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
				       GENMASK(drm->mode_config.num_crtc - 1, 0));

		if (IS_ERR(plane))
			continue;

		/* Create zpos property. Max of all the overlays + 1 primary +
		 * 1 cursor plane on a crtc.
		 */
		drm_plane_create_zpos_property(plane, i + 1, 1,
					       VC4_NUM_OVERLAY_PLANES + 1);
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since the zpos fallback is that planes are rendered by plane
		 * ID order, and that then puts the cursor on top.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
					      drm_crtc_mask(crtc));
		if (!IS_ERR(cursor_plane)) {
			crtc->cursor = cursor_plane;

			drm_plane_create_zpos_property(cursor_plane,
						       VC4_NUM_OVERLAY_PLANES + 1,
						       1,
						       VC4_NUM_OVERLAY_PLANES + 1);
		}
	}

	return 0;
}
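
/*
 * Resulting zpos layout (as set up above): the primary plane is fixed at
 * zpos 0, the sixteen overlay planes start at zpos 1..16 and may be moved
 * anywhere in the 1..17 range, and each CRTC's legacy cursor plane starts
 * at zpos 17 so it sits on top by default.
 */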