// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include "iris_instance.h"
#include "iris_vpu_buffer.h"
#include "iris_hfi_gen1_defines.h"
#include "iris_hfi_gen2_defines.h"

#define HFI_MAX_COL_FRAME 6
/* UBWC tile geometry (in pixels) for the NV12 and TP10 YUV420 layouts */
#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_HEIGHT (8)
#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH (32)
#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_HEIGHT (8)
#define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_WIDTH (16)
#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_HEIGHT (4)
#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH (48)
#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_HEIGHT (4)
#define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_WIDTH (24)
/* AV1 BSE collocated-MV bytes per superblock, by superblock size */
#define AV1D_SIZE_BSE_COL_MV_64x64 512
#define AV1D_SIZE_BSE_COL_MV_128x128 2816
#define UBWC_TILE_SIZE 256

#ifndef SYSTEM_LAL_TILE10
#define SYSTEM_LAL_TILE10 192
#endif

/*
 * H.264 decode bin (bitstream scratch) buffer: CABAC header and residual
 * partitions are fixed ratios of the YUV420 frame size (with a floor of
 * BIN_BUFFER_THRESHOLD for small frames), each rounded up so that it
 * divides evenly across the VPP pipes.
 */
static u32 size_h264d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 size_yuv, size_bin_hdr, size_bin_res;

	size_yuv = ((frame_width * frame_height) <= BIN_BUFFER_THRESHOLD) ?
		((BIN_BUFFER_THRESHOLD * 3) >> 1) :
		((frame_width * frame_height * 3) >> 1);
	size_bin_hdr = size_yuv * H264_CABAC_HDR_RATIO_HD_TOT;
	size_bin_res = size_yuv * H264_CABAC_RES_RATIO_HD_TOT;
	/* DMA-align the per-pipe share, then scale back to all pipes */
	size_bin_hdr = ALIGN(size_bin_hdr / num_vpp_pipes,
			     DMA_ALIGNMENT) * num_vpp_pipes;
	size_bin_res = ALIGN(size_bin_res / num_vpp_pipes,
			     DMA_ALIGNMENT) * num_vpp_pipes;

	return size_bin_hdr + size_bin_res;
}

/* H.264 decode bin buffer for dimensions rounded up to whole macroblocks. */
static u32 hfi_buffer_bin_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 n_aligned_h = ALIGN(frame_height, 16);
	u32 n_aligned_w = ALIGN(frame_width, 16);

	return size_h264d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
}

/* AV1 decode bin buffer; same scheme as the H.264 variant with AV1 CABAC ratios. */
static u32 size_av1d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 size_yuv, size_bin_hdr, size_bin_res;

	size_yuv = ((frame_width * frame_height) <= BIN_BUFFER_THRESHOLD) ?
		((BIN_BUFFER_THRESHOLD * 3) >> 1) :
		((frame_width * frame_height * 3) >> 1);
	size_bin_hdr = size_yuv * AV1_CABAC_HDR_RATIO_HD_TOT;
	size_bin_res = size_yuv * AV1_CABAC_RES_RATIO_HD_TOT;
	size_bin_hdr = ALIGN(size_bin_hdr / num_vpp_pipes,
			     DMA_ALIGNMENT) * num_vpp_pipes;
	size_bin_res = ALIGN(size_bin_res / num_vpp_pipes,
			     DMA_ALIGNMENT) * num_vpp_pipes;

	return size_bin_hdr + size_bin_res;
}

/* AV1 decode bin buffer for dimensions rounded up to 16-pel units. */
static u32 hfi_buffer_bin_av1d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 n_aligned_h = ALIGN(frame_height, 16);
	u32 n_aligned_w = ALIGN(frame_width, 16);

	return size_av1d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
}

/* H.265 decode bin buffer; same per-pipe scheme with HEVC CABAC ratios. */
static u32 size_h265d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 product = frame_width * frame_height;
	u32 size_yuv, size_bin_hdr, size_bin_res;

	size_yuv = (product <= BIN_BUFFER_THRESHOLD) ?
		   ((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1);
	size_bin_hdr = size_yuv * H265_CABAC_HDR_RATIO_HD_TOT;
	size_bin_res = size_yuv * H265_CABAC_RES_RATIO_HD_TOT;
	size_bin_hdr = ALIGN(size_bin_hdr / num_vpp_pipes, DMA_ALIGNMENT) * num_vpp_pipes;
	size_bin_res = ALIGN(size_bin_res / num_vpp_pipes, DMA_ALIGNMENT) * num_vpp_pipes;

	return size_bin_hdr + size_bin_res;
}

/*
 * VP9 decode bin buffer: header and residual budgets are fractions of
 * the worst-case YUV size scaled by the frame concurrency level,
 * DMA-aligned per VPP pipe, then multiplied back by the pipe count.
 */
static u32 hfi_buffer_bin_vp9d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 _size_yuv = ALIGN(frame_width, 16) * ALIGN(frame_height, 16) * 3 / 2;
	u32 _size = ALIGN(((max_t(u32, _size_yuv, ((BIN_BUFFER_THRESHOLD * 3) >> 1)) *
		VPX_DECODER_FRAME_BIN_HDR_BUDGET / VPX_DECODER_FRAME_BIN_DENOMINATOR *
		VPX_DECODER_FRAME_CONCURENCY_LVL) / num_vpp_pipes), DMA_ALIGNMENT) +
		ALIGN(((max_t(u32, _size_yuv, ((BIN_BUFFER_THRESHOLD * 3) >> 1)) *
		VPX_DECODER_FRAME_BIN_RES_BUDGET / VPX_DECODER_FRAME_BIN_DENOMINATOR *
		VPX_DECODER_FRAME_CONCURENCY_LVL) / num_vpp_pipes), DMA_ALIGNMENT);

	return _size * num_vpp_pipes;
}

/* H.265 decode bin buffer for dimensions rounded up to 16-pel units. */
static u32 hfi_buffer_bin_h265d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 n_aligned_w = ALIGN(frame_width, 16);
	u32 n_aligned_h = ALIGN(frame_height, 16);

	return size_h265d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
}

/*
 * H.264 collocated-MV buffer: a colocated-MV plane (128 bytes per MB
 * column) plus a "col zero" flag plane (4 bytes per MB column), each
 * sized per MB-pair row, doubled and 512-byte aligned, allocated for
 * _comv_bufcount reference buffers plus a fixed 512-byte pad.
 */
static u32 hfi_buffer_comv_h264d(u32 frame_width, u32 frame_height, u32 _comv_bufcount)
{
	u32 frame_height_in_mbs = DIV_ROUND_UP(frame_height, 16);
	u32 frame_width_in_mbs = DIV_ROUND_UP(frame_width, 16);
	u32 col_zero_aligned_width = (frame_width_in_mbs << 2);
	u32 col_mv_aligned_width = (frame_width_in_mbs << 7);
	u32 col_zero_size, size_colloc;

	col_mv_aligned_width = ALIGN(col_mv_aligned_width, 16);
	col_zero_aligned_width = ALIGN(col_zero_aligned_width, 16);
	col_zero_size = col_zero_aligned_width *
		((frame_height_in_mbs + 1) >> 1);
	col_zero_size = ALIGN(col_zero_size, 64);
	/* doubled - presumably top/bottom field storage; confirm vs firmware spec */
	col_zero_size <<= 1;
	col_zero_size = ALIGN(col_zero_size, 512);
	size_colloc = col_mv_aligned_width * ((frame_height_in_mbs + 1) >> 1);
	size_colloc = ALIGN(size_colloc, 64);
	size_colloc <<= 1;
	size_colloc = ALIGN(size_colloc, 512);
	size_colloc += (col_zero_size + SIZE_H264D_BUFTAB_T * 2);

	return (size_colloc * (_comv_bufcount)) + 512;
}

/* H.265 collocated-MV buffer: 256 bytes per 16x16 unit, per reference. */
static u32 hfi_buffer_comv_h265d(u32 frame_width, u32 frame_height, u32 _comv_bufcount)
{
	u32 frame_height_in_mbs = (frame_height + 15) >> 4;
	u32 frame_width_in_mbs = (frame_width + 15) >> 4;
	u32 _size;

	_size = ALIGN(((frame_width_in_mbs * frame_height_in_mbs) << 8), 512);

	return (_size * (_comv_bufcount)) + 512;
}

/* Number of LCUs needed to cover the frame at the given LCU size. */
static u32 num_lcu(u32 frame_width, u32 frame_height, u32 lcu_size)
{
	return ((frame_width + lcu_size - 1) / lcu_size) *
		((frame_height + lcu_size - 1) / lcu_size);
}

/*
 * AV1 collocated-MV buffer: worst case of the 64x64 and 128x128
 * superblock layouts, doubled, per reference buffer.
 */
static u32 hfi_buffer_comv_av1d(u32 frame_width, u32 frame_height, u32 comv_bufcount)
{
	u32 size;

	size = 2 * ALIGN(max(num_lcu(frame_width, frame_height, 64) *
			     AV1D_SIZE_BSE_COL_MV_64x64,
			     num_lcu(frame_width, frame_height, 128) *
			     AV1D_SIZE_BSE_COL_MV_128x128),
			 DMA_ALIGNMENT);
	size *= comv_bufcount;

	return size;
}

/* BSE command buffer for H.264 decode, capped at H264D_MAX_SLICE entries. */
static u32 size_h264d_bse_cmd_buf(u32 frame_height)
{
	u32 height = ALIGN(frame_height, 32);

	return min_t(u32, (DIV_ROUND_UP(height, 16) * 48), H264D_MAX_SLICE) *
		SIZE_H264D_BSE_CMD_PER_BUF;
}

/* BSE command buffer for H.265 decode, capped at H265D_MAX_SLICE + 1 entries. */
static u32 size_h265d_bse_cmd_buf(u32 frame_width, u32 frame_height)
{
	u32 _size = ALIGN(((ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
			   (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) *
			  NUM_HW_PIC_BUF, DMA_ALIGNMENT);
	_size = min_t(u32, _size, H265D_MAX_SLICE + 1);
	_size = 2 * _size * SIZE_H265D_BSE_CMD_PER_BUF;

	return _size;
}

/*
 * Session-lifetime (persist) buffer for H.265 decode; the Dolby RPU
 * metadata area is only reserved when rpu_enabled is nonzero.
 */
static u32 hfi_buffer_persist_h265d(u32 rpu_enabled)
{
	return
		ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 +
		       H265_NUM_FRM_INFO * H265_DISPLAY_BUF_SIZE +
		       H265_NUM_TILE * sizeof(u32) +
		       NUM_HW_PIC_BUF * SIZE_SEI_USERDATA +
		       rpu_enabled * NUM_HW_PIC_BUF * SIZE_DOLBY_RPU_METADATA),
		      DMA_ALIGNMENT);
}

/* Worst-case (8192x4320) VP9 collocated-MV storage, in bytes. */
static inline
u32 hfi_iris3_vp9d_comv_size(void)
{
	return (((8192 + 63) >> 6) * ((4320 + 63) >> 6) * 8 * 8 * 2 * 8);
}

/* Session-lifetime (persist) buffer for VP9 decode. */
static u32 hfi_buffer_persist_vp9d(void)
{
	return ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE, DMA_ALIGNMENT) +
		ALIGN(hfi_iris3_vp9d_comv_size(), DMA_ALIGNMENT) +
		ALIGN(MAX_SUPERFRAME_HEADER_LEN, DMA_ALIGNMENT) +
		ALIGN(VP9_UDC_HEADER_BUF_SIZE, DMA_ALIGNMENT) +
		ALIGN(VP9_NUM_FRAME_INFO_BUF * CCE_TILE_OFFSET_SIZE, DMA_ALIGNMENT) +
		ALIGN(VP9_NUM_FRAME_INFO_BUF * VP9_FRAME_INFO_BUF_SIZE, DMA_ALIGNMENT) +
		HDR10_HIST_EXTRADATA_SIZE;
}

/* VPP command buffer for H.264 decode, clamped to VPP_CMD_MAX_SIZE. */
static u32 size_h264d_vpp_cmd_buf(u32 frame_height)
{
	u32 size, height = ALIGN(frame_height, 32);

	size = min_t(u32, (DIV_ROUND_UP(height, 16) * 48), H264D_MAX_SLICE) *
		SIZE_H264D_VPP_CMD_PER_BUF;

	return size > VPP_CMD_MAX_SIZE ? VPP_CMD_MAX_SIZE : size;
}

/* Session-lifetime (persist) buffer for H.264 decode. */
static u32 hfi_buffer_persist_h264d(void)
{
	return ALIGN(SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264 +
		     H264_DISPLAY_BUF_SIZE * H264_NUM_FRM_INFO +
		     NUM_HW_PIC_BUF * SIZE_SEI_USERDATA,
		     DMA_ALIGNMENT);
}

/*
 * Session-lifetime (persist) buffer for AV1 decode; includes
 * collocated-MV storage for total_ref_count references (used by the
 * DRAP path, where comv lives here instead of a separate buffer).
 * NOTE(review): SIZE_AV1D_METADATA is counted twice (once standalone,
 * once per AV1D_NUM_HW_PIC_BUF) - confirm against the firmware
 * sizing spec whether both terms are intended.
 */
static u32 hfi_buffer_persist_av1d(u32 max_width, u32 max_height, u32 total_ref_count)
{
	u32 comv_size, size;

	comv_size = hfi_buffer_comv_av1d(max_width, max_height, total_ref_count);
	size = ALIGN((SIZE_AV1D_SEQUENCE_HEADER * 2 + SIZE_AV1D_METADATA +
		      AV1D_NUM_HW_PIC_BUF * (SIZE_AV1D_TILE_OFFSET + SIZE_AV1D_QM) +
		      AV1D_NUM_FRAME_HEADERS * (SIZE_AV1D_FRAME_HEADER +
		      2 * SIZE_AV1D_PROB_TABLE) + comv_size + HDR10_HIST_EXTRADATA_SIZE +
		      SIZE_AV1D_METADATA * AV1D_NUM_HW_PIC_BUF), DMA_ALIGNMENT);

	return ALIGN(size, DMA_ALIGNMENT);
}

/* Non-collocated internal buffer for H.264 decode: BSE + VPP commands + HW pic. */
static u32 hfi_buffer_non_comv_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 size_bse = size_h264d_bse_cmd_buf(frame_height);
	u32 size_vpp = size_h264d_vpp_cmd_buf(frame_height);
	u32 size = ALIGN(size_bse, DMA_ALIGNMENT) +
		ALIGN(size_vpp, DMA_ALIGNMENT) +
		ALIGN(SIZE_HW_PIC(SIZE_H264D_HW_PIC_T), DMA_ALIGNMENT);

	return ALIGN(size, DMA_ALIGNMENT);
}

/* VPP command buffer for H.265 decode, clamped to VPP_CMD_MAX_SIZE. */
static u32 size_h265d_vpp_cmd_buf(u32 frame_width, u32 frame_height)
{
	u32 _size = ALIGN(((ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
			   (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) *
			  NUM_HW_PIC_BUF, DMA_ALIGNMENT);
	_size = min_t(u32, _size, H265D_MAX_SLICE + 1);
	_size = ALIGN(_size, 4);
	_size = 2 * _size * SIZE_H265D_VPP_CMD_PER_BUF;
	if (_size > VPP_CMD_MAX_SIZE)
		_size = VPP_CMD_MAX_SIZE;

	return _size;
}

/* Non-collocated internal buffer for H.265 decode. */
static u32 hfi_buffer_non_comv_h265d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 _size_bse = size_h265d_bse_cmd_buf(frame_width, frame_height);
	u32 _size_vpp = size_h265d_vpp_cmd_buf(frame_width,
					       frame_height);
	u32 _size = ALIGN(_size_bse, DMA_ALIGNMENT) +
		ALIGN(_size_vpp, DMA_ALIGNMENT) +
		ALIGN(NUM_HW_PIC_BUF * 20 * 22 * 4, DMA_ALIGNMENT) +
		ALIGN(2 * sizeof(u16) *
		      (ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
		      (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS), DMA_ALIGNMENT) +
		ALIGN(SIZE_HW_PIC(SIZE_H265D_HW_PIC_T), DMA_ALIGNMENT) +
		HDR10_HIST_EXTRADATA_SIZE;

	return ALIGN(_size, DMA_ALIGNMENT);
}

/*
 * VPSS line buffers, needed when the output path (OPB) is active:
 * top-line luma/chroma write buffers plus left-line (LLB) luma/chroma.
 */
static u32 size_vpss_lb(u32 frame_width, u32 frame_height)
{
	u32 opb_lb_wr_llb_y_buffer_size, opb_lb_wr_llb_uv_buffer_size;
	u32 opb_wr_top_line_chroma_buffer_size;
	u32 opb_wr_top_line_luma_buffer_size;
	u32 macrotiling_size = 32;

	opb_wr_top_line_luma_buffer_size =
		ALIGN(frame_width, macrotiling_size) / macrotiling_size * 256;
	opb_wr_top_line_luma_buffer_size =
		ALIGN(opb_wr_top_line_luma_buffer_size, DMA_ALIGNMENT) +
		(MAX_TILE_COLUMNS - 1) * 256;
	opb_wr_top_line_luma_buffer_size =
		max_t(u32, opb_wr_top_line_luma_buffer_size, (32 * ALIGN(frame_height, 8)));
	opb_wr_top_line_chroma_buffer_size = opb_wr_top_line_luma_buffer_size;
	/* (4 / 2): presumably the 4:2:0 vertical subsampling factor - confirm */
	opb_lb_wr_llb_uv_buffer_size =
		ALIGN((ALIGN(frame_height, 8) / (4 / 2)) * 64, 32);
	opb_lb_wr_llb_y_buffer_size =
		ALIGN((ALIGN(frame_height, 8) / (4 / 2)) * 64, 32);
	return opb_wr_top_line_luma_buffer_size +
		opb_wr_top_line_chroma_buffer_size +
		opb_lb_wr_llb_uv_buffer_size +
		opb_lb_wr_llb_y_buffer_size;
}

/* H.265 front-end top-line data buffer. */
static inline
u32 size_h265d_lb_fe_top_data(u32 frame_width, u32 frame_height)
{
	return MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE *
		(ALIGN(frame_width, 64) + 8) * 2;
}

/* H.265 front-end top-line control buffer (per minimum-LCU column). */
static inline
u32 size_h265d_lb_fe_top_ctrl(u32 frame_width, u32 frame_height)
{
	return MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE *
		(ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS);
}

/* H.265 front-end left-column control buffer (per minimum-LCU row). */
static inline
u32 size_h265d_lb_fe_left_ctrl(u32 frame_width, u32 frame_height)
{
	return MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE *
		(ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS);
}

/* H.265 syntax-engine top-line control buffer. */
static inline
u32 size_h265d_lb_se_top_ctrl(u32 frame_width, u32 frame_height)
{
	return (LCU_MAX_SIZE_PELS / 8 * (128 / 8)) * ((frame_width + 15) >> 4);
}

/* H.265 syntax-engine left control: worst case over 16/32/64-pel LCU sizes. */
static inline
u32 size_h265d_lb_se_left_ctrl(u32 frame_width, u32 frame_height)
{
	return max_t(u32, ((frame_height + 16 - 1) / 8) *
		     MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE,
		     max_t(u32, ((frame_height + 32 - 1) / 8) *
			   MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE,
			   ((frame_height + 64 - 1) / 8) *
			   MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE));
}

/* H.265 prediction-engine top-line data buffer. */
static inline
u32 size_h265d_lb_pe_top_data(u32 frame_width, u32 frame_height)
{
	return MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE *
		(ALIGN(frame_width, LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS);
}

/* H.265 VSP top-line buffer: 128 bytes per 64-pel column. */
static inline
u32 size_h265d_lb_vsp_top(u32 frame_width, u32 frame_height)
{
	return ((frame_width + 63) >> 6) * 128;
}

/* H.265 VSP left-column buffer: 128 bytes per 64-pel row. */
static inline
u32 size_h265d_lb_vsp_left(u32 frame_width, u32 frame_height)
{
	return ((frame_height + 63) >> 6) * 128;
}

/* Recon DMA metadata write buffer; identical to the H.264 variant. */
static inline
u32 size_h265d_lb_recon_dma_metadata_wr(u32 frame_width, u32 frame_height)
{
	return size_h264d_lb_recon_dma_metadata_wr(frame_height);
}

/* QP map buffer; identical to the H.264 variant. */
static inline
u32 size_h265d_qp(u32 frame_width, u32 frame_height)
{
	return size_h264d_qp(frame_width, frame_height);
}

/*
 * Aggregate line-buffer size for H.265 decode; per-pipe (left-column)
 * buffers are scaled by num_vpp_pipes, and the VPSS line buffer is
 * added when the output path (OPB) is in use.
 */
static inline
u32 hfi_buffer_line_h265d(u32 frame_width, u32 frame_height, bool is_opb, u32 num_vpp_pipes)
{
	u32 vpss_lb_size = 0, _size;

	_size = ALIGN(size_h265d_lb_fe_top_data(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_h265d_lb_fe_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_h265d_lb_fe_left_ctrl(frame_width, frame_height),
		      DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_h265d_lb_se_left_ctrl(frame_width, frame_height),
		      DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_h265d_lb_se_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_h265d_lb_pe_top_data(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_h265d_lb_vsp_top(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_h265d_lb_vsp_left(frame_width, frame_height),
		      DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_h265d_lb_recon_dma_metadata_wr(frame_width, frame_height),
		      DMA_ALIGNMENT) * 4 +
		ALIGN(size_h265d_qp(frame_width, frame_height), DMA_ALIGNMENT);
	if (is_opb)
		vpss_lb_size = size_vpss_lb(frame_width, frame_height);

	return ALIGN((_size + vpss_lb_size), DMA_ALIGNMENT);
}

/* VPX front-end left control: worst case over 16/32/64-pel block rows. */
static inline
u32 size_vpxd_lb_fe_left_ctrl(u32 frame_width, u32 frame_height)
{
	return max_t(u32, ((frame_height + 15) >> 4) *
		     MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE,
		     max_t(u32, ((frame_height + 31) >> 5) *
			   MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE,
			   ((frame_height + 63) >> 6) *
			   MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE));
}

/* VPX front-end top-line control buffer. */
static inline
u32 size_vpxd_lb_fe_top_ctrl(u32 frame_width, u32 frame_height)
{
	return ((ALIGN(frame_width, 64) + 8) * 10 * 2);
}

/* VPX syntax-engine top-line control buffer. */
static inline
u32 size_vpxd_lb_se_top_ctrl(u32 frame_width, u32 frame_height)
{
	return ((frame_width + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
}

/* VPX syntax-engine left control: worst case over 16/32/64-pel block rows. */
static inline
u32 size_vpxd_lb_se_left_ctrl(u32 frame_width, u32 frame_height)
{
	return max_t(u32, ((frame_height + 15) >> 4) *
		     MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE,
		     max_t(u32, ((frame_height + 31) >> 5) *
			   MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE,
			   ((frame_height + 63) >> 6) *
			   MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE));
}

/* VPX recon DMA metadata write buffer. */
static inline
u32 size_vpxd_lb_recon_dma_metadata_wr(u32 frame_width, u32 frame_height)
{
	return ALIGN((ALIGN(frame_height, 8) / (4 / 2)) * 64,
		     BUFFER_ALIGNMENT_32_BYTES);
}

/* MPEG-2 front-end top-line data buffer (currently unreferenced). */
static inline __maybe_unused
u32 size_mp2d_lb_fe_top_data(u32 frame_width, u32 frame_height)
{
	return ((ALIGN(frame_width, 16) + 8) * 10 * 2);
}

/* VP9 front-end top-line data buffer. */
static inline
u32 size_vp9d_lb_fe_top_data(u32 frame_width, u32 frame_height)
{
	return (ALIGN(ALIGN(frame_width, 8), 64) + 8) * 10 * 2;
}

/* VP9 prediction-engine top-line data: 176 bytes per 64-pel column. */
static inline
u32 size_vp9d_lb_pe_top_data(u32 frame_width, u32 frame_height)
{
	return ((ALIGN(ALIGN(frame_width, 8), 64) >> 6) * 176);
}

/* VP9 VSP top-line buffer. */
static inline
u32 size_vp9d_lb_vsp_top(u32 frame_width, u32 frame_height)
{
	return (((ALIGN(ALIGN(frame_width, 8), 64) >> 6) * 64 * 8) + 256);
}

/* VP9 QP map buffer; identical to the H.264 variant. */
static inline
u32 size_vp9d_qp(u32 frame_width, u32 frame_height)
{
	return size_h264d_qp(frame_width, frame_height);
}

/* Sum of all VP9 decode line buffers; per-pipe entries scaled by num_vpp_pipes. */
static inline
u32 hfi_iris3_vp9d_lb_size(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	return ALIGN(size_vpxd_lb_fe_left_ctrl(frame_width, frame_height), DMA_ALIGNMENT) *
		num_vpp_pipes +
		ALIGN(size_vpxd_lb_se_left_ctrl(frame_width, frame_height), DMA_ALIGNMENT) *
		num_vpp_pipes +
		ALIGN(size_vp9d_lb_vsp_top(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_vpxd_lb_fe_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) +
		2 * ALIGN(size_vpxd_lb_recon_dma_metadata_wr(frame_width, frame_height),
			  DMA_ALIGNMENT) +
		ALIGN(size_vpxd_lb_se_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_vp9d_lb_pe_top_data(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_vp9d_lb_fe_top_data(frame_width, frame_height), DMA_ALIGNMENT) +
		ALIGN(size_vp9d_qp(frame_width, frame_height), DMA_ALIGNMENT);
}

/*
 * Line buffer for VP9 decode plus a fixed 4096-byte pad;
 * _yuv_bufcount_min is accepted for interface symmetry but does not
 * enter the calculation.
 */
static inline
u32 hfi_buffer_line_vp9d(u32 frame_width, u32 frame_height, u32 _yuv_bufcount_min, bool is_opb,
			 u32 num_vpp_pipes)
{
	u32 vpss_lb_size = 0;
	u32 _lb_size;

	_lb_size = hfi_iris3_vp9d_lb_size(frame_width, frame_height, num_vpp_pipes);

	if (is_opb)
		vpss_lb_size = size_vpss_lb(frame_width, frame_height);

	return _lb_size + vpss_lb_size + 4096;
}

/*
 * Aggregate line-buffer size for H.264 decode (the size_h264d_lb_*
 * helpers are declared outside this file); per-pipe left-column buffers
 * are scaled by num_vpp_pipes, VPSS added when OPB is in use.
 */
static u32 hfi_buffer_line_h264d(u32 frame_width, u32 frame_height,
				 bool is_opb, u32 num_vpp_pipes)
{
	u32 vpss_lb_size = 0;
	u32 size;

	size = ALIGN(size_h264d_lb_fe_top_data(frame_width), DMA_ALIGNMENT) +
		ALIGN(size_h264d_lb_fe_top_ctrl(frame_width), DMA_ALIGNMENT) +
		ALIGN(size_h264d_lb_fe_left_ctrl(frame_height), DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_h264d_lb_se_top_ctrl(frame_width), DMA_ALIGNMENT) +
		ALIGN(size_h264d_lb_se_left_ctrl(frame_height), DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_h264d_lb_pe_top_data(frame_width), DMA_ALIGNMENT) +
		ALIGN(size_h264d_lb_vsp_top(frame_width), DMA_ALIGNMENT) +
		ALIGN(size_h264d_lb_recon_dma_metadata_wr(frame_height), DMA_ALIGNMENT) * 2 +
		ALIGN(size_h264d_qp(frame_width, frame_height), DMA_ALIGNMENT);
	size = ALIGN(size, DMA_ALIGNMENT);
	if (is_opb)
		vpss_lb_size = size_vpss_lb(frame_width, frame_height);

	return ALIGN((size + vpss_lb_size), DMA_ALIGNMENT);
}

/*
 * AV1 OPB write-1 line buffer, NV12 UBWC: one UBWC tile per Y-tile
 * column plus per-tile-column slack.
 */
static u32 size_av1d_lb_opb_wr1_nv12_ubwc(u32 frame_width, u32 frame_height)
{
	u32 size, y_width, y_width_a = 128;

	y_width = ALIGN(frame_width, y_width_a);

	size = ((y_width + HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH - 1) /
		HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH +
		(AV1D_MAX_TILE_COLS - 1));
	return size * UBWC_TILE_SIZE;
}

/* AV1 OPB write-1 line buffer, TP10 UBWC variant. */
static u32 size_av1d_lb_opb_wr1_tp10_ubwc(u32 frame_width, u32 frame_height)
{
	u32 size, y_width, y_width_a = 256;

	y_width = ALIGN(frame_width, y_width_a);

	size = ((y_width + HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH - 1) /
		HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH +
		(AV1D_MAX_TILE_COLS - 1));

	return size * UBWC_TILE_SIZE;
}

/*
 * Aggregate line-buffer size for AV1 decode: per-pipe left buffers are
 * scaled by num_vpp_pipes, the larger of the NV12/TP10 OPB write
 * buffers is added, and the sum is doubled together with the VPSS line
 * buffer when OPB is in use.
 */
static u32 hfi_buffer_line_av1d(u32 frame_width, u32 frame_height,
				bool is_opb, u32 num_vpp_pipes)
{
	u32 size, vpss_lb_size, opbwrbufsize, opbwr8, opbwr10;

	size = ALIGN(size_av1d_lb_fe_top_data(frame_width, frame_height),
		     DMA_ALIGNMENT) +
		ALIGN(size_av1d_lb_fe_top_ctrl(frame_width, frame_height),
		      DMA_ALIGNMENT) +
		ALIGN(size_av1d_lb_fe_left_data(frame_width, frame_height),
		      DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_av1d_lb_fe_left_ctrl(frame_width, frame_height),
		      DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_av1d_lb_se_left_ctrl(frame_width, frame_height),
		      DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_av1d_lb_se_top_ctrl(frame_width, frame_height),
		      DMA_ALIGNMENT) +
		ALIGN(size_av1d_lb_pe_top_data(frame_width, frame_height),
		      DMA_ALIGNMENT) +
		ALIGN(size_av1d_lb_vsp_top(frame_width, frame_height),
		      DMA_ALIGNMENT) +
		ALIGN(size_av1d_lb_recon_dma_metadata_wr
		      (frame_width, frame_height), DMA_ALIGNMENT) * 2 +
		ALIGN(size_av1d_qp(frame_width, frame_height), DMA_ALIGNMENT);
	/* take the worst case of the 8-bit (NV12) and 10-bit (TP10) OPB buffers */
	opbwr8 = size_av1d_lb_opb_wr1_nv12_ubwc(frame_width, frame_height);
	opbwr10 = size_av1d_lb_opb_wr1_tp10_ubwc(frame_width, frame_height);
	opbwrbufsize = opbwr8 >= opbwr10 ? opbwr8 : opbwr10;
	size = ALIGN((size + opbwrbufsize), DMA_ALIGNMENT);
	if (is_opb) {
		vpss_lb_size = size_vpss_lb(frame_width, frame_height);
		size = ALIGN((size + vpss_lb_size) * 2, DMA_ALIGNMENT);
	}

	return size;
}

/*
 * AV1 intra-block-copy reference buffer, NV12 UBWC: 4K-aligned Y and UV
 * pixel planes plus their UBWC metadata planes (stride/height rounded
 * to tile granularity and the metadata alignment multiples).
 */
static u32 size_av1d_ibc_nv12_ubwc(u32 frame_width, u32 frame_height)
{
	u32 size;
	u32 y_width_a = 128, y_height_a = 32;
	u32 uv_width_a = 128, uv_height_a = 32;
	u32 ybufsize, uvbufsize, y_width, y_height, uv_width, uv_height;
	u32 y_meta_width_a = 64, y_meta_height_a = 16;
	u32 uv_meta_width_a = 64, uv_meta_height_a = 16;
	u32 meta_height, meta_stride, meta_size;
	u32 tile_width_y = HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH;
	u32 tile_height_y = HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_HEIGHT;
	u32 tile_width_uv = HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_WIDTH;
	u32 tile_height_uv = HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_HEIGHT;

	y_width = ALIGN(frame_width, y_width_a);
	y_height = ALIGN(frame_height, y_height_a);
	uv_width = ALIGN(frame_width, uv_width_a);
	uv_height = ALIGN(((frame_height + 1) >> 1), uv_height_a);
	ybufsize = ALIGN((y_width * y_height), HFI_ALIGNMENT_4096);
	uvbufsize = ALIGN(uv_width * uv_height, HFI_ALIGNMENT_4096);
	size = ybufsize + uvbufsize;
	/* Y metadata plane, sized from tile-granular stride and height */
	meta_stride = ALIGN(((frame_width + (tile_width_y - 1)) / tile_width_y),
			    y_meta_width_a);
	meta_height = ALIGN(((frame_height + (tile_height_y - 1)) / tile_height_y),
			    y_meta_height_a);
	meta_size = ALIGN(meta_stride * meta_height, HFI_ALIGNMENT_4096);
	size += meta_size;
	/* UV metadata plane, over the half-width/half-height chroma plane */
	meta_stride = ALIGN(((((frame_width + 1) >> 1) + (tile_width_uv - 1)) /
			     tile_width_uv), uv_meta_width_a);
	meta_height = ALIGN(((((frame_height + 1) >> 1) + (tile_height_uv - 1)) /
			     tile_height_uv), uv_meta_height_a);
	meta_size = ALIGN(meta_stride * meta_height, HFI_ALIGNMENT_4096);
	size += meta_size;

	return size;
}

/* TP10 Y stride: width padded to 192, then scaled by 4/3 (presumably 10-bit packing). */
static u32 hfi_yuv420_tp10_calc_y_stride(u32 frame_width, u32 stride_multiple)
{
	u32 stride;

	stride = ALIGN(frame_width, 192);
	stride = ALIGN(stride * 4 / 3, stride_multiple);

	return stride;
}

/* TP10 Y plane height, rounded up to the buffer-height multiple. */
static u32 hfi_yuv420_tp10_calc_y_bufheight(u32 frame_height, u32 min_buf_height_multiple)
{
	return ALIGN(frame_height, min_buf_height_multiple);
}

/* TP10 UV stride; same formula as the Y stride. */
static u32 hfi_yuv420_tp10_calc_uv_stride(u32 frame_width, u32 stride_multiple)
{
	u32 stride;

	stride = ALIGN(frame_width, 192);
	stride = ALIGN(stride * 4 / 3, stride_multiple);

	return stride;
}

/* TP10 UV plane height: half the frame height, rounded up. */
static u32 hfi_yuv420_tp10_calc_uv_bufheight(u32 frame_height, u32 min_buf_height_multiple)
{
	return ALIGN(((frame_height + 1) >> 1), min_buf_height_multiple);
}

/* AV1 intra-block-copy reference buffer, TP10 UBWC variant. */
static u32 size_av1d_ibc_tp10_ubwc(u32 frame_width, u32 frame_height)
{
	u32 size;
	u32 y_width_a = 256, y_height_a = 16,
	    uv_width_a = 256, uv_height_a = 16;
	u32 ybufsize, uvbufsize, y_width, y_height, uv_width, uv_height;
	u32 y_meta_width_a = 64, y_meta_height_a = 16,
	    uv_meta_width_a = 64, uv_meta_height_a = 16;
	u32 meta_height, meta_stride, meta_size;
	u32 tile_width_y = HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH;
	u32 tile_height_y = HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_HEIGHT;
	u32 tile_width_uv = HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_WIDTH;
	u32 tile_height_uv = HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_HEIGHT;

	y_width = hfi_yuv420_tp10_calc_y_stride(frame_width, y_width_a);
	y_height = hfi_yuv420_tp10_calc_y_bufheight(frame_height, y_height_a);
	uv_width = hfi_yuv420_tp10_calc_uv_stride(frame_width, uv_width_a);
	uv_height = hfi_yuv420_tp10_calc_uv_bufheight(frame_height, uv_height_a);
	ybufsize = ALIGN(y_width * y_height, HFI_ALIGNMENT_4096);
	uvbufsize = ALIGN(uv_width * uv_height, HFI_ALIGNMENT_4096);
	size = ybufsize + uvbufsize;
	meta_stride = ALIGN(((frame_width + (tile_width_y -
			     1)) / tile_width_y),
			    y_meta_width_a);
	meta_height = ALIGN(((frame_height + (tile_height_y - 1)) / tile_height_y),
			    y_meta_height_a);
	meta_size = ALIGN(meta_stride * meta_height, HFI_ALIGNMENT_4096);
	size += meta_size;
	meta_stride = ALIGN(((((frame_width + 1) >> 1) + (tile_width_uv - 1)) /
			     tile_width_uv), uv_meta_width_a);
	meta_height = ALIGN(((((frame_height + 1) >> 1) + (tile_height_uv - 1)) /
			     tile_height_uv), uv_meta_height_a);
	meta_size = ALIGN(meta_stride * meta_height, HFI_ALIGNMENT_4096);
	size += meta_size;

	return size;
}

/* AV1 IBC buffer: worst case of the NV12 (8-bit) and TP10 (10-bit) layouts. */
static u32 hfi_buffer_ibc_av1d(u32 frame_width, u32 frame_height)
{
	u32 size, ibc8, ibc10;

	ibc8 = size_av1d_ibc_nv12_ubwc(frame_width, frame_height);
	ibc10 = size_av1d_ibc_tp10_ubwc(frame_width, frame_height);
	size = ibc8 >= ibc10 ? ibc8 : ibc10;

	return ALIGN(size, DMA_ALIGNMENT);
}

/* Dispatch decoder BIN buffer sizing by the session codec. */
static u32 iris_vpu_dec_bin_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	struct v4l2_format *f = inst->fmt_src;
	u32 height = f->fmt.pix_mp.height;
	u32 width = f->fmt.pix_mp.width;

	if (inst->codec == V4L2_PIX_FMT_H264)
		return hfi_buffer_bin_h264d(width, height, num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_HEVC)
		return hfi_buffer_bin_h265d(width, height, num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_VP9)
		return hfi_buffer_bin_vp9d(width, height, num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_AV1)
		return hfi_buffer_bin_av1d(width, height, num_vpp_pipes);

	return 0;
}

/*
 * Dispatch decoder collocated-MV buffer sizing by codec. For AV1 with
 * DRAP enabled this returns 0: comv space is accounted for inside the
 * persist buffer instead (see iris_vpu_dec_persist_size()).
 */
static u32 iris_vpu_dec_comv_size(struct iris_inst *inst)
{
	u32 num_comv = VIDEO_MAX_FRAME;
	struct v4l2_format *f = inst->fmt_src;
	u32 height = f->fmt.pix_mp.height;
	u32 width = f->fmt.pix_mp.width;

	if (inst->codec == V4L2_PIX_FMT_H264)
		return hfi_buffer_comv_h264d(width, height, num_comv);
	else if (inst->codec == V4L2_PIX_FMT_HEVC)
		return hfi_buffer_comv_h265d(width, height, num_comv);
	else if (inst->codec == V4L2_PIX_FMT_AV1) {
		if (inst->fw_caps[DRAP].value)
			return 0;
		else
			return hfi_buffer_comv_av1d(width, height, num_comv);
	}

	return 0;
}

/*
 * Dispatch decoder persist buffer sizing by codec; for AV1 with DRAP
 * the persist buffer is sized for the platform maximum resolution and
 * 16 references, otherwise with zero comv contribution.
 */
static u32 iris_vpu_dec_persist_size(struct iris_inst *inst)
{
	struct platform_inst_caps *caps;

	if (inst->codec == V4L2_PIX_FMT_H264)
		return hfi_buffer_persist_h264d();
	else if (inst->codec == V4L2_PIX_FMT_HEVC)
		return hfi_buffer_persist_h265d(0);
	else if (inst->codec == V4L2_PIX_FMT_VP9)
		return hfi_buffer_persist_vp9d();
	else if (inst->codec == V4L2_PIX_FMT_AV1) {
		caps = inst->core->iris_platform_data->inst_caps;
		if (inst->fw_caps[DRAP].value)
			return hfi_buffer_persist_av1d(caps->max_frame_width,
						       caps->max_frame_height, 16);
		else
			return hfi_buffer_persist_av1d(0, 0, 0);
	}

	return 0;
}

/* DPB buffer is only needed in split mode. */
static u32 iris_vpu_dec_dpb_size(struct iris_inst *inst)
{
	if (iris_split_mode_enabled(inst))
		return iris_get_buffer_size(inst, BUF_DPB);
	else
		return 0;
}

/* Dispatch decoder non-comv buffer sizing (H.264/H.265 only). */
static u32 iris_vpu_dec_non_comv_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	struct v4l2_format *f = inst->fmt_src;
	u32 height = f->fmt.pix_mp.height;
	u32 width = f->fmt.pix_mp.width;

	if (inst->codec == V4L2_PIX_FMT_H264)
		return hfi_buffer_non_comv_h264d(width, height, num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_HEVC)
		return hfi_buffer_non_comv_h265d(width, height, num_vpp_pipes);

	return 0;
}

/* Dispatch decoder line buffer sizing; OPB is implied by split mode. */
static u32 iris_vpu_dec_line_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	struct v4l2_format *f = inst->fmt_src;
	u32 height = f->fmt.pix_mp.height;
	u32 width = f->fmt.pix_mp.width;
	bool is_opb = false;
	u32 out_min_count = inst->buffers[BUF_OUTPUT].min_count;

	if (iris_split_mode_enabled(inst))
		is_opb = true;

	if (inst->codec == V4L2_PIX_FMT_H264)
		return hfi_buffer_line_h264d(width, height, is_opb, num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_HEVC)
		return hfi_buffer_line_h265d(width, height, is_opb, num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_VP9)
		return hfi_buffer_line_vp9d(width, height, out_min_count, is_opb,
					    num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_AV1)
		return hfi_buffer_line_av1d(width, height, is_opb, num_vpp_pipes);

	return 0;
}

/* SCRATCH_1 = comv + non-comv + line buffers. */
static u32 iris_vpu_dec_scratch1_size(struct iris_inst *inst)
{
	return iris_vpu_dec_comv_size(inst) +
		iris_vpu_dec_non_comv_size(inst) +
		iris_vpu_dec_line_size(inst);
}

/* Encoder bitstream width: destination width, swapped under 90/270 rotation. */
static inline u32 iris_vpu_enc_get_bitstream_width(struct iris_inst *inst)
{
	if (is_rotation_90_or_270(inst))
		return inst->fmt_dst->fmt.pix_mp.height;
	else
		return inst->fmt_dst->fmt.pix_mp.width;
}

/* Encoder bitstream height: destination height, swapped under 90/270 rotation. */
static inline u32 iris_vpu_enc_get_bitstream_height(struct iris_inst *inst)
{
	if (is_rotation_90_or_270(inst))
		return inst->fmt_dst->fmt.pix_mp.width;
	else
		return inst->fmt_dst->fmt.pix_mp.height;
}

/* Worst-case encoder output (bitstream) buffer size, 4K aligned. */
static inline u32 size_bin_bitstream_enc(u32 width, u32 height,
					 u32 rc_type)
{
	u32 aligned_height = ALIGN(height, 32);
	u32 aligned_width = ALIGN(width, 32);
	u32 frame_size = width * height * 3;
	u32 mbs_per_frame;

	/*
	 * Encoder output size calculation: 32 Align width/height
	 * For resolution < 720p : YUVsize * 4
	 * For resolution > 720p & <= 4K : YUVsize / 2
	 * For resolution > 4k : YUVsize / 4
	 * Initially frame_size = YUVsize * 2;
	 */

	mbs_per_frame = (ALIGN(aligned_height, 16) * ALIGN(aligned_width, 16)) / 256;

	if (mbs_per_frame < NUM_MBS_720P)
		frame_size = frame_size << 1;
	else if (mbs_per_frame <= NUM_MBS_4K)
		frame_size =
frame_size >> 2;
	else
		frame_size = frame_size >> 3;

	/*
	 * Uncapped rate-control modes (RC off / constant quality) have no
	 * bitrate ceiling, so double the worst-case bitstream estimate.
	 */
	if (rc_type == HFI_RATE_CONTROL_OFF || rc_type == HFI_RATE_CONTROL_CQ ||
	    rc_type == HFI_RC_OFF || rc_type == HFI_RC_CQ)
		frame_size = frame_size << 1;

	/*
	 * In case of opaque color format bitdepth will be known
	 * with first ETB, buffers allocated already with 8 bit
	 * won't be sufficient for 10 bit
	 * calculate size considering 10-bit by default
	 * For 10-bit cases size = size * 1.25
	 */
	frame_size *= 5;
	frame_size /= 4;

	return ALIGN(frame_size, SZ_4K);
}

/*
 * hfi_buffer_bin_enc() - total size of the encoder bin (bitstream scratch)
 * buffer, covering all VPP pipes.
 *
 * NOTE(review): the per-pipe split, SAO bin formula and the trailing +512
 * guard bytes mirror the firmware HFI buffer-requirement formulas - confirm
 * any change against the HFI spec.
 */
static inline u32 hfi_buffer_bin_enc(u32 width, u32 height,
				     u32 work_mode, u32 lcu_size,
				     u32 num_vpp_pipes, u32 rc_type)
{
	u32 sao_bin_buffer_size, padded_bin_size, bitstream_size;
	u32 total_bitbin_buffers, size_single_pipe, bitbin_size;
	u32 aligned_height = ALIGN(height, lcu_size);
	u32 aligned_width = ALIGN(width, lcu_size);

	bitstream_size = size_bin_bitstream_enc(width, height, rc_type);
	bitstream_size = ALIGN(bitstream_size, 256);

	if (work_mode == STAGE_2) {
		/* Two-stage pipeline: triple-buffered with 1.7x headroom. */
		total_bitbin_buffers = 3;
		bitbin_size = bitstream_size * 17 / 10;
		bitbin_size = ALIGN(bitbin_size, 256);
	} else {
		/* Single-stage: one buffer sized for raw 8-bit YUV444 worst case. */
		total_bitbin_buffers = 1;
		bitstream_size = aligned_width * aligned_height * 3;
		bitbin_size = ALIGN(bitstream_size, 256);
	}

	/* With more than two pipes the bin buffer is halved per pipe. */
	if (num_vpp_pipes > 2)
		size_single_pipe = bitbin_size / 2;
	else
		size_single_pipe = bitbin_size;

	size_single_pipe = ALIGN(size_single_pipe, 256);
	sao_bin_buffer_size = (64 * (((width + 32) * (height + 32)) >> 10)) + 384;
	padded_bin_size = ALIGN(size_single_pipe, 256);
	size_single_pipe = sao_bin_buffer_size + padded_bin_size;
	size_single_pipe = ALIGN(size_single_pipe, 256);
	bitbin_size = size_single_pipe * num_vpp_pipes;

	return ALIGN(bitbin_size, 256) * total_bitbin_buffers + 512;
}

/* Encoder bin buffer size for this instance (codec-dependent LCU size). */
static u32 iris_vpu_enc_bin_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	u32 height = iris_vpu_enc_get_bitstream_height(inst);
	u32 width = iris_vpu_enc_get_bitstream_width(inst);
	u32 stage = inst->fw_caps[STAGE].value;
	u32 lcu_size;

	/* HEVC uses 32x32 LCUs, H.264 16x16 macroblocks. */
	if (inst->codec == V4L2_PIX_FMT_HEVC)
		lcu_size = 32;
	else
		lcu_size = 16;

	return hfi_buffer_bin_enc(width, height, stage, lcu_size,
				  num_vpp_pipes, inst->hfi_rc_type);
}

/* Decoder partial-data buffer: only AV1 needs one (IBC buffer). */
static u32 iris_vpu_dec_partial_size(struct iris_inst *inst)
{
	struct v4l2_format *f = inst->fmt_src;
	u32 height = f->fmt.pix_mp.height;
	u32 width = f->fmt.pix_mp.width;

	return hfi_buffer_ibc_av1d(width, height);
}

/*
 * hfi_buffer_comv_enc() - collocated motion-vector plus collocated
 * rate-control buffer size for the encoder.
 * @num_recon: number of reconstruction buffers the MV portion must cover.
 * @standard: HFI_CODEC_ENCODE_HEVC or AVC; selects the per-LCU MV footprint.
 */
static inline
u32 hfi_buffer_comv_enc(u32 frame_width, u32 frame_height, u32 lcu_size,
			u32 num_recon, u32 standard)
{
	u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size);
	u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size);
	u32 num_lcu_in_frame = width_in_lcus * height_in_lcus;
	u32 mb_height = ((frame_height) + 15) >> 4;
	u32 mb_width = ((frame_width) + 15) >> 4;
	u32 size_colloc_mv, size_colloc_rc;

	size_colloc_mv = (standard == HFI_CODEC_ENCODE_HEVC) ?
(16 * ((num_lcu_in_frame << 2) + 32)) :
			 (3 * 16 * (width_in_lcus * height_in_lcus + 32));
	size_colloc_mv = ALIGN(size_colloc_mv, 256) * num_recon;
	size_colloc_rc = (((mb_width + 7) >> 3) * 16 * 2 * mb_height);
	/* RC state is kept for up to HFI_MAX_COL_FRAME collocated frames. */
	size_colloc_rc = ALIGN(size_colloc_rc, 256) * HFI_MAX_COL_FRAME;

	return size_colloc_mv + size_colloc_rc;
}

/* Encoder collocated-MV buffer size; one extra recon buffer is reserved. */
static u32 iris_vpu_enc_comv_size(struct iris_inst *inst)
{
	u32 height = iris_vpu_enc_get_bitstream_height(inst);
	u32 width = iris_vpu_enc_get_bitstream_width(inst);
	u32 num_recon = 1;
	u32 lcu_size = 16;

	if (inst->codec == V4L2_PIX_FMT_HEVC) {
		lcu_size = 32;
		return hfi_buffer_comv_enc(width, height, lcu_size,
					   num_recon + 1, HFI_CODEC_ENCODE_HEVC);
	}

	return hfi_buffer_comv_enc(width, height, lcu_size,
				   num_recon + 1, HFI_CODEC_ENCODE_AVC);
}

/*
 * Frame-level rate-control buffer size. The >>5 / >>4 selects the LCU row
 * count (HEVC 32-px rows vs AVC 16-px rows).
 * NOTE(review): the *11 and alignment constants are firmware-defined.
 */
static inline
u32 size_frame_rc_buf_size(u32 standard, u32 frame_height_coded,
			   u32 num_vpp_pipes_enc)
{
	u32 size = 0;

	size = (standard == HFI_CODEC_ENCODE_HEVC) ?
		(256 + 16 * (14 + ((((frame_height_coded) >> 5) + 7) >> 3))) :
		(256 + 16 * (14 + ((((frame_height_coded) >> 4) + 7) >> 3)));
	size *= 11;

	if (num_vpp_pipes_enc > 1)
		size = ALIGN(size, 256) * num_vpp_pipes_enc;

	return ALIGN(size, 512) * HFI_MAX_COL_FRAME;
}

/* Per-slice info buffer: 16 bytes per LCU plus a 256-byte header. */
static inline
u32 size_enc_slice_info_buf(u32 num_lcu_in_frame)
{
	return ALIGN((256 + (num_lcu_in_frame << 4)), 256);
}

/* Bit-count buffer: one u32 per LCU plus header. */
static inline u32 enc_bitcnt_buf_size(u32 num_lcu_in_frame)
{
	return ALIGN((256 + (4 * (num_lcu_in_frame))), 256);
}

/* Bitmap buffer: one bit per LCU plus header. */
static inline u32 enc_bitmap_buf_size(u32 num_lcu_in_frame)
{
	return ALIGN((256 + ((num_lcu_in_frame) >> 3)), 256);
}

/* QP/mode override buffer, double-buffered. */
static inline u32 size_override_buf(u32 num_lcumb)
{
	return ALIGN(((16 * (((num_lcumb) + 7) >> 3))), 256) * 2;
}

/* Intra-refresh buffer: 2 bytes per LCU, rounded to 8, three planes. */
static inline u32 size_ir_buf(u32 num_lcu_in_frame)
{
	return ALIGN((((((num_lcu_in_frame) << 1) + 7) & (~7)) * 3), 256);
}

/*
 * Top line-buffer data size: one luma line plus two half-width chroma
 * lines, 10 or 8 bits per pixel, each rounded up to 256 bytes.
 */
static inline
u32 size_linebuff_data(bool is_ten_bit, u32 frame_width_coded)
{
	return is_ten_bit ?
		(((((10 * (frame_width_coded) + 1024) + (256 - 1)) &
		   (~(256 - 1))) * 1) +
		 (((((10 * (frame_width_coded) + 1024) >> 1) + (256 - 1)) &
		   (~(256 - 1))) * 2)) :
		(((((8 * (frame_width_coded) + 1024) + (256 - 1)) &
		   (~(256 - 1))) * 1) +
		 (((((8 * (frame_width_coded) + 1024) >> 1) + (256 - 1)) &
		   (~(256 - 1))) * 2));
}

/* Left (column) line-buffer control size; per-pipe padded when split. */
static inline
u32 size_left_linebuff_ctrl(u32 standard, u32 frame_height_coded,
			    u32 num_vpp_pipes_enc)
{
	u32 size = 0;

	size = standard == HFI_CODEC_ENCODE_HEVC ?
(((frame_height_coded) +
	 (32)) / 32 * 4 * 16) :
	(((frame_height_coded) + 15) / 16 * 5 * 16);

	if ((num_vpp_pipes_enc) > 1) {
		/* Extra guard page per pipe when the frame is split. */
		size += 512;
		size = ALIGN(size, 512) *
		       num_vpp_pipes_enc;
	}

	return ALIGN(size, 256);
}

/*
 * Left line-buffer for reconstructed pixels; aligned to a per-pipe-scaled
 * 256-byte boundary. Note the mask binds to "* 1" by precedence, matching
 * the original HFI macro expansion.
 */
static inline
u32 size_left_linebuff_recon_pix(bool is_ten_bit, u32 frame_height_coded,
				 u32 num_vpp_pipes_enc)
{
	return (((is_ten_bit + 1) * 2 * (frame_height_coded) + 256) +
		(256 << (num_vpp_pipes_enc - 1)) - 1) &
	       (~((256 << (num_vpp_pipes_enc - 1)) - 1)) * 1;
}

/* Front-end top line-buffer control: per-LCU-column footprint by codec. */
static inline
u32 size_top_linebuff_ctrl_fe(u32 frame_width_coded, u32 standard)
{
	return standard == HFI_CODEC_ENCODE_HEVC ?
	       ALIGN((64 * ((frame_width_coded) >> 5)), 256) :
	       ALIGN((256 + 16 * ((frame_width_coded) >> 4)), 256);
}

/* Front-end left line-buffer control, replicated per VPP pipe. */
static inline
u32 size_left_linebuff_ctrl_fe(u32 frame_height_coded, u32 num_vpp_pipes_enc)
{
	return (((256 + 64 * ((frame_height_coded) >> 4)) +
		 (256 << (num_vpp_pipes_enc - 1)) - 1) &
		(~((256 << (num_vpp_pipes_enc - 1)) - 1)) * 1) *
	       num_vpp_pipes_enc;
}

/* Left line-buffer for recon luma UBWC metadata, per pipe. */
static inline
u32 size_left_linebuff_metadata_recon_y(u32 frame_height_coded,
					bool is_ten_bit,
					u32 num_vpp_pipes_enc)
{
	return ALIGN(((256 + 64 * ((frame_height_coded) /
		      (8 * (is_ten_bit ? 4 : 8))))), 256) * num_vpp_pipes_enc;
}

/* Left line-buffer for recon chroma UBWC metadata, per pipe. */
static inline
u32 size_left_linebuff_metadata_recon_uv(u32 frame_height_coded,
					 bool is_ten_bit,
					 u32 num_vpp_pipes_enc)
{
	return ALIGN(((256 + 64 * ((frame_height_coded) /
		      (4 * (is_ten_bit ? 4 : 8))))), 256) * num_vpp_pipes_enc;
}

/* Top line-buffer for reconstructed pixels: 3 (10-bit) or 2 bytes/pixel. */
static inline
u32 size_linebuff_recon_pix(bool is_ten_bit, u32 frame_width_coded)
{
	return ALIGN(((is_ten_bit ? 3 : 2) * (frame_width_coded)), 256);
}

/* Top line-buffer control: one byte per coded-width pixel. */
static inline
u32 size_line_buf_ctrl(u32 frame_width_coded)
{
	return ALIGN(frame_width_coded, 256);
}

/* Second top line-buffer control (buffer id 2); same footprint as id 1. */
static inline
u32 size_line_buf_ctrl_id2(u32 frame_width_coded)
{
	return ALIGN(frame_width_coded, 256);
}

/* SDE line-buffer: 16 bytes per 16-pixel column plus header. */
static inline u32 size_line_buf_sde(u32 frame_width_coded)
{
	return ALIGN((256 + (16 * ((frame_width_coded) >> 4))), 256);
}

/*
 * VPSS (scaler) line-buffer: fixed 8192-wide per-pipe portion plus a part
 * scaled by the larger frame dimension.
 */
static inline
u32 size_vpss_line_buf(u32 num_vpp_pipes_enc, u32 frame_height_coded,
		       u32 frame_width_coded)
{
	return ALIGN(((((((8192) >> 2) << 5) * (num_vpp_pipes_enc)) + 64) +
		     (((((max_t(u32, (frame_width_coded),
				(frame_height_coded)) + 3) >> 2) << 5) + 256) * 16)), 256);
}

/*
 * VPU33 variant of the VPSS line-buffer: separate 4-tap and div-by-2
 * scaler top/left contributions plus top/left line-buffer storage.
 * NOTE(review): constants come from the VPU33 HFI requirements.
 */
static inline
u32 size_vpss_line_buf_vpu33(u32 num_vpp_pipes_enc, u32 frame_height_coded,
			     u32 frame_width_coded)
{
	u32 vpss_4tap_top, vpss_4tap_left, vpss_div2_top;
	u32 vpss_div2_left, vpss_top_lb, vpss_left_lb;
	u32 size_left, size_top;
	u32 max_width_height;

	max_width_height = max_t(u32, frame_width_coded, frame_height_coded);
	vpss_4tap_top = ((((max_width_height * 2) + 3) >> 2) << 4) + 256;
	vpss_4tap_left = (((8192 + 3) >> 2) << 5) + 64;
	vpss_div2_top = (((max_width_height + 3) >> 2) << 4) + 256;
	vpss_div2_left = ((((max_width_height * 2) + 3) >> 2) << 5) + 64;
	vpss_top_lb = (frame_width_coded + 1) << 3;
	vpss_left_lb = (frame_height_coded << 3) * num_vpp_pipes_enc;
	size_left = (vpss_4tap_left + vpss_div2_left) * 2 * num_vpp_pipes_enc;
	size_top = (vpss_4tap_top + vpss_div2_top) * 2;

	return ALIGN(size_left + size_top + vpss_top_lb + vpss_left_lb, DMA_ALIGNMENT);
}

/* First-stage SAO top line-buffer: 16 bytes per 32-pixel column. */
static inline
u32 size_top_line_buf_first_stg_sao(u32 frame_width_coded)
{
	return ALIGN((16 * ((frame_width_coded) >> 5)), 256);
}

/* 8-bit reference (DPB) buffer: 32-aligned NV12 luma + half-height chroma. */
static inline
u32 size_enc_ref_buffer(u32 frame_width, u32
frame_height)
{
	u32 u_chroma_buffer_height = ALIGN(frame_height >> 1, 32);
	u32 u_buffer_height = ALIGN(frame_height, 32);
	u32 u_buffer_width = ALIGN(frame_width, 32);

	return (u_buffer_height + u_chroma_buffer_height) * u_buffer_width;
}

/*
 * 10-bit (TP10) reference buffer: luma rows are packed in
 * SYSTEM_LAL_TILE10-wide tiles; stride carries 4 bytes per 3 pixels.
 */
static inline
u32 size_enc_ten_bit_ref_buffer(u32 frame_width, u32 frame_height)
{
	u32 ref_luma_stride_in_bytes = ((frame_width + SYSTEM_LAL_TILE10 - 1) / SYSTEM_LAL_TILE10) *
				       SYSTEM_LAL_TILE10;
	u32 ref_buf_height = (frame_height + (32 - 1)) & (~(32 - 1));
	u32 u_ref_stride, luma_size;
	u32 ref_chrm_height_in_bytes;
	u32 chroma_size;

	/* 10-bit packing: 3 pixels per 4 bytes, stride rounded to 128. */
	u_ref_stride = 4 * (ref_luma_stride_in_bytes / 3);
	u_ref_stride = (u_ref_stride + (128 - 1)) & (~(128 - 1));
	luma_size = ref_buf_height * u_ref_stride;
	luma_size = (luma_size + (4096 - 1)) & (~(4096 - 1));

	ref_chrm_height_in_bytes = (((frame_height + 1) >> 1) + (32 - 1)) & (~(32 - 1));
	chroma_size = u_ref_stride * ref_chrm_height_in_bytes;
	chroma_size = (chroma_size + (4096 - 1)) & (~(4096 - 1));

	return luma_size + chroma_size;
}

/* UBWC metadata plane stride: tiles across the width, rounded up. */
static inline
u32 hfi_ubwc_calc_metadata_plane_stride(u32 frame_width,
					u32 metadata_stride_multiple,
					u32 tile_width_in_pels)
{
	return ALIGN(((frame_width + (tile_width_in_pels - 1)) / tile_width_in_pels),
		     metadata_stride_multiple);
}

/* UBWC metadata plane height: tile rows down the height, rounded up. */
static inline
u32 hfi_ubwc_metadata_plane_bufheight(u32 frame_height,
				      u32 metadata_height_multiple,
				      u32 tile_height_in_pels)
{
	return ALIGN(((frame_height + (tile_height_in_pels - 1)) / tile_height_in_pels),
		     metadata_height_multiple);
}

/* UBWC metadata plane byte size, page (4K) aligned. */
static inline
u32 hfi_ubwc_metadata_plane_buffer_size(u32 _metadata_tride, u32 _metadata_buf_height)
{
	return ALIGN(_metadata_tride * _metadata_buf_height, 4096);
}

/*
 * hfi_buffer_non_comv_enc() - sum of the encoder's non-collocated-MV
 * internal buffers (slice info/commands, headers, RC, bit counts/bitmaps,
 * overrides, intra refresh).
 */
static inline
u32 hfi_buffer_non_comv_enc(u32 frame_width, u32 frame_height,
			    u32 num_vpp_pipes_enc, u32 lcu_size, u32 standard)
{
	u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size);
	u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size);
	u32 num_lcu_in_frame = width_in_lcus * height_in_lcus;
	u32 frame_height_coded = height_in_lcus * (lcu_size);
	u32 frame_width_coded = width_in_lcus * (lcu_size);
	u32 num_lcumb, frame_rc_buf_size;

	/* Width is padded by 8 LCUs of slack for the override buffer. */
	num_lcumb = (frame_height_coded / lcu_size) *
		    ((frame_width_coded + lcu_size * 8) / lcu_size);
	frame_rc_buf_size = size_frame_rc_buf_size(standard, frame_height_coded,
						   num_vpp_pipes_enc);
	return size_enc_slice_info_buf(num_lcu_in_frame) +
	       SIZE_SLICE_CMD_BUFFER +
	       SIZE_SPS_PPS_SLICE_HDR +
	       frame_rc_buf_size +
	       enc_bitcnt_buf_size(num_lcu_in_frame) +
	       enc_bitmap_buf_size(num_lcu_in_frame) +
	       SIZE_BSE_SLICE_CMD_BUF +
	       SIZE_LAMBDA_LUT +
	       size_override_buf(num_lcumb) +
	       size_ir_buf(num_lcu_in_frame);
}

/* Encoder non-COMV buffer size; HEVC adds a one-slice buffer on top. */
static u32 iris_vpu_enc_non_comv_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	u32 height = iris_vpu_enc_get_bitstream_height(inst);
	u32 width = iris_vpu_enc_get_bitstream_width(inst);
	u32 lcu_size = 16;

	if (inst->codec == V4L2_PIX_FMT_HEVC) {
		lcu_size = 32;
		return hfi_buffer_non_comv_enc(width, height, num_vpp_pipes,
					       lcu_size, HFI_CODEC_ENCODE_HEVC) +
		       SIZE_ONE_SLICE_BUF;
	}

	return hfi_buffer_non_comv_enc(width, height, num_vpp_pipes,
				       lcu_size, HFI_CODEC_ENCODE_AVC);
}

/*
 * hfi_buffer_line_enc_base() - common (non-VPSS) portion of the encoder
 * line-buffer requirement; VPU-specific VPSS parts are added by callers.
 */
static inline
u32 hfi_buffer_line_enc_base(u32 frame_width, u32 frame_height, bool is_ten_bit,
			     u32 num_vpp_pipes_enc, u32 lcu_size, u32 standard)
{
	u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size);
	u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size);
	u32 frame_height_coded = height_in_lcus * (lcu_size);
	u32
frame_width_coded = width_in_lcus * (lcu_size);
	u32 line_buff_data_size, left_line_buff_ctrl_size;
	u32 left_line_buff_metadata_recon__uv__size;
	u32 left_line_buff_metadata_recon__y__size;
	u32 left_line_buff_recon_pix_size;
	u32 top_line_buff_ctrl_fe_size;
	u32 line_buff_recon_pix_size;

	line_buff_data_size = size_linebuff_data(is_ten_bit, frame_width_coded);
	left_line_buff_ctrl_size =
		size_left_linebuff_ctrl(standard, frame_height_coded, num_vpp_pipes_enc);
	left_line_buff_recon_pix_size =
		size_left_linebuff_recon_pix(is_ten_bit, frame_height_coded,
					     num_vpp_pipes_enc);
	top_line_buff_ctrl_fe_size =
		size_top_linebuff_ctrl_fe(frame_width_coded, standard);
	left_line_buff_metadata_recon__y__size =
		size_left_linebuff_metadata_recon_y(frame_height_coded, is_ten_bit,
						    num_vpp_pipes_enc);
	left_line_buff_metadata_recon__uv__size =
		size_left_linebuff_metadata_recon_uv(frame_height_coded, is_ten_bit,
						     num_vpp_pipes_enc);
	line_buff_recon_pix_size = size_linebuff_recon_pix(is_ten_bit, frame_width_coded);

	/* Sum of every top/left line-buffer the encode pipeline needs. */
	return size_line_buf_ctrl(frame_width_coded) +
	       size_line_buf_ctrl_id2(frame_width_coded) +
	       line_buff_data_size +
	       left_line_buff_ctrl_size +
	       left_line_buff_recon_pix_size +
	       top_line_buff_ctrl_fe_size +
	       left_line_buff_metadata_recon__y__size +
	       left_line_buff_metadata_recon__uv__size +
	       line_buff_recon_pix_size +
	       size_left_linebuff_ctrl_fe(frame_height_coded, num_vpp_pipes_enc) +
	       size_line_buf_sde(frame_width_coded) +
	       size_top_line_buf_first_stg_sao(frame_width_coded);
}

/* Encoder line-buffer size using the legacy VPSS line-buffer formula. */
static inline
u32 hfi_buffer_line_enc(u32 frame_width, u32 frame_height, bool is_ten_bit,
			u32 num_vpp_pipes_enc, u32 lcu_size, u32 standard)
{
	u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size);
	u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size);
	u32 frame_height_coded = height_in_lcus * (lcu_size);
	u32 frame_width_coded = width_in_lcus * (lcu_size);

	return hfi_buffer_line_enc_base(frame_width, frame_height, is_ten_bit,
					num_vpp_pipes_enc, lcu_size, standard) +
	       size_vpss_line_buf(num_vpp_pipes_enc, frame_height_coded, frame_width_coded);
}

/* Encoder line-buffer size using the VPU33-specific VPSS formula. */
static inline
u32 hfi_buffer_line_enc_vpu33(u32 frame_width, u32 frame_height, bool is_ten_bit,
			      u32 num_vpp_pipes_enc, u32 lcu_size, u32 standard)
{
	u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size);
	u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size);
	u32 frame_height_coded = height_in_lcus * (lcu_size);
	u32 frame_width_coded = width_in_lcus * (lcu_size);

	return hfi_buffer_line_enc_base(frame_width, frame_height, is_ten_bit,
					num_vpp_pipes_enc, lcu_size, standard) +
	       size_vpss_line_buf_vpu33(num_vpp_pipes_enc, frame_height_coded,
					frame_width_coded);
}

/* Line-buffer size for this encoder instance (8-bit assumed here). */
static u32 iris_vpu_enc_line_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	u32 height = iris_vpu_enc_get_bitstream_height(inst);
	u32 width = iris_vpu_enc_get_bitstream_width(inst);
	u32 lcu_size = 16;

	if (inst->codec == V4L2_PIX_FMT_HEVC) {
		lcu_size = 32;
		return hfi_buffer_line_enc(width, height, 0, num_vpp_pipes,
					   lcu_size, HFI_CODEC_ENCODE_HEVC);
	}

	return hfi_buffer_line_enc(width, height, 0, num_vpp_pipes,
				   lcu_size, HFI_CODEC_ENCODE_AVC);
}

/* VPU33 variant of iris_vpu_enc_line_size(). */
static u32 iris_vpu33_enc_line_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	u32 height = iris_vpu_enc_get_bitstream_height(inst);
	u32 width = iris_vpu_enc_get_bitstream_width(inst);
	u32 lcu_size = 16;

	if (inst->codec == V4L2_PIX_FMT_HEVC) {
		lcu_size = 32;
		return hfi_buffer_line_enc_vpu33(width, height, 0, num_vpp_pipes,
						 lcu_size, HFI_CODEC_ENCODE_HEVC);
	}

	return hfi_buffer_line_enc_vpu33(width, height, 0, num_vpp_pipes,
					 lcu_size, HFI_CODEC_ENCODE_AVC);
}

/*
 * hfi_buffer_dpb_enc() - one encoder DPB (reference) frame size:
 * pixel data plus UBWC luma/chroma metadata planes, 8- or 10-bit layout.
 */
static inline
u32 hfi_buffer_dpb_enc(u32 frame_width, u32 frame_height, bool is_ten_bit)
{
	u32 metadata_stride, metadata_buf_height, meta_size_y, meta_size_c;
	u32 ten_bit_ref_buf_size = 0, ref_buf_size = 0;
	u32 size;

	if (!is_ten_bit) {
		ref_buf_size = size_enc_ref_buffer(frame_width, frame_height);
		metadata_stride =
			hfi_ubwc_calc_metadata_plane_stride(frame_width, 64,
							    HFI_COL_FMT_NV12C_Y_TILE_WIDTH);
		metadata_buf_height =
			hfi_ubwc_metadata_plane_bufheight(frame_height, 16,
							  HFI_COL_FMT_NV12C_Y_TILE_HEIGHT);
		meta_size_y =
			hfi_ubwc_metadata_plane_buffer_size(metadata_stride, metadata_buf_height);
		meta_size_c =
			hfi_ubwc_metadata_plane_buffer_size(metadata_stride, metadata_buf_height);
		size = ref_buf_size + meta_size_y + meta_size_c;
	} else {
		ten_bit_ref_buf_size = size_enc_ten_bit_ref_buffer(frame_width, frame_height);
		metadata_stride =
			hfi_ubwc_calc_metadata_plane_stride(frame_width,
							    IRIS_METADATA_STRIDE_MULTIPLE,
							    HFI_COL_FMT_TP10C_Y_TILE_WIDTH);
		metadata_buf_height =
			hfi_ubwc_metadata_plane_bufheight(frame_height,
							  IRIS_METADATA_HEIGHT_MULTIPLE,
							  HFI_COL_FMT_TP10C_Y_TILE_HEIGHT);
		meta_size_y =
			hfi_ubwc_metadata_plane_buffer_size(metadata_stride, metadata_buf_height);
		meta_size_c =
			hfi_ubwc_metadata_plane_buffer_size(metadata_stride, metadata_buf_height);
		size = ten_bit_ref_buf_size + meta_size_y + meta_size_c;
	}

	return size;
}

/* Encoder auxiliary reference persistence buffer: fixed-size constant. */
static u32 iris_vpu_enc_arp_size(struct iris_inst *inst)
{
	return HFI_BUFFER_ARP_ENC;
}

/* True when output resolution differs from capture, i.e. scaler in use. */
inline bool is_scaling_enabled(struct iris_inst *inst)
{
	struct v4l2_pix_format_mplane *dst_fmt = &inst->fmt_dst->fmt.pix_mp;
	struct v4l2_pix_format_mplane *src_fmt
= &inst->fmt_src->fmt.pix_mp;

	return dst_fmt->width != src_fmt->width ||
		dst_fmt->height != src_fmt->height;
}

/* VPSS buffer is a DPB-sized frame; needed only when scaling or blur. */
static inline
u32 hfi_buffer_vpss_enc(u32 dswidth, u32 dsheight, bool ds_enable,
			u32 blur, bool is_ten_bit)
{
	if (ds_enable || blur)
		return hfi_buffer_dpb_enc(dswidth, dsheight, is_ten_bit);

	return 0;
}

/*
 * hfi_buffer_scratch1_enc() - legacy all-in-one encoder scratch1 size:
 * every line buffer, collocated buffer, RC buffer and register/command
 * buffer summed in one pass.
 * NOTE(review): each formula repeats the individual size_* helpers above;
 * constants are firmware-defined - confirm changes against the HFI spec.
 */
static inline u32 hfi_buffer_scratch1_enc(u32 frame_width, u32 frame_height,
					  u32 lcu_size, u32 num_ref,
					  bool ten_bit, u32 num_vpp_pipes,
					  bool is_h265)
{
	u32 line_buf_ctrl_size, line_buf_data_size, leftline_buf_ctrl_size;
	u32 line_buf_sde_size, sps_pps_slice_hdr, topline_buf_ctrl_size_FE;
	u32 leftline_buf_ctrl_size_FE, line_buf_recon_pix_size;
	u32 leftline_buf_recon_pix_size, lambda_lut_size, override_buffer_size;
	u32 col_mv_buf_size, vpp_reg_buffer_size, ir_buffer_size;
	u32 vpss_line_buf, leftline_buf_meta_recony, h265e_colrcbuf_size;
	u32 h265e_framerc_bufsize, h265e_lcubitcnt_bufsize;
	u32 h265e_lcubitmap_bufsize, se_stats_bufsize;
	u32 bse_reg_buffer_size, bse_slice_cmd_buffer_size, slice_info_bufsize;
	u32 line_buf_ctrl_size_buffid2, slice_cmd_buffer_size;
	u32 width_lcu_num, height_lcu_num, width_coded, height_coded;
	u32 frame_num_lcu, linebuf_meta_recon_uv, topline_bufsize_fe_1stg_sao;
	u32 vpss_line_buffer_size_1;
	u32 bit_depth, num_lcu_mb;

	width_lcu_num = (frame_width + lcu_size - 1) / lcu_size;
	height_lcu_num = (frame_height + lcu_size - 1) / lcu_size;
	frame_num_lcu = width_lcu_num * height_lcu_num;
	width_coded = width_lcu_num * lcu_size;
	height_coded = height_lcu_num * lcu_size;
	/* 8 LCUs of width slack, as in size_override_buf()'s num_lcumb. */
	num_lcu_mb = (height_coded / lcu_size) *
		     ((width_coded + lcu_size * 8) / lcu_size);
	slice_info_bufsize = 256 + (frame_num_lcu << 4);
	slice_info_bufsize = ALIGN(slice_info_bufsize, 256);
	line_buf_ctrl_size = ALIGN(width_coded, 256);
	line_buf_ctrl_size_buffid2 = ALIGN(width_coded, 256);

	bit_depth = ten_bit ? 10 : 8;
	/* One luma line plus two half-width chroma lines, 256-rounded. */
	line_buf_data_size =
		(((((bit_depth * width_coded + 1024) + (256 - 1)) &
		   (~(256 - 1))) * 1) +
		 (((((bit_depth * width_coded + 1024) >> 1) + (256 - 1)) &
		   (~(256 - 1))) * 2));

	leftline_buf_ctrl_size = is_h265 ? ((height_coded + 32) / 32 * 4 * 16) :
					   ((height_coded + 15) / 16 * 5 * 16);

	if (num_vpp_pipes > 1) {
		leftline_buf_ctrl_size += 512;
		leftline_buf_ctrl_size =
			ALIGN(leftline_buf_ctrl_size, 512) * num_vpp_pipes;
	}

	leftline_buf_ctrl_size = ALIGN(leftline_buf_ctrl_size, 256);
	leftline_buf_recon_pix_size =
		(((ten_bit + 1) * 2 * (height_coded) + 256) +
		 (256 << (num_vpp_pipes - 1)) - 1) &
		(~((256 << (num_vpp_pipes - 1)) - 1)) * 1;

	topline_buf_ctrl_size_FE = is_h265 ? (64 * (width_coded >> 5)) :
					     (256 + 16 * (width_coded >> 4));
	topline_buf_ctrl_size_FE = ALIGN(topline_buf_ctrl_size_FE, 256);
	leftline_buf_ctrl_size_FE =
		(((256 + 64 * (height_coded >> 4)) +
		  (256 << (num_vpp_pipes - 1)) - 1) &
		 (~((256 << (num_vpp_pipes - 1)) - 1)) * 1) *
		num_vpp_pipes;
	leftline_buf_meta_recony =
		(256 + 64 * ((height_coded) / (8 * (ten_bit ? 4 : 8))));
	leftline_buf_meta_recony = ALIGN(leftline_buf_meta_recony, 256);
	leftline_buf_meta_recony = leftline_buf_meta_recony * num_vpp_pipes;
	linebuf_meta_recon_uv =
		(256 + 64 * ((height_coded) / (4 * (ten_bit ? 4 : 8))));
	linebuf_meta_recon_uv = ALIGN(linebuf_meta_recon_uv, 256);
	linebuf_meta_recon_uv = linebuf_meta_recon_uv * num_vpp_pipes;
	line_buf_recon_pix_size = ((ten_bit ? 3 : 2) * width_coded);
	line_buf_recon_pix_size = ALIGN(line_buf_recon_pix_size, 256);
	slice_cmd_buffer_size = ALIGN(20480, 256);
	sps_pps_slice_hdr = 2048 + 4096;
	col_mv_buf_size =
		is_h265 ? (16 * ((frame_num_lcu << 2) + 32)) :
			  (3 * 16 * (width_lcu_num * height_lcu_num + 32));
	col_mv_buf_size = ALIGN(col_mv_buf_size, 256) * (num_ref + 1);
	h265e_colrcbuf_size =
		(((width_lcu_num + 7) >> 3) * 16 * 2 * height_lcu_num);
	if (num_vpp_pipes > 1)
		h265e_colrcbuf_size =
			ALIGN(h265e_colrcbuf_size, 256) * num_vpp_pipes;

	h265e_colrcbuf_size =
		ALIGN(h265e_colrcbuf_size, 256) * HFI_MAX_COL_FRAME;
	h265e_framerc_bufsize =
		(is_h265) ?
		(256 + 16 * (14 + (((height_coded >> 5) + 7) >> 3))) :
		(256 + 16 * (14 + (((height_coded >> 4) + 7) >> 3)));
	h265e_framerc_bufsize *= 6;
	if (num_vpp_pipes > 1)
		h265e_framerc_bufsize =
			ALIGN(h265e_framerc_bufsize, 256) * num_vpp_pipes;

	h265e_framerc_bufsize =
		ALIGN(h265e_framerc_bufsize, 512) * HFI_MAX_COL_FRAME;
	h265e_lcubitcnt_bufsize = 256 + 4 * frame_num_lcu;
	h265e_lcubitcnt_bufsize = ALIGN(h265e_lcubitcnt_bufsize, 256);
	h265e_lcubitmap_bufsize = 256 + (frame_num_lcu >> 3);
	h265e_lcubitmap_bufsize = ALIGN(h265e_lcubitmap_bufsize, 256);
	line_buf_sde_size = 256 + 16 * (width_coded >> 4);
	line_buf_sde_size = ALIGN(line_buf_sde_size, 256);
	/* SE stats scale down with resolution; none needed above 4K. */
	if ((width_coded * height_coded) > (4096 * 2160))
		se_stats_bufsize = 0;
	else if ((width_coded * height_coded) > (1920 * 1088))
		se_stats_bufsize = (40 * 4 * frame_num_lcu + 256 + 256);
	else
		se_stats_bufsize = (1024 * frame_num_lcu + 256 + 256);

	se_stats_bufsize = ALIGN(se_stats_bufsize, 256) * 2;
	bse_slice_cmd_buffer_size = (((8192 << 2) + 7) & (~7)) * 6;
	bse_reg_buffer_size = (((512 << 3) + 7) & (~7)) * 4;
	vpp_reg_buffer_size = (((2048 << 3) + 31) & (~31)) * 10;
	lambda_lut_size = 256 * 11;
	override_buffer_size = 16 * ((num_lcu_mb + 7) >> 3);
	override_buffer_size = ALIGN(override_buffer_size, 256) * 2;
	ir_buffer_size = (((frame_num_lcu << 1) + 7) & (~7)) * 3;
	vpss_line_buffer_size_1 = (((8192 >> 2) << 5) * num_vpp_pipes) + 64;
	vpss_line_buf =
		(((((max(width_coded, height_coded) + 3) >> 2) << 5) + 256) *
		 16) +
		vpss_line_buffer_size_1;
	topline_bufsize_fe_1stg_sao = 16 * (width_coded >> 5);
	topline_bufsize_fe_1stg_sao = ALIGN(topline_bufsize_fe_1stg_sao, 256);

	/* Grand total, plus 1024 guard bytes. */
	return line_buf_ctrl_size + line_buf_data_size +
	       line_buf_ctrl_size_buffid2 + leftline_buf_ctrl_size +
	       vpss_line_buf + col_mv_buf_size + topline_buf_ctrl_size_FE +
	       leftline_buf_ctrl_size_FE + line_buf_recon_pix_size +
	       leftline_buf_recon_pix_size + leftline_buf_meta_recony +
	       linebuf_meta_recon_uv + h265e_colrcbuf_size +
	       h265e_framerc_bufsize + h265e_lcubitcnt_bufsize +
	       h265e_lcubitmap_bufsize + line_buf_sde_size +
	       topline_bufsize_fe_1stg_sao + override_buffer_size +
	       bse_reg_buffer_size + vpp_reg_buffer_size + sps_pps_slice_hdr +
	       slice_cmd_buffer_size + bse_slice_cmd_buffer_size +
	       ir_buffer_size + slice_info_bufsize + lambda_lut_size +
	       se_stats_bufsize + 1024;
}

/* Scratch1 size for this encoder instance; only H.264/HEVC supported. */
static u32 iris_vpu_enc_scratch1_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	u32 frame_height = iris_vpu_enc_get_bitstream_height(inst);
	u32 frame_width = iris_vpu_enc_get_bitstream_width(inst);
	u32 num_ref = 1;
	u32 lcu_size;
	bool is_h265;

	if (inst->codec == V4L2_PIX_FMT_H264) {
		lcu_size = 16;
		is_h265 = false;
	} else if (inst->codec == V4L2_PIX_FMT_HEVC) {
		lcu_size = 32;
		is_h265 = true;
	} else {
		return 0;
	}

	return hfi_buffer_scratch1_enc(frame_width, frame_height, lcu_size,
				       num_ref, false, num_vpp_pipes, is_h265);
}

/* UBWC metadata stride (scratch2 variant of the hfi_ubwc_* helper). */
static inline u32 ubwc_metadata_plane_stride(u32 width,
					     u32 metadata_stride_multi,
					     u32 tile_width_pels)
{
	return ALIGN(((width + (tile_width_pels - 1)) / tile_width_pels),
		     metadata_stride_multi);
}

static inline u32
ubwc_metadata_plane_bufheight(u32 height,
					      u32 metadata_height_multi,
					      u32 tile_height_pels)
{
	/* UBWC metadata plane height in tile rows, rounded up. */
	return ALIGN(((height + (tile_height_pels - 1)) / tile_height_pels),
		     metadata_height_multi);
}

/* UBWC metadata plane byte size, page (4K) aligned. */
static inline u32 ubwc_metadata_plane_buffer_size(u32 metadata_stride,
						  u32 metadata_buf_height)
{
	return ALIGN(metadata_stride * metadata_buf_height, SZ_4K);
}

/*
 * hfi_buffer_scratch2_enc() - encoder reference-frame scratch pool:
 * (num_ref + 3) frames of pixel data plus UBWC metadata, 8- or 10-bit
 * UBWC layout, plus 4096 guard bytes.
 */
static inline u32 hfi_buffer_scratch2_enc(u32 frame_width, u32 frame_height,
					  u32 num_ref, bool ten_bit)
{
	u32 aligned_width, aligned_height, chroma_height, ref_buf_height;
	u32 metadata_stride, meta_buf_height, meta_size_y, meta_size_c;
	u32 ref_luma_stride_bytes, ref_chroma_height_bytes;
	u32 ref_buf_size, ref_stride;
	u32 luma_size, chroma_size;
	u32 size;

	if (!ten_bit) {
		/* NV12 UBWC: 32-aligned heights, 128-aligned width. */
		aligned_height = ALIGN(frame_height, 32);
		chroma_height = frame_height >> 1;
		chroma_height = ALIGN(chroma_height, 32);
		aligned_width = ALIGN(frame_width, 128);
		metadata_stride =
			ubwc_metadata_plane_stride(frame_width, 64, 32);
		meta_buf_height =
			ubwc_metadata_plane_bufheight(frame_height, 16, 8);
		meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride,
							      meta_buf_height);
		meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride,
							      meta_buf_height);
		size = (aligned_height + chroma_height) * aligned_width +
		       meta_size_y + meta_size_c;
		size = (size * (num_ref + 3)) + 4096;
	} else {
		/* TP10 UBWC: 192-px luma tiles, 4 bytes per 3 pixels. */
		ref_buf_height = (frame_height + (32 - 1)) & (~(32 - 1));
		ref_luma_stride_bytes = ((frame_width + 192 - 1) / 192) * 192;
		ref_stride = 4 * (ref_luma_stride_bytes / 3);
		ref_stride = (ref_stride + (128 - 1)) & (~(128 - 1));
		luma_size = ref_buf_height * ref_stride;
		ref_chroma_height_bytes =
			(((frame_height + 1) >> 1) + (32 - 1)) & (~(32 - 1));
		chroma_size = ref_stride * ref_chroma_height_bytes;
		luma_size = (luma_size + (SZ_4K - 1)) & (~(SZ_4K - 1));
		chroma_size = (chroma_size + (SZ_4K - 1)) & (~(SZ_4K - 1));
		ref_buf_size = luma_size + chroma_size;
		metadata_stride =
			ubwc_metadata_plane_stride(frame_width, 64, 48);
		meta_buf_height =
			ubwc_metadata_plane_bufheight(frame_height, 16, 4);
		meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride,
							      meta_buf_height);
		meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride,
							      meta_buf_height);
		size = ref_buf_size + meta_size_y + meta_size_c;
		size = (size * (num_ref + 3)) + 4096;
	}

	return size;
}

/* Scratch2 size for this encoder instance (8-bit assumed here). */
static u32 iris_vpu_enc_scratch2_size(struct iris_inst *inst)
{
	u32 frame_height = iris_vpu_enc_get_bitstream_height(inst);
	u32 frame_width = iris_vpu_enc_get_bitstream_width(inst);
	u32 num_ref = 1;

	return hfi_buffer_scratch2_enc(frame_width, frame_height, num_ref,
				       false);
}

/* Encoder VPSS buffer size; zero unless the scaler is engaged. */
static u32 iris_vpu_enc_vpss_size(struct iris_inst *inst)
{
	u32 ds_enable = is_scaling_enabled(inst);
	struct v4l2_format *f = inst->fmt_dst;
	u32 height = f->fmt.pix_mp.height;
	u32 width = f->fmt.pix_mp.width;

	return hfi_buffer_vpss_enc(width, height, ds_enable, 0, 0);
}

/* DPB/OPB line-buffer size: both luma and chroma planes. */
static inline u32 size_dpb_opb(u32 height, u32 lcu_size)
{
	/* +8 rows of tile padding beyond LCU-aligned height. */
	u32 max_tile_height = ((height + lcu_size - 1) / lcu_size) * lcu_size + 8;
	u32 dpb_opb = 3 * ((max_tile_height >> 3) * DMA_ALIGNMENT);
	u32 num_luma_chrome_plane = 2;

	return ALIGN(dpb_opb, DMA_ALIGNMENT) * num_luma_chrome_plane;
}

/*
 * hfi_vpu4x_vp9d_lb_size() - VP9 decoder line-buffer total on VPU4x:
 * top (per-frame-width) buffers, per-pipe left (column) buffers, SE left
 * control, DPB/OPB line buffers and the QP buffer.
 * NOTE(review): FE_*_LINE_NUMBERS and MAX_PE_* constants are
 * firmware-defined - confirm changes against the VPU4x HFI spec.
 */
static u32 hfi_vpu4x_vp9d_lb_size(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
{
	u32 vp9_top_lb, vp9_fe_left_lb, vp9_se_left_lb, dpb_opb, vp9d_qp, num_lcu_per_pipe;
	u32 lcu_size = 64;

	vp9_top_lb = ALIGN(size_vp9d_lb_vsp_top(frame_width, frame_height), DMA_ALIGNMENT);
	vp9_top_lb += ALIGN(size_vpxd_lb_se_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT);
	/* Worst case over the three supported superblock sizes. */
	vp9_top_lb += max3(DIV_ROUND_UP(frame_width, BUFFER_ALIGNMENT_16_BYTES) *
			   MAX_PE_NBR_DATA_LCU16_LINE_BUFFER_SIZE,
			   DIV_ROUND_UP(frame_width, BUFFER_ALIGNMENT_32_BYTES) *
			   MAX_PE_NBR_DATA_LCU32_LINE_BUFFER_SIZE,
			   DIV_ROUND_UP(frame_width, BUFFER_ALIGNMENT_64_BYTES) *
			   MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE);
	vp9_top_lb = ALIGN(vp9_top_lb, DMA_ALIGNMENT);
	vp9_top_lb += ALIGN((DMA_ALIGNMENT * DIV_ROUND_UP(frame_width, lcu_size)),
			    DMA_ALIGNMENT) * FE_TOP_CTRL_LINE_NUMBERS;
	vp9_top_lb += ALIGN(DMA_ALIGNMENT * 8 * DIV_ROUND_UP(frame_width, lcu_size),
			    DMA_ALIGNMENT) * (FE_TOP_DATA_LUMA_LINE_NUMBERS +
					      FE_TOP_DATA_CHROMA_LINE_NUMBERS);

	/* Ceiling split of LCU rows across pipes, plus remainder slack. */
	num_lcu_per_pipe = (DIV_ROUND_UP(frame_height, lcu_size) / num_vpp_pipes) +
			   (DIV_ROUND_UP(frame_height, lcu_size) % num_vpp_pipes);
	vp9_fe_left_lb = ALIGN((DMA_ALIGNMENT * num_lcu_per_pipe), DMA_ALIGNMENT) *
			 FE_LFT_CTRL_LINE_NUMBERS;
	vp9_fe_left_lb += ((ALIGN((DMA_ALIGNMENT * 8 * num_lcu_per_pipe), DMA_ALIGNMENT) *
			    FE_LFT_DB_DATA_LINE_NUMBERS) +
			   ALIGN((DMA_ALIGNMENT * 3 * num_lcu_per_pipe), DMA_ALIGNMENT) +
			   ALIGN((DMA_ALIGNMENT * 4 * num_lcu_per_pipe), DMA_ALIGNMENT) +
			   (ALIGN((DMA_ALIGNMENT * 24 * num_lcu_per_pipe), DMA_ALIGNMENT) *
			    FE_LFT_LR_DATA_LINE_NUMBERS));
	vp9_fe_left_lb = vp9_fe_left_lb * num_vpp_pipes;

	vp9_se_left_lb = ALIGN(size_vpxd_lb_se_left_ctrl(frame_width, frame_height),
			       DMA_ALIGNMENT);
	dpb_opb = size_dpb_opb(frame_height, lcu_size);
	vp9d_qp = ALIGN(size_vp9d_qp(frame_width, frame_height), DMA_ALIGNMENT);

	return vp9_top_lb + vp9_fe_left_lb + (vp9_se_left_lb * num_vpp_pipes) +
	       (dpb_opb * num_vpp_pipes) + vp9d_qp;
}

/*
 * VPU4x VP9 decoder line-buffer size; adds DPB/OPB buffers in split
 * (OPB) mode. @_yuv_bufcount_min is unused but kept for signature parity
 * with the other per-codec line helpers.
 */
static u32 hfi_vpu4x_buffer_line_vp9d(u32 frame_width, u32 frame_height, u32 _yuv_bufcount_min,
				      bool is_opb, u32 num_vpp_pipes)
{
	u32 lb_size = hfi_vpu4x_vp9d_lb_size(frame_width, frame_height, num_vpp_pipes);
	u32 dpb_obp_size = 0, lcu_size = 64;
	if (is_opb)
		dpb_obp_size = size_dpb_opb(frame_height, lcu_size) * num_vpp_pipes;

	return lb_size + dpb_obp_size;
}

/* Decoder line-buffer size on VPU4x; AV1 is not handled here. */
static u32 iris_vpu4x_dec_line_size(struct iris_inst *inst)
{
	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
	u32 out_min_count = inst->buffers[BUF_OUTPUT].min_count;
	struct v4l2_format *f = inst->fmt_src;
	u32 height = f->fmt.pix_mp.height;
	u32 width = f->fmt.pix_mp.width;
	bool is_opb = false;

	/* Split (DPB+OPB) mode requires the extra output line buffers. */
	if (iris_split_mode_enabled(inst))
		is_opb = true;

	if (inst->codec == V4L2_PIX_FMT_H264)
		return hfi_buffer_line_h264d(width, height, is_opb, num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_HEVC)
		return hfi_buffer_line_h265d(width, height, is_opb, num_vpp_pipes);
	else if (inst->codec == V4L2_PIX_FMT_VP9)
		return hfi_vpu4x_buffer_line_vp9d(width, height, out_min_count, is_opb,
						  num_vpp_pipes);

	return 0;
}

/*
 * HEVC decoder persist buffer on VPU4x; @rpu_enabled adds Dolby RPU
 * metadata storage per hardware picture buffer.
 */
static u32 hfi_vpu4x_buffer_persist_h265d(u32 rpu_enabled)
{
	return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + H265_NUM_FRM_INFO *
		      H265_DISPLAY_BUF_SIZE + (H265_NUM_TILE * sizeof(u32)) + (NUM_HW_PIC_BUF *
		      (SIZE_SEI_USERDATA + SIZE_H265D_ARP + SIZE_THREE_DIMENSION_USERDATA)) +
		      rpu_enabled * NUM_HW_PIC_BUF * SIZE_DOLBY_RPU_METADATA), DMA_ALIGNMENT);
}

/* VP9 decoder persist buffer on VPU4x: probability tables, COMV, headers. */
static u32 hfi_vpu4x_buffer_persist_vp9d(void)
{
	return ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE, DMA_ALIGNMENT) +
	       (ALIGN(hfi_iris3_vp9d_comv_size(), DMA_ALIGNMENT) * 2) +
	       ALIGN(MAX_SUPERFRAME_HEADER_LEN, DMA_ALIGNMENT) +
	       ALIGN(VP9_UDC_HEADER_BUF_SIZE, DMA_ALIGNMENT) +
	       ALIGN(VP9_NUM_FRAME_INFO_BUF * CCE_TILE_OFFSET_SIZE, DMA_ALIGNMENT) +
	       ALIGN(VP9_NUM_FRAME_INFO_BUF * VP9_FRAME_INFO_BUF_SIZE_VPU4X, DMA_ALIGNMENT) +
	       HDR10_HIST_EXTRADATA_SIZE;
}

/* Per-codec decoder persist buffer size on VPU4x (RPU disabled for HEVC). */
static u32 iris_vpu4x_dec_persist_size(struct iris_inst *inst)
{
	if (inst->codec == V4L2_PIX_FMT_H264)
		return hfi_buffer_persist_h264d();
	else if (inst->codec == V4L2_PIX_FMT_HEVC)
		return hfi_vpu4x_buffer_persist_h265d(0);
	else if (inst->codec == V4L2_PIX_FMT_VP9)
		return hfi_vpu4x_buffer_persist_vp9d();

	return 0;
}

/*
 * size_se_lb() - syntax-engine line buffers: top (width-based) plus left
 * (height-based) control, the latter replicated per VPP pipe.
 */
static u32 size_se_lb(u32 standard, u32 num_vpp_pipes_enc,
		      u32 frame_width_coded, u32 frame_height_coded)
{
	u32 se_tlb_size = ALIGN(frame_width_coded, DMA_ALIGNMENT);
	u32 se_llb_size = (standard == HFI_CODEC_ENCODE_HEVC) ?
		((frame_height_coded + BUFFER_ALIGNMENT_32_BYTES - 1) /
		 BUFFER_ALIGNMENT_32_BYTES) * LOG2_16 * LLB_UNIT_SIZE :
		((frame_height_coded + BUFFER_ALIGNMENT_16_BYTES - 1) /
		 BUFFER_ALIGNMENT_16_BYTES) * LOG2_32 * LLB_UNIT_SIZE;

	se_llb_size = ALIGN(se_llb_size, BUFFER_ALIGNMENT_32_BYTES);

	if (num_vpp_pipes_enc > 1)
		se_llb_size = ALIGN(se_llb_size + BUFFER_ALIGNMENT_512_BYTES,
				    DMA_ALIGNMENT) * num_vpp_pipes_enc;

	return ALIGN(se_tlb_size + se_llb_size, DMA_ALIGNMENT);
}

/*
 * size_te_lb() - transform-engine line buffers: collocated RC, top recon
 * data and per-pipe left recon data.
 */
static u32 size_te_lb(bool is_ten_bit, u32 num_vpp_pipes_enc, u32 width_in_lcus,
		      u32 frame_height_coded, u32 frame_width_coded)
{
	u32 num_pixel_10_bit = 3, num_pixel_8_bit = 2, num_pixel_te_llb = 3;
	u32 te_llb_col_rc_size = ALIGN(32 * width_in_lcus / num_vpp_pipes_enc,
				       DMA_ALIGNMENT) * num_vpp_pipes_enc;
	u32 te_tlb_recon_data_size = ALIGN((is_ten_bit ?
num_pixel_10_bit : num_pixel_8_bit) * 1838 frame_width_coded, DMA_ALIGNMENT); 1839 u32 te_llb_recon_data_size = ((1 + is_ten_bit) * num_pixel_te_llb * frame_height_coded + 1840 num_vpp_pipes_enc - 1) / num_vpp_pipes_enc; 1841 te_llb_recon_data_size = ALIGN(te_llb_recon_data_size, DMA_ALIGNMENT) * num_vpp_pipes_enc; 1842 1843 return ALIGN(te_llb_recon_data_size + te_llb_col_rc_size + te_tlb_recon_data_size, 1844 DMA_ALIGNMENT); 1845 } 1846 1847 static inline u32 calc_fe_tlb_size(u32 size_per_lcu, bool is_ten_bit) 1848 { 1849 u32 num_pixels_fe_tlb_10_bit = 128, num_pixels_fe_tlb_8_bit = 64; 1850 1851 return is_ten_bit ? (num_pixels_fe_tlb_10_bit * (size_per_lcu + 1)) : 1852 (size_per_lcu * num_pixels_fe_tlb_8_bit); 1853 } 1854 1855 static u32 size_fe_lb(bool is_ten_bit, u32 standard, u32 num_vpp_pipes_enc, 1856 u32 frame_height_coded, u32 frame_width_coded) 1857 { 1858 u32 log2_lcu_size, num_cu_in_height_pipe, num_cu_in_width, 1859 fb_llb_db_ctrl_size, fb_llb_db_luma_size, fb_llb_db_chroma_size, 1860 fb_tlb_db_ctrl_size, fb_tlb_db_luma_size, fb_tlb_db_chroma_size, 1861 fb_llb_sao_ctrl_size, fb_llb_sao_luma_size, fb_llb_sao_chroma_size, 1862 fb_tlb_sao_ctrl_size, fb_tlb_sao_luma_size, fb_tlb_sao_chroma_size, 1863 fb_lb_top_sdc_size, fb_lb_se_ctrl_size, fe_tlb_size, size_per_lcu; 1864 1865 log2_lcu_size = (standard == HFI_CODEC_ENCODE_HEVC) ? 
5 : 4; 1866 num_cu_in_height_pipe = ((frame_height_coded >> log2_lcu_size) + num_vpp_pipes_enc - 1) / 1867 num_vpp_pipes_enc; 1868 num_cu_in_width = frame_width_coded >> log2_lcu_size; 1869 1870 size_per_lcu = 2; 1871 fe_tlb_size = calc_fe_tlb_size(size_per_lcu, 1); 1872 fb_llb_db_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe; 1873 fb_llb_db_ctrl_size = ALIGN(fb_llb_db_ctrl_size, DMA_ALIGNMENT) * num_vpp_pipes_enc; 1874 1875 size_per_lcu = (1 << (log2_lcu_size - 3)); 1876 fe_tlb_size = calc_fe_tlb_size(size_per_lcu, is_ten_bit); 1877 fb_llb_db_luma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe; 1878 fb_llb_db_luma_size = ALIGN(fb_llb_db_luma_size, DMA_ALIGNMENT) * num_vpp_pipes_enc; 1879 1880 size_per_lcu = ((1 << (log2_lcu_size - 4)) * 2); 1881 fe_tlb_size = calc_fe_tlb_size(size_per_lcu, is_ten_bit); 1882 fb_llb_db_chroma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe; 1883 fb_llb_db_chroma_size = ALIGN(fb_llb_db_chroma_size, DMA_ALIGNMENT) * num_vpp_pipes_enc; 1884 1885 size_per_lcu = 1; 1886 fe_tlb_size = calc_fe_tlb_size(size_per_lcu, 1); 1887 fb_tlb_db_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width; 1888 fb_llb_sao_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe; 1889 fb_llb_sao_ctrl_size = fb_llb_sao_ctrl_size * num_vpp_pipes_enc; 1890 fb_tlb_sao_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width; 1891 1892 size_per_lcu = ((1 << (log2_lcu_size - 3)) + 1); 1893 fe_tlb_size = calc_fe_tlb_size(size_per_lcu, is_ten_bit); 1894 fb_tlb_db_luma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width; 1895 1896 size_per_lcu = (2 * ((1 << (log2_lcu_size - 4)) + 1)); 1897 fe_tlb_size = calc_fe_tlb_size(size_per_lcu, is_ten_bit); 1898 fb_tlb_db_chroma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width; 1899 1900 fb_llb_sao_luma_size = BUFFER_ALIGNMENT_256_BYTES * num_vpp_pipes_enc; 1901 fb_llb_sao_chroma_size = BUFFER_ALIGNMENT_256_BYTES * 
num_vpp_pipes_enc; 1902 fb_tlb_sao_luma_size = BUFFER_ALIGNMENT_256_BYTES; 1903 fb_tlb_sao_chroma_size = BUFFER_ALIGNMENT_256_BYTES; 1904 fb_lb_top_sdc_size = ALIGN((FE_SDC_DATA_PER_BLOCK * (frame_width_coded >> 5)), 1905 DMA_ALIGNMENT); 1906 fb_lb_se_ctrl_size = ALIGN((SE_CTRL_DATA_PER_BLOCK * (frame_width_coded >> 5)), 1907 DMA_ALIGNMENT); 1908 1909 return fb_llb_db_ctrl_size + fb_llb_db_luma_size + fb_llb_db_chroma_size + 1910 fb_tlb_db_ctrl_size + fb_tlb_db_luma_size + fb_tlb_db_chroma_size + 1911 fb_llb_sao_ctrl_size + fb_llb_sao_luma_size + fb_llb_sao_chroma_size + 1912 fb_tlb_sao_ctrl_size + fb_tlb_sao_luma_size + fb_tlb_sao_chroma_size + 1913 fb_lb_top_sdc_size + fb_lb_se_ctrl_size; 1914 } 1915 1916 static u32 size_md_lb(u32 standard, u32 frame_width_coded, 1917 u32 frame_height_coded, u32 num_vpp_pipes_enc) 1918 { 1919 u32 md_tlb_size = ALIGN(frame_width_coded, DMA_ALIGNMENT); 1920 u32 md_llb_size = (standard == HFI_CODEC_ENCODE_HEVC) ? 1921 ((frame_height_coded + BUFFER_ALIGNMENT_32_BYTES - 1) / 1922 BUFFER_ALIGNMENT_32_BYTES) * LOG2_16 * LLB_UNIT_SIZE : 1923 ((frame_height_coded + BUFFER_ALIGNMENT_16_BYTES - 1) / 1924 BUFFER_ALIGNMENT_16_BYTES) * LOG2_32 * LLB_UNIT_SIZE; 1925 1926 md_llb_size = ALIGN(md_llb_size, BUFFER_ALIGNMENT_32_BYTES); 1927 1928 if (num_vpp_pipes_enc > 1) 1929 md_llb_size = ALIGN(md_llb_size + BUFFER_ALIGNMENT_512_BYTES, 1930 DMA_ALIGNMENT) * num_vpp_pipes_enc; 1931 1932 md_llb_size = ALIGN(md_llb_size, DMA_ALIGNMENT); 1933 1934 return ALIGN(md_tlb_size + md_llb_size, DMA_ALIGNMENT); 1935 } 1936 1937 static u32 size_dma_opb_lb(u32 num_vpp_pipes_enc, u32 frame_width_coded, 1938 u32 frame_height_coded) 1939 { 1940 u32 opb_packet_bytes = 128, opb_bpp = 128, opb_size_per_row = 6; 1941 u32 dma_opb_wr_tlb_y_size = DIV_ROUND_UP(frame_width_coded, 16) * opb_packet_bytes; 1942 u32 dma_opb_wr_tlb_uv_size = DIV_ROUND_UP(frame_width_coded, 16) * opb_packet_bytes; 1943 u32 dma_opb_wr2_tlb_y_size = ALIGN((opb_bpp * opb_size_per_row * 
frame_height_coded / 8), 1944 DMA_ALIGNMENT) * num_vpp_pipes_enc; 1945 u32 dma_opb_wr2_tlb_uv_size = ALIGN((opb_bpp * opb_size_per_row * frame_height_coded / 8), 1946 DMA_ALIGNMENT) * num_vpp_pipes_enc; 1947 1948 dma_opb_wr2_tlb_y_size = max(dma_opb_wr2_tlb_y_size, dma_opb_wr_tlb_y_size << 1); 1949 dma_opb_wr2_tlb_uv_size = max(dma_opb_wr2_tlb_uv_size, dma_opb_wr_tlb_uv_size << 1); 1950 1951 return ALIGN(dma_opb_wr_tlb_y_size + dma_opb_wr_tlb_uv_size + dma_opb_wr2_tlb_y_size + 1952 dma_opb_wr2_tlb_uv_size, DMA_ALIGNMENT); 1953 } 1954 1955 static u32 hfi_vpu4x_buffer_line_enc(u32 frame_width, u32 frame_height, 1956 bool is_ten_bit, u32 num_vpp_pipes_enc, 1957 u32 lcu_size, u32 standard) 1958 { 1959 u32 width_in_lcus = (frame_width + lcu_size - 1) / lcu_size; 1960 u32 height_in_lcus = (frame_height + lcu_size - 1) / lcu_size; 1961 u32 frame_width_coded = width_in_lcus * lcu_size; 1962 u32 frame_height_coded = height_in_lcus * lcu_size; 1963 1964 u32 se_lb_size = size_se_lb(standard, num_vpp_pipes_enc, frame_width_coded, 1965 frame_height_coded); 1966 u32 te_lb_size = size_te_lb(is_ten_bit, num_vpp_pipes_enc, width_in_lcus, 1967 frame_height_coded, frame_width_coded); 1968 u32 fe_lb_size = size_fe_lb(is_ten_bit, standard, num_vpp_pipes_enc, frame_height_coded, 1969 frame_width_coded); 1970 u32 md_lb_size = size_md_lb(standard, frame_width_coded, frame_height_coded, 1971 num_vpp_pipes_enc); 1972 u32 dma_opb_lb_size = size_dma_opb_lb(num_vpp_pipes_enc, frame_width_coded, 1973 frame_height_coded); 1974 u32 dse_lb_size = ALIGN((256 + (16 * (frame_width_coded >> 4))), DMA_ALIGNMENT); 1975 u32 size_vpss_lb_enc = size_vpss_line_buf_vpu33(num_vpp_pipes_enc, frame_width_coded, 1976 frame_height_coded); 1977 1978 return se_lb_size + te_lb_size + fe_lb_size + md_lb_size + dma_opb_lb_size + 1979 dse_lb_size + size_vpss_lb_enc; 1980 } 1981 1982 static u32 iris_vpu4x_enc_line_size(struct iris_inst *inst) 1983 { 1984 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 
1985 u32 lcu_size = inst->codec == V4L2_PIX_FMT_HEVC ? 32 : 16; 1986 struct v4l2_format *f = inst->fmt_dst; 1987 u32 height = f->fmt.pix_mp.height; 1988 u32 width = f->fmt.pix_mp.width; 1989 1990 return hfi_vpu4x_buffer_line_enc(width, height, 0, num_vpp_pipes, 1991 lcu_size, inst->codec); 1992 } 1993 1994 static int output_min_count(struct iris_inst *inst) 1995 { 1996 int output_min_count = 4; 1997 1998 /* fw_min_count > 0 indicates reconfig event has already arrived */ 1999 if (inst->fw_min_count) { 2000 if (iris_split_mode_enabled(inst) && 2001 (inst->codec == V4L2_PIX_FMT_VP9 || 2002 inst->codec == V4L2_PIX_FMT_AV1)) 2003 return min_t(u32, 4, inst->fw_min_count); 2004 else 2005 return inst->fw_min_count; 2006 } 2007 2008 if (inst->codec == V4L2_PIX_FMT_VP9) 2009 output_min_count = 9; 2010 else if (inst->codec == V4L2_PIX_FMT_AV1) 2011 output_min_count = 11; 2012 2013 return output_min_count; 2014 } 2015 2016 struct iris_vpu_buf_type_handle { 2017 enum iris_buffer_type type; 2018 u32 (*handle)(struct iris_inst *inst); 2019 }; 2020 2021 u32 iris_vpu_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type) 2022 { 2023 const struct iris_vpu_buf_type_handle *buf_type_handle_arr = NULL; 2024 u32 size = 0, buf_type_handle_size = 0, i; 2025 2026 static const struct iris_vpu_buf_type_handle dec_internal_buf_type_handle[] = { 2027 {BUF_BIN, iris_vpu_dec_bin_size }, 2028 {BUF_COMV, iris_vpu_dec_comv_size }, 2029 {BUF_NON_COMV, iris_vpu_dec_non_comv_size }, 2030 {BUF_LINE, iris_vpu_dec_line_size }, 2031 {BUF_PERSIST, iris_vpu_dec_persist_size }, 2032 {BUF_DPB, iris_vpu_dec_dpb_size }, 2033 {BUF_SCRATCH_1, iris_vpu_dec_scratch1_size }, 2034 {BUF_PARTIAL, iris_vpu_dec_partial_size }, 2035 }; 2036 2037 static const struct iris_vpu_buf_type_handle enc_internal_buf_type_handle[] = { 2038 {BUF_BIN, iris_vpu_enc_bin_size }, 2039 {BUF_COMV, iris_vpu_enc_comv_size }, 2040 {BUF_NON_COMV, iris_vpu_enc_non_comv_size }, 2041 {BUF_LINE, iris_vpu_enc_line_size }, 2042 
{BUF_ARP, iris_vpu_enc_arp_size }, 2043 {BUF_VPSS, iris_vpu_enc_vpss_size }, 2044 {BUF_SCRATCH_1, iris_vpu_enc_scratch1_size }, 2045 {BUF_SCRATCH_2, iris_vpu_enc_scratch2_size }, 2046 }; 2047 2048 if (inst->domain == DECODER) { 2049 buf_type_handle_size = ARRAY_SIZE(dec_internal_buf_type_handle); 2050 buf_type_handle_arr = dec_internal_buf_type_handle; 2051 } else if (inst->domain == ENCODER) { 2052 buf_type_handle_size = ARRAY_SIZE(enc_internal_buf_type_handle); 2053 buf_type_handle_arr = enc_internal_buf_type_handle; 2054 } 2055 2056 for (i = 0; i < buf_type_handle_size; i++) { 2057 if (buf_type_handle_arr[i].type == buffer_type) { 2058 size = buf_type_handle_arr[i].handle(inst); 2059 break; 2060 } 2061 } 2062 2063 return size; 2064 } 2065 2066 u32 iris_vpu33_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type) 2067 { 2068 u32 size = 0, i; 2069 2070 static const struct iris_vpu_buf_type_handle enc_internal_buf_type_handle[] = { 2071 {BUF_BIN, iris_vpu_enc_bin_size }, 2072 {BUF_COMV, iris_vpu_enc_comv_size }, 2073 {BUF_NON_COMV, iris_vpu_enc_non_comv_size }, 2074 {BUF_LINE, iris_vpu33_enc_line_size }, 2075 {BUF_ARP, iris_vpu_enc_arp_size }, 2076 {BUF_VPSS, iris_vpu_enc_vpss_size }, 2077 {BUF_SCRATCH_1, iris_vpu_enc_scratch1_size }, 2078 {BUF_SCRATCH_2, iris_vpu_enc_scratch2_size }, 2079 }; 2080 2081 if (inst->domain == DECODER) 2082 return iris_vpu_buf_size(inst, buffer_type); 2083 2084 for (i = 0; i < ARRAY_SIZE(enc_internal_buf_type_handle); i++) { 2085 if (enc_internal_buf_type_handle[i].type == buffer_type) { 2086 size = enc_internal_buf_type_handle[i].handle(inst); 2087 break; 2088 } 2089 } 2090 2091 return size; 2092 } 2093 2094 u32 iris_vpu4x_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type) 2095 { 2096 const struct iris_vpu_buf_type_handle *buf_type_handle_arr = NULL; 2097 u32 size = 0, buf_type_handle_size = 0, i; 2098 2099 static const struct iris_vpu_buf_type_handle dec_internal_buf_type_handle[] = { 2100 {BUF_BIN, 
iris_vpu_dec_bin_size }, 2101 {BUF_COMV, iris_vpu_dec_comv_size }, 2102 {BUF_NON_COMV, iris_vpu_dec_non_comv_size }, 2103 {BUF_LINE, iris_vpu4x_dec_line_size }, 2104 {BUF_PERSIST, iris_vpu4x_dec_persist_size }, 2105 {BUF_DPB, iris_vpu_dec_dpb_size }, 2106 {BUF_SCRATCH_1, iris_vpu_dec_scratch1_size }, 2107 }; 2108 2109 static const struct iris_vpu_buf_type_handle enc_internal_buf_type_handle[] = { 2110 {BUF_BIN, iris_vpu_enc_bin_size }, 2111 {BUF_COMV, iris_vpu_enc_comv_size }, 2112 {BUF_NON_COMV, iris_vpu_enc_non_comv_size }, 2113 {BUF_LINE, iris_vpu4x_enc_line_size }, 2114 {BUF_ARP, iris_vpu_enc_arp_size }, 2115 {BUF_VPSS, iris_vpu_enc_vpss_size }, 2116 {BUF_SCRATCH_1, iris_vpu_enc_scratch1_size }, 2117 {BUF_SCRATCH_2, iris_vpu_enc_scratch2_size }, 2118 }; 2119 2120 if (inst->domain == DECODER) { 2121 buf_type_handle_size = ARRAY_SIZE(dec_internal_buf_type_handle); 2122 buf_type_handle_arr = dec_internal_buf_type_handle; 2123 } else if (inst->domain == ENCODER) { 2124 buf_type_handle_size = ARRAY_SIZE(enc_internal_buf_type_handle); 2125 buf_type_handle_arr = enc_internal_buf_type_handle; 2126 } 2127 2128 for (i = 0; i < buf_type_handle_size; i++) { 2129 if (buf_type_handle_arr[i].type == buffer_type) { 2130 size = buf_type_handle_arr[i].handle(inst); 2131 break; 2132 } 2133 } 2134 2135 return size; 2136 } 2137 2138 static u32 internal_buffer_count(struct iris_inst *inst, 2139 enum iris_buffer_type buffer_type) 2140 { 2141 if (buffer_type == BUF_BIN || buffer_type == BUF_LINE || 2142 buffer_type == BUF_PERSIST) { 2143 return 1; 2144 } else if (buffer_type == BUF_COMV || buffer_type == BUF_NON_COMV) { 2145 if (inst->codec == V4L2_PIX_FMT_H264 || 2146 inst->codec == V4L2_PIX_FMT_HEVC || 2147 inst->codec == V4L2_PIX_FMT_AV1) 2148 return 1; 2149 } 2150 2151 return 0; 2152 } 2153 2154 static inline int iris_vpu_dpb_count(struct iris_inst *inst) 2155 { 2156 if (inst->codec == V4L2_PIX_FMT_AV1) 2157 return 11; 2158 2159 if (iris_split_mode_enabled(inst)) { 2160 return 
inst->fw_min_count ? 2161 inst->fw_min_count : inst->buffers[BUF_OUTPUT].min_count; 2162 } 2163 2164 return 0; 2165 } 2166 2167 int iris_vpu_buf_count(struct iris_inst *inst, enum iris_buffer_type buffer_type) 2168 { 2169 switch (buffer_type) { 2170 case BUF_INPUT: 2171 return MIN_BUFFERS; 2172 case BUF_OUTPUT: 2173 if (inst->domain == ENCODER) 2174 return MIN_BUFFERS; 2175 else 2176 return output_min_count(inst); 2177 case BUF_NON_COMV: 2178 if (inst->codec == V4L2_PIX_FMT_AV1) 2179 return 0; 2180 else 2181 return 1; 2182 case BUF_BIN: 2183 case BUF_COMV: 2184 case BUF_LINE: 2185 case BUF_PERSIST: 2186 return internal_buffer_count(inst, buffer_type); 2187 case BUF_SCRATCH_1: 2188 case BUF_SCRATCH_2: 2189 case BUF_VPSS: 2190 case BUF_ARP: 2191 case BUF_PARTIAL: 2192 return 1; /* internal buffer count needed by firmware is 1 */ 2193 case BUF_DPB: 2194 return iris_vpu_dpb_count(inst); 2195 default: 2196 return 0; 2197 } 2198 } 2199