1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. 4 */ 5 6 #include "iris_instance.h" 7 #include "iris_vpu_buffer.h" 8 #include "iris_hfi_gen1_defines.h" 9 #include "iris_hfi_gen2_defines.h" 10 11 #define HFI_MAX_COL_FRAME 6 12 #define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_HEIGHT (8) 13 #define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH (32) 14 #define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_HEIGHT (8) 15 #define HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_WIDTH (16) 16 #define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_HEIGHT (4) 17 #define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH (48) 18 #define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_HEIGHT (4) 19 #define HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_WIDTH (24) 20 #define AV1D_SIZE_BSE_COL_MV_64x64 512 21 #define AV1D_SIZE_BSE_COL_MV_128x128 2816 22 #define UBWC_TILE_SIZE 256 23 24 #ifndef SYSTEM_LAL_TILE10 25 #define SYSTEM_LAL_TILE10 192 26 #endif 27 28 static u32 size_h264d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 29 { 30 u32 size_yuv, size_bin_hdr, size_bin_res; 31 32 size_yuv = ((frame_width * frame_height) <= BIN_BUFFER_THRESHOLD) ? 33 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : 34 ((frame_width * frame_height * 3) >> 1); 35 size_bin_hdr = size_yuv * H264_CABAC_HDR_RATIO_HD_TOT; 36 size_bin_res = size_yuv * H264_CABAC_RES_RATIO_HD_TOT; 37 size_bin_hdr = ALIGN(size_bin_hdr / num_vpp_pipes, 38 DMA_ALIGNMENT) * num_vpp_pipes; 39 size_bin_res = ALIGN(size_bin_res / num_vpp_pipes, 40 DMA_ALIGNMENT) * num_vpp_pipes; 41 42 return size_bin_hdr + size_bin_res; 43 } 44 45 static u32 hfi_buffer_bin_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 46 { 47 u32 n_aligned_h = ALIGN(frame_height, 16); 48 u32 n_aligned_w = ALIGN(frame_width, 16); 49 50 return size_h264d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes); 51 } 52 53 static u32 size_av1d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 54 { 55 u32 size_yuv, size_bin_hdr, size_bin_res; 56 57 size_yuv = ((frame_width * frame_height) <= BIN_BUFFER_THRESHOLD) ? 58 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : 59 ((frame_width * frame_height * 3) >> 1); 60 size_bin_hdr = size_yuv * AV1_CABAC_HDR_RATIO_HD_TOT; 61 size_bin_res = size_yuv * AV1_CABAC_RES_RATIO_HD_TOT; 62 size_bin_hdr = ALIGN(size_bin_hdr / num_vpp_pipes, 63 DMA_ALIGNMENT) * num_vpp_pipes; 64 size_bin_res = ALIGN(size_bin_res / num_vpp_pipes, 65 DMA_ALIGNMENT) * num_vpp_pipes; 66 67 return size_bin_hdr + size_bin_res; 68 } 69 70 static u32 hfi_buffer_bin_av1d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 71 { 72 u32 n_aligned_h = ALIGN(frame_height, 16); 73 u32 n_aligned_w = ALIGN(frame_width, 16); 74 75 return size_av1d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes); 76 } 77 78 static u32 size_h265d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 79 { 80 u32 product = frame_width * frame_height; 81 u32 size_yuv, size_bin_hdr, size_bin_res; 82 83 size_yuv = (product <= BIN_BUFFER_THRESHOLD) ? 84 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1); 85 size_bin_hdr = size_yuv * H265_CABAC_HDR_RATIO_HD_TOT; 86 size_bin_res = size_yuv * H265_CABAC_RES_RATIO_HD_TOT; 87 size_bin_hdr = ALIGN(size_bin_hdr / num_vpp_pipes, DMA_ALIGNMENT) * num_vpp_pipes; 88 size_bin_res = ALIGN(size_bin_res / num_vpp_pipes, DMA_ALIGNMENT) * num_vpp_pipes; 89 90 return size_bin_hdr + size_bin_res; 91 } 92 93 static u32 hfi_buffer_bin_vp9d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 94 { 95 u32 _size_yuv = ALIGN(frame_width, 16) * ALIGN(frame_height, 16) * 3 / 2; 96 u32 _size = ALIGN(((max_t(u32, _size_yuv, ((BIN_BUFFER_THRESHOLD * 3) >> 1)) * 97 VPX_DECODER_FRAME_BIN_HDR_BUDGET / VPX_DECODER_FRAME_BIN_DENOMINATOR * 98 VPX_DECODER_FRAME_CONCURENCY_LVL) / num_vpp_pipes), DMA_ALIGNMENT) + 99 ALIGN(((max_t(u32, _size_yuv, ((BIN_BUFFER_THRESHOLD * 3) >> 1)) * 100 VPX_DECODER_FRAME_BIN_RES_BUDGET / VPX_DECODER_FRAME_BIN_DENOMINATOR * 101 VPX_DECODER_FRAME_CONCURENCY_LVL) / num_vpp_pipes), DMA_ALIGNMENT); 102 103 return _size * num_vpp_pipes; 104 } 105 106 static u32 hfi_buffer_bin_h265d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 107 { 108 u32 n_aligned_w = ALIGN(frame_width, 16); 109 u32 n_aligned_h = ALIGN(frame_height, 16); 110 111 return size_h265d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes); 112 } 113 114 static u32 hfi_buffer_comv_h264d(u32 frame_width, u32 frame_height, u32 _comv_bufcount) 115 { 116 u32 frame_height_in_mbs = DIV_ROUND_UP(frame_height, 16); 117 u32 frame_width_in_mbs = DIV_ROUND_UP(frame_width, 16); 118 u32 col_zero_aligned_width = (frame_width_in_mbs << 2); 119 u32 col_mv_aligned_width = (frame_width_in_mbs << 7); 120 u32 col_zero_size, size_colloc; 121 122 col_mv_aligned_width = ALIGN(col_mv_aligned_width, 16); 123 col_zero_aligned_width = ALIGN(col_zero_aligned_width, 16); 124 col_zero_size = col_zero_aligned_width * 125 ((frame_height_in_mbs + 1) >> 1); 126 col_zero_size = ALIGN(col_zero_size, 64); 127 col_zero_size <<= 1; 128 col_zero_size = ALIGN(col_zero_size, 512); 129 size_colloc = col_mv_aligned_width * ((frame_height_in_mbs + 1) >> 1); 130 size_colloc = ALIGN(size_colloc, 64); 131 size_colloc <<= 1; 132 size_colloc = ALIGN(size_colloc, 512); 133 size_colloc += (col_zero_size + SIZE_H264D_BUFTAB_T * 2); 134 135 return (size_colloc * (_comv_bufcount)) + 512; 136 } 137 138 static u32 hfi_buffer_comv_h265d(u32 frame_width, u32 frame_height, u32 _comv_bufcount) 139 { 140 u32 frame_height_in_mbs = (frame_height + 15) >> 4; 141 u32 frame_width_in_mbs = (frame_width + 15) >> 4; 142 u32 _size; 143 144 _size = ALIGN(((frame_width_in_mbs * frame_height_in_mbs) << 8), 512); 145 146 return (_size * (_comv_bufcount)) + 512; 147 } 148 149 static u32 num_lcu(u32 frame_width, u32 frame_height, u32 lcu_size) 150 { 151 return ((frame_width + lcu_size - 1) / lcu_size) * 152 ((frame_height + lcu_size - 1) / lcu_size); 153 } 154 155 static u32 hfi_buffer_comv_av1d(u32 frame_width, u32 frame_height, u32 comv_bufcount) 156 { 157 u32 size; 158 159 size = 2 * ALIGN(max(num_lcu(frame_width, frame_height, 64) * 160 AV1D_SIZE_BSE_COL_MV_64x64, 161 num_lcu(frame_width, frame_height, 128) * 162 AV1D_SIZE_BSE_COL_MV_128x128), 163 DMA_ALIGNMENT); 164 size *= comv_bufcount; 165 166 return size; 167 } 168 169 static u32 size_h264d_bse_cmd_buf(u32 frame_height) 170 { 171 u32 height = ALIGN(frame_height, 32); 172 173 return min_t(u32, (DIV_ROUND_UP(height, 16) * 48), H264D_MAX_SLICE) * 174 SIZE_H264D_BSE_CMD_PER_BUF; 175 } 176 177 static u32 size_h265d_bse_cmd_buf(u32 frame_width, u32 frame_height) 178 { 179 u32 _size = ALIGN(((ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 180 (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) * 181 NUM_HW_PIC_BUF, DMA_ALIGNMENT); 182 _size = min_t(u32, _size, H265D_MAX_SLICE + 1); 183 _size = 2 * _size * SIZE_H265D_BSE_CMD_PER_BUF; 184 185 return _size; 186 } 187 188 static u32 hfi_buffer_persist_h265d(u32 rpu_enabled) 189 { 190 return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + 191 H265_NUM_FRM_INFO * H265_DISPLAY_BUF_SIZE + 192 H265_NUM_TILE * sizeof(u32) + 193 NUM_HW_PIC_BUF * SIZE_SEI_USERDATA + 194 rpu_enabled * NUM_HW_PIC_BUF * SIZE_DOLBY_RPU_METADATA), 195 DMA_ALIGNMENT); 196 } 197 198 static inline 199 u32 hfi_iris3_vp9d_comv_size(void) 200 { 201 return (((8192 + 63) >> 6) * ((4320 + 63) >> 6) * 8 * 8 * 2 * 8); 202 } 203 204 static u32 hfi_buffer_persist_vp9d(void) 205 { 206 return ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE, DMA_ALIGNMENT) + 207 ALIGN(hfi_iris3_vp9d_comv_size(), DMA_ALIGNMENT) + 208 ALIGN(MAX_SUPERFRAME_HEADER_LEN, DMA_ALIGNMENT) + 209 ALIGN(VP9_UDC_HEADER_BUF_SIZE, DMA_ALIGNMENT) + 210 ALIGN(VP9_NUM_FRAME_INFO_BUF * CCE_TILE_OFFSET_SIZE, DMA_ALIGNMENT) + 211 ALIGN(VP9_NUM_FRAME_INFO_BUF * VP9_FRAME_INFO_BUF_SIZE, DMA_ALIGNMENT) + 212 HDR10_HIST_EXTRADATA_SIZE; 213 } 214 215 static u32 size_h264d_vpp_cmd_buf(u32 frame_height) 216 { 217 u32 size, height = ALIGN(frame_height, 32); 218 219 size = min_t(u32, (DIV_ROUND_UP(height, 16) * 48), H264D_MAX_SLICE) * 220 SIZE_H264D_VPP_CMD_PER_BUF; 221 222 return size > VPP_CMD_MAX_SIZE ? VPP_CMD_MAX_SIZE : size; 223 } 224 225 static u32 hfi_buffer_persist_h264d(void) 226 { 227 return ALIGN(SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264 + 228 H264_DISPLAY_BUF_SIZE * H264_NUM_FRM_INFO + 229 NUM_HW_PIC_BUF * SIZE_SEI_USERDATA, 230 DMA_ALIGNMENT); 231 } 232 233 static u32 hfi_buffer_persist_av1d(u32 max_width, u32 max_height, u32 total_ref_count) 234 { 235 u32 comv_size, size; 236 237 comv_size = hfi_buffer_comv_av1d(max_width, max_height, total_ref_count); 238 size = ALIGN((SIZE_AV1D_SEQUENCE_HEADER * 2 + SIZE_AV1D_METADATA + 239 AV1D_NUM_HW_PIC_BUF * (SIZE_AV1D_TILE_OFFSET + SIZE_AV1D_QM) + 240 AV1D_NUM_FRAME_HEADERS * (SIZE_AV1D_FRAME_HEADER + 241 2 * SIZE_AV1D_PROB_TABLE) + comv_size + HDR10_HIST_EXTRADATA_SIZE + 242 SIZE_AV1D_METADATA * AV1D_NUM_HW_PIC_BUF), DMA_ALIGNMENT); 243 244 return ALIGN(size, DMA_ALIGNMENT); 245 } 246 247 static u32 hfi_buffer_non_comv_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 248 { 249 u32 size_bse = size_h264d_bse_cmd_buf(frame_height); 250 u32 size_vpp = size_h264d_vpp_cmd_buf(frame_height); 251 u32 size = ALIGN(size_bse, DMA_ALIGNMENT) + 252 ALIGN(size_vpp, DMA_ALIGNMENT) + 253 ALIGN(SIZE_HW_PIC(SIZE_H264D_HW_PIC_T), DMA_ALIGNMENT); 254 255 return ALIGN(size, DMA_ALIGNMENT); 256 } 257 258 static u32 size_h265d_vpp_cmd_buf(u32 frame_width, u32 frame_height) 259 { 260 u32 _size = ALIGN(((ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 261 (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) * 262 NUM_HW_PIC_BUF, DMA_ALIGNMENT); 263 _size = min_t(u32, _size, H265D_MAX_SLICE + 1); 264 _size = ALIGN(_size, 4); 265 _size = 2 * _size * SIZE_H265D_VPP_CMD_PER_BUF; 266 if (_size > VPP_CMD_MAX_SIZE) 267 _size = VPP_CMD_MAX_SIZE; 268 269 return _size; 270 } 271 272 static u32 hfi_buffer_non_comv_h265d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 273 { 274 u32 _size_bse = size_h265d_bse_cmd_buf(frame_width, frame_height); 275 u32 _size_vpp = size_h265d_vpp_cmd_buf(frame_width, frame_height); 276 u32 _size = ALIGN(_size_bse, DMA_ALIGNMENT) + 277 ALIGN(_size_vpp, DMA_ALIGNMENT) + 278 ALIGN(NUM_HW_PIC_BUF * 20 * 22 * 4, DMA_ALIGNMENT) + 279 ALIGN(2 * sizeof(u16) * 280 (ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 281 (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS), DMA_ALIGNMENT) + 282 ALIGN(SIZE_HW_PIC(SIZE_H265D_HW_PIC_T), DMA_ALIGNMENT) + 283 HDR10_HIST_EXTRADATA_SIZE; 284 285 return ALIGN(_size, DMA_ALIGNMENT); 286 } 287 288 static u32 size_vpss_lb(u32 frame_width, u32 frame_height) 289 { 290 u32 opb_lb_wr_llb_y_buffer_size, opb_lb_wr_llb_uv_buffer_size; 291 u32 opb_wr_top_line_chroma_buffer_size; 292 u32 opb_wr_top_line_luma_buffer_size; 293 u32 macrotiling_size = 32; 294 295 opb_wr_top_line_luma_buffer_size = 296 ALIGN(frame_width, macrotiling_size) / macrotiling_size * 256; 297 opb_wr_top_line_luma_buffer_size = 298 ALIGN(opb_wr_top_line_luma_buffer_size, DMA_ALIGNMENT) + 299 (MAX_TILE_COLUMNS - 1) * 256; 300 opb_wr_top_line_luma_buffer_size = 301 max_t(u32, opb_wr_top_line_luma_buffer_size, (32 * ALIGN(frame_height, 8))); 302 opb_wr_top_line_chroma_buffer_size = opb_wr_top_line_luma_buffer_size; 303 opb_lb_wr_llb_uv_buffer_size = 304 ALIGN((ALIGN(frame_height, 8) / (4 / 2)) * 64, 32); 305 opb_lb_wr_llb_y_buffer_size = 306 ALIGN((ALIGN(frame_height, 8) / (4 / 2)) * 64, 32); 307 return opb_wr_top_line_luma_buffer_size + 308 opb_wr_top_line_chroma_buffer_size + 309 opb_lb_wr_llb_uv_buffer_size + 310 opb_lb_wr_llb_y_buffer_size; 311 } 312 313 static inline 314 u32 size_h265d_lb_fe_top_data(u32 frame_width, u32 frame_height) 315 { 316 return MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * 317 (ALIGN(frame_width, 64) + 8) * 2; 318 } 319 320 static inline 321 u32 size_h265d_lb_fe_top_ctrl(u32 frame_width, u32 frame_height) 322 { 323 return MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * 324 (ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS); 325 } 326 327 static inline 328 u32 size_h265d_lb_fe_left_ctrl(u32 frame_width, u32 frame_height) 329 { 330 return MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * 331 (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS); 332 } 333 334 static inline 335 u32 size_h265d_lb_se_top_ctrl(u32 frame_width, u32 frame_height) 336 { 337 return (LCU_MAX_SIZE_PELS / 8 * (128 / 8)) * ((frame_width + 15) >> 4); 338 } 339 340 static inline 341 u32 size_h265d_lb_se_left_ctrl(u32 frame_width, u32 frame_height) 342 { 343 return max_t(u32, ((frame_height + 16 - 1) / 8) * 344 MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE, 345 max_t(u32, ((frame_height + 32 - 1) / 8) * 346 MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE, 347 ((frame_height + 64 - 1) / 8) * 348 MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE)); 349 } 350 351 static inline 352 u32 size_h265d_lb_pe_top_data(u32 frame_width, u32 frame_height) 353 { 354 return MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * 355 (ALIGN(frame_width, LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS); 356 } 357 358 static inline 359 u32 size_h265d_lb_vsp_top(u32 frame_width, u32 frame_height) 360 { 361 return ((frame_width + 63) >> 6) * 128; 362 } 363 364 static inline 365 u32 size_h265d_lb_vsp_left(u32 frame_width, u32 frame_height) 366 { 367 return ((frame_height + 63) >> 6) * 128; 368 } 369 370 static inline 371 u32 size_h265d_lb_recon_dma_metadata_wr(u32 frame_width, u32 frame_height) 372 { 373 return size_h264d_lb_recon_dma_metadata_wr(frame_height); 374 } 375 376 static inline 377 u32 size_h265d_qp(u32 frame_width, u32 frame_height) 378 { 379 return size_h264d_qp(frame_width, frame_height); 380 } 381 382 static inline 383 u32 hfi_buffer_line_h265d(u32 frame_width, u32 frame_height, bool is_opb, u32 num_vpp_pipes) 384 { 385 u32 vpss_lb_size = 0, _size; 386 387 _size = ALIGN(size_h265d_lb_fe_top_data(frame_width, frame_height), DMA_ALIGNMENT) + 388 ALIGN(size_h265d_lb_fe_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) + 389 ALIGN(size_h265d_lb_fe_left_ctrl(frame_width, frame_height), 390 DMA_ALIGNMENT) * num_vpp_pipes + 391 ALIGN(size_h265d_lb_se_left_ctrl(frame_width, frame_height), 392 DMA_ALIGNMENT) * num_vpp_pipes + 393 ALIGN(size_h265d_lb_se_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) + 394 ALIGN(size_h265d_lb_pe_top_data(frame_width, frame_height), DMA_ALIGNMENT) + 395 ALIGN(size_h265d_lb_vsp_top(frame_width, frame_height), DMA_ALIGNMENT) + 396 ALIGN(size_h265d_lb_vsp_left(frame_width, frame_height), 397 DMA_ALIGNMENT) * num_vpp_pipes + 398 ALIGN(size_h265d_lb_recon_dma_metadata_wr(frame_width, frame_height), 399 DMA_ALIGNMENT) * 4 + 400 ALIGN(size_h265d_qp(frame_width, frame_height), DMA_ALIGNMENT); 401 if (is_opb) 402 vpss_lb_size = size_vpss_lb(frame_width, frame_height); 403 404 return ALIGN((_size + vpss_lb_size), DMA_ALIGNMENT); 405 } 406 407 static inline 408 u32 size_vpxd_lb_fe_left_ctrl(u32 frame_width, u32 frame_height) 409 { 410 return max_t(u32, ((frame_height + 15) >> 4) * 411 MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE, 412 max_t(u32, ((frame_height + 31) >> 5) * 413 MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE, 414 ((frame_height + 63) >> 6) * 415 MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE)); 416 } 417 418 static inline 419 u32 size_vpxd_lb_fe_top_ctrl(u32 frame_width, u32 frame_height) 420 { 421 return ((ALIGN(frame_width, 64) + 8) * 10 * 2); 422 } 423 424 static inline 425 u32 size_vpxd_lb_se_top_ctrl(u32 frame_width, u32 frame_height) 426 { 427 return ((frame_width + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE; 428 } 429 430 static inline 431 u32 size_vpxd_lb_se_left_ctrl(u32 frame_width, u32 frame_height) 432 { 433 return max_t(u32, ((frame_height + 15) >> 4) * 434 MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE, 435 max_t(u32, ((frame_height + 31) >> 5) * 436 MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE, 437 ((frame_height + 63) >> 6) * 438 MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE)); 439 } 440 441 static inline 442 u32 size_vpxd_lb_recon_dma_metadata_wr(u32 frame_width, u32 frame_height) 443 { 444 return ALIGN((ALIGN(frame_height, 8) / (4 / 2)) * 64, 445 BUFFER_ALIGNMENT_32_BYTES); 446 } 447 448 static inline __maybe_unused 449 u32 size_mp2d_lb_fe_top_data(u32 frame_width, u32 frame_height) 450 { 451 return ((ALIGN(frame_width, 16) + 8) * 10 * 2); 452 } 453 454 static inline 455 u32 size_vp9d_lb_fe_top_data(u32 frame_width, u32 frame_height) 456 { 457 return (ALIGN(ALIGN(frame_width, 8), 64) + 8) * 10 * 2; 458 } 459 460 static inline 461 u32 size_vp9d_lb_pe_top_data(u32 frame_width, u32 frame_height) 462 { 463 return ((ALIGN(ALIGN(frame_width, 8), 64) >> 6) * 176); 464 } 465 466 static inline 467 u32 size_vp9d_lb_vsp_top(u32 frame_width, u32 frame_height) 468 { 469 return (((ALIGN(ALIGN(frame_width, 8), 64) >> 6) * 64 * 8) + 256); 470 } 471 472 static inline 473 u32 size_vp9d_qp(u32 frame_width, u32 frame_height) 474 { 475 return size_h264d_qp(frame_width, frame_height); 476 } 477 478 static inline 479 u32 hfi_iris3_vp9d_lb_size(u32 frame_width, u32 frame_height, u32 num_vpp_pipes) 480 { 481 return ALIGN(size_vpxd_lb_fe_left_ctrl(frame_width, frame_height), DMA_ALIGNMENT) * 482 num_vpp_pipes + 483 ALIGN(size_vpxd_lb_se_left_ctrl(frame_width, frame_height), DMA_ALIGNMENT) * 484 num_vpp_pipes + 485 ALIGN(size_vp9d_lb_vsp_top(frame_width, frame_height), DMA_ALIGNMENT) + 486 ALIGN(size_vpxd_lb_fe_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) + 487 2 * ALIGN(size_vpxd_lb_recon_dma_metadata_wr(frame_width, frame_height), 488 DMA_ALIGNMENT) + 489 ALIGN(size_vpxd_lb_se_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) + 490 ALIGN(size_vp9d_lb_pe_top_data(frame_width, frame_height), DMA_ALIGNMENT) + 491 ALIGN(size_vp9d_lb_fe_top_data(frame_width, frame_height), DMA_ALIGNMENT) + 492 ALIGN(size_vp9d_qp(frame_width, frame_height), DMA_ALIGNMENT); 493 } 494 495 static inline 496 u32 hfi_buffer_line_vp9d(u32 frame_width, u32 frame_height, u32 _yuv_bufcount_min, bool is_opb, 497 u32 num_vpp_pipes) 498 { 499 u32 vpss_lb_size = 0; 500 u32 _lb_size; 501 502 _lb_size = hfi_iris3_vp9d_lb_size(frame_width, frame_height, num_vpp_pipes); 503 504 if (is_opb) 505 vpss_lb_size = size_vpss_lb(frame_width, frame_height); 506 507 return _lb_size + vpss_lb_size + 4096; 508 } 509 510 static u32 hfi_buffer_line_h264d(u32 frame_width, u32 frame_height, 511 bool is_opb, u32 num_vpp_pipes) 512 { 513 u32 vpss_lb_size = 0; 514 u32 size; 515 516 size = ALIGN(size_h264d_lb_fe_top_data(frame_width), DMA_ALIGNMENT) + 517 ALIGN(size_h264d_lb_fe_top_ctrl(frame_width), DMA_ALIGNMENT) + 518 ALIGN(size_h264d_lb_fe_left_ctrl(frame_height), DMA_ALIGNMENT) * num_vpp_pipes + 519 ALIGN(size_h264d_lb_se_top_ctrl(frame_width), DMA_ALIGNMENT) + 520 ALIGN(size_h264d_lb_se_left_ctrl(frame_height), DMA_ALIGNMENT) * num_vpp_pipes + 521 ALIGN(size_h264d_lb_pe_top_data(frame_width), DMA_ALIGNMENT) + 522 ALIGN(size_h264d_lb_vsp_top(frame_width), DMA_ALIGNMENT) + 523 ALIGN(size_h264d_lb_recon_dma_metadata_wr(frame_height), DMA_ALIGNMENT) * 2 + 524 ALIGN(size_h264d_qp(frame_width, frame_height), DMA_ALIGNMENT); 525 size = ALIGN(size, DMA_ALIGNMENT); 526 if (is_opb) 527 vpss_lb_size = size_vpss_lb(frame_width, frame_height); 528 529 return ALIGN((size + vpss_lb_size), DMA_ALIGNMENT); 530 } 531 532 static u32 size_av1d_lb_opb_wr1_nv12_ubwc(u32 frame_width, u32 frame_height) 533 { 534 u32 size, y_width, y_width_a = 128; 535 536 y_width = ALIGN(frame_width, y_width_a); 537 538 size = ((y_width + HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH - 1) / 539 HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH + 540 (AV1D_MAX_TILE_COLS - 1)); 541 return size * UBWC_TILE_SIZE; 542 } 543 544 static u32 size_av1d_lb_opb_wr1_tp10_ubwc(u32 frame_width, u32 frame_height) 545 { 546 u32 size, y_width, y_width_a = 256; 547 548 y_width = ALIGN(frame_width, y_width_a); 549 550 size = ((y_width + HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH - 1) / 551 HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH + 552 (AV1D_MAX_TILE_COLS - 1)); 553 554 return size * UBWC_TILE_SIZE; 555 } 556 557 static u32 hfi_buffer_line_av1d(u32 frame_width, u32 frame_height, 558 bool is_opb, u32 num_vpp_pipes) 559 { 560 u32 size, vpss_lb_size, opbwrbufsize, opbwr8, opbwr10; 561 562 size = ALIGN(size_av1d_lb_fe_top_data(frame_width, frame_height), 563 DMA_ALIGNMENT) + 564 ALIGN(size_av1d_lb_fe_top_ctrl(frame_width, frame_height), 565 DMA_ALIGNMENT) + 566 ALIGN(size_av1d_lb_fe_left_data(frame_width, frame_height), 567 DMA_ALIGNMENT) * num_vpp_pipes + 568 ALIGN(size_av1d_lb_fe_left_ctrl(frame_width, frame_height), 569 DMA_ALIGNMENT) * num_vpp_pipes + 570 ALIGN(size_av1d_lb_se_left_ctrl(frame_width, frame_height), 571 DMA_ALIGNMENT) * num_vpp_pipes + 572 ALIGN(size_av1d_lb_se_top_ctrl(frame_width, frame_height), 573 DMA_ALIGNMENT) + 574 ALIGN(size_av1d_lb_pe_top_data(frame_width, frame_height), 575 DMA_ALIGNMENT) + 576 ALIGN(size_av1d_lb_vsp_top(frame_width, frame_height), 577 DMA_ALIGNMENT) + 578 ALIGN(size_av1d_lb_recon_dma_metadata_wr 579 (frame_width, frame_height), DMA_ALIGNMENT) * 2 + 580 ALIGN(size_av1d_qp(frame_width, frame_height), DMA_ALIGNMENT); 581 opbwr8 = size_av1d_lb_opb_wr1_nv12_ubwc(frame_width, frame_height); 582 opbwr10 = size_av1d_lb_opb_wr1_tp10_ubwc(frame_width, frame_height); 583 opbwrbufsize = opbwr8 >= opbwr10 ? opbwr8 : opbwr10; 584 size = ALIGN((size + opbwrbufsize), DMA_ALIGNMENT); 585 if (is_opb) { 586 vpss_lb_size = size_vpss_lb(frame_width, frame_height); 587 size = ALIGN((size + vpss_lb_size) * 2, DMA_ALIGNMENT); 588 } 589 590 return size; 591 } 592 593 static u32 size_av1d_ibc_nv12_ubwc(u32 frame_width, u32 frame_height) 594 { 595 u32 size; 596 u32 y_width_a = 128, y_height_a = 32; 597 u32 uv_width_a = 128, uv_height_a = 32; 598 u32 ybufsize, uvbufsize, y_width, y_height, uv_width, uv_height; 599 u32 y_meta_width_a = 64, y_meta_height_a = 16; 600 u32 uv_meta_width_a = 64, uv_meta_height_a = 16; 601 u32 meta_height, meta_stride, meta_size; 602 u32 tile_width_y = HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_WIDTH; 603 u32 tile_height_y = HFI_COLOR_FORMAT_YUV420_NV12_UBWC_Y_TILE_HEIGHT; 604 u32 tile_width_uv = HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_WIDTH; 605 u32 tile_height_uv = HFI_COLOR_FORMAT_YUV420_NV12_UBWC_UV_TILE_HEIGHT; 606 607 y_width = ALIGN(frame_width, y_width_a); 608 y_height = ALIGN(frame_height, y_height_a); 609 uv_width = ALIGN(frame_width, uv_width_a); 610 uv_height = ALIGN(((frame_height + 1) >> 1), uv_height_a); 611 ybufsize = ALIGN((y_width * y_height), HFI_ALIGNMENT_4096); 612 uvbufsize = ALIGN(uv_width * uv_height, HFI_ALIGNMENT_4096); 613 size = ybufsize + uvbufsize; 614 meta_stride = ALIGN(((frame_width + (tile_width_y - 1)) / tile_width_y), 615 y_meta_width_a); 616 meta_height = ALIGN(((frame_height + (tile_height_y - 1)) / tile_height_y), 617 y_meta_height_a); 618 meta_size = ALIGN(meta_stride * meta_height, HFI_ALIGNMENT_4096); 619 size += meta_size; 620 meta_stride = ALIGN(((((frame_width + 1) >> 1) + (tile_width_uv - 1)) / 621 tile_width_uv), uv_meta_width_a); 622 meta_height = ALIGN(((((frame_height + 1) >> 1) + (tile_height_uv - 1)) / 623 tile_height_uv), uv_meta_height_a); 624 meta_size = ALIGN(meta_stride * meta_height, HFI_ALIGNMENT_4096); 625 size += meta_size; 626 627 return size; 628 } 629 630 static u32 hfi_yuv420_tp10_calc_y_stride(u32 frame_width, u32 stride_multiple) 631 { 632 u32 stride; 633 634 stride = ALIGN(frame_width, 192); 635 stride = ALIGN(stride * 4 / 3, stride_multiple); 636 637 return stride; 638 } 639 640 static u32 hfi_yuv420_tp10_calc_y_bufheight(u32 frame_height, u32 min_buf_height_multiple) 641 { 642 return ALIGN(frame_height, min_buf_height_multiple); 643 } 644 645 static u32 hfi_yuv420_tp10_calc_uv_stride(u32 frame_width, u32 stride_multiple) 646 { 647 u32 stride; 648 649 stride = ALIGN(frame_width, 192); 650 stride = ALIGN(stride * 4 / 3, stride_multiple); 651 652 return stride; 653 } 654 655 static u32 hfi_yuv420_tp10_calc_uv_bufheight(u32 frame_height, u32 min_buf_height_multiple) 656 { 657 return ALIGN(((frame_height + 1) >> 1), min_buf_height_multiple); 658 } 659 660 static u32 size_av1d_ibc_tp10_ubwc(u32 frame_width, u32 frame_height) 661 { 662 u32 size; 663 u32 y_width_a = 256, y_height_a = 16, 664 uv_width_a = 256, uv_height_a = 16; 665 u32 ybufsize, uvbufsize, y_width, y_height, uv_width, uv_height; 666 u32 y_meta_width_a = 64, y_meta_height_a = 16, 667 uv_meta_width_a = 64, uv_meta_height_a = 16; 668 u32 meta_height, meta_stride, meta_size; 669 u32 tile_width_y = HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_WIDTH; 670 u32 tile_height_y = HFI_COLOR_FORMAT_YUV420_TP10_UBWC_Y_TILE_HEIGHT; 671 u32 tile_width_uv = HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_WIDTH; 672 u32 tile_height_uv = HFI_COLOR_FORMAT_YUV420_TP10_UBWC_UV_TILE_HEIGHT; 673 674 y_width = hfi_yuv420_tp10_calc_y_stride(frame_width, y_width_a); 675 y_height = hfi_yuv420_tp10_calc_y_bufheight(frame_height, y_height_a); 676 uv_width = hfi_yuv420_tp10_calc_uv_stride(frame_width, uv_width_a); 677 uv_height = hfi_yuv420_tp10_calc_uv_bufheight(frame_height, uv_height_a); 678 ybufsize = ALIGN(y_width * y_height, HFI_ALIGNMENT_4096); 679 uvbufsize = ALIGN(uv_width * uv_height, HFI_ALIGNMENT_4096); 680 size = ybufsize + uvbufsize; 681 meta_stride = ALIGN(((frame_width + (tile_width_y - 1)) / tile_width_y), 682 y_meta_width_a); 683 meta_height = ALIGN(((frame_height + (tile_height_y - 1)) / tile_height_y), 684 y_meta_height_a); 685 meta_size = ALIGN(meta_stride * meta_height, HFI_ALIGNMENT_4096); 686 size += meta_size; 687 meta_stride = ALIGN(((((frame_width + 1) >> 1) + (tile_width_uv - 1)) / 688 tile_width_uv), uv_meta_width_a); 689 meta_height = ALIGN(((((frame_height + 1) >> 1) + (tile_height_uv - 1)) / 690 tile_height_uv), uv_meta_height_a); 691 meta_size = ALIGN(meta_stride * meta_height, HFI_ALIGNMENT_4096); 692 size += meta_size; 693 694 return size; 695 } 696 697 static u32 hfi_buffer_ibc_av1d(u32 frame_width, u32 frame_height) 698 { 699 u32 size, ibc8, ibc10; 700 701 ibc8 = size_av1d_ibc_nv12_ubwc(frame_width, frame_height); 702 ibc10 = size_av1d_ibc_tp10_ubwc(frame_width, frame_height); 703 size = ibc8 >= ibc10 ? ibc8 : ibc10; 704 705 return ALIGN(size, DMA_ALIGNMENT); 706 } 707 708 static u32 iris_vpu_dec_bin_size(struct iris_inst *inst) 709 { 710 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 711 struct v4l2_format *f = inst->fmt_src; 712 u32 height = f->fmt.pix_mp.height; 713 u32 width = f->fmt.pix_mp.width; 714 715 if (inst->codec == V4L2_PIX_FMT_H264) 716 return hfi_buffer_bin_h264d(width, height, num_vpp_pipes); 717 else if (inst->codec == V4L2_PIX_FMT_HEVC) 718 return hfi_buffer_bin_h265d(width, height, num_vpp_pipes); 719 else if (inst->codec == V4L2_PIX_FMT_VP9) 720 return hfi_buffer_bin_vp9d(width, height, num_vpp_pipes); 721 else if (inst->codec == V4L2_PIX_FMT_AV1) 722 return hfi_buffer_bin_av1d(width, height, num_vpp_pipes); 723 724 return 0; 725 } 726 727 static u32 iris_vpu_dec_comv_size(struct iris_inst *inst) 728 { 729 u32 num_comv = VIDEO_MAX_FRAME; 730 struct v4l2_format *f = inst->fmt_src; 731 u32 height = f->fmt.pix_mp.height; 732 u32 width = f->fmt.pix_mp.width; 733 734 if (inst->codec == V4L2_PIX_FMT_H264) 735 return hfi_buffer_comv_h264d(width, height, num_comv); 736 else if (inst->codec == V4L2_PIX_FMT_HEVC) 737 return hfi_buffer_comv_h265d(width, height, num_comv); 738 else if (inst->codec == V4L2_PIX_FMT_AV1) { 739 if (inst->fw_caps[DRAP].value) 740 return 0; 741 else 742 return hfi_buffer_comv_av1d(width, height, num_comv); 743 } 744 745 return 0; 746 } 747 748 static u32 iris_vpu_dec_persist_size(struct iris_inst *inst) 749 { 750 struct platform_inst_caps *caps; 751 752 if (inst->codec == V4L2_PIX_FMT_H264) 753 return hfi_buffer_persist_h264d(); 754 else if (inst->codec == V4L2_PIX_FMT_HEVC) 755 return hfi_buffer_persist_h265d(0); 756 else if (inst->codec == V4L2_PIX_FMT_VP9) 757 return hfi_buffer_persist_vp9d(); 758 else if (inst->codec == V4L2_PIX_FMT_AV1) { 759 caps = inst->core->iris_platform_data->inst_caps; 760 if (inst->fw_caps[DRAP].value) 761 return hfi_buffer_persist_av1d(caps->max_frame_width, 762 caps->max_frame_height, 16); 763 else 764 return hfi_buffer_persist_av1d(0, 0, 0); 765 } 766 767 return 0; 768 } 769 770 static u32 iris_vpu_dec_dpb_size(struct iris_inst *inst) 771 { 772 if (iris_split_mode_enabled(inst)) 773 return iris_get_buffer_size(inst, BUF_DPB); 774 else 775 return 0; 776 } 777 778 static u32 iris_vpu_dec_non_comv_size(struct iris_inst *inst) 779 { 780 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 781 struct v4l2_format *f = inst->fmt_src; 782 u32 height = f->fmt.pix_mp.height; 783 u32 width = f->fmt.pix_mp.width; 784 785 if (inst->codec == V4L2_PIX_FMT_H264) 786 return hfi_buffer_non_comv_h264d(width, height, num_vpp_pipes); 787 else if (inst->codec == V4L2_PIX_FMT_HEVC) 788 return hfi_buffer_non_comv_h265d(width, height, num_vpp_pipes); 789 790 return 0; 791 } 792 793 static u32 iris_vpu_dec_line_size(struct iris_inst *inst) 794 { 795 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 796 struct v4l2_format *f = inst->fmt_src; 797 u32 height = f->fmt.pix_mp.height; 798 u32 width = f->fmt.pix_mp.width; 799 bool is_opb = false; 800 u32 out_min_count = inst->buffers[BUF_OUTPUT].min_count; 801 802 if (iris_split_mode_enabled(inst)) 803 is_opb = true; 804 805 if (inst->codec == V4L2_PIX_FMT_H264) 806 return hfi_buffer_line_h264d(width, height, is_opb, num_vpp_pipes); 807 else if (inst->codec == V4L2_PIX_FMT_HEVC) 808 return hfi_buffer_line_h265d(width, height, is_opb, num_vpp_pipes); 809 else if (inst->codec == V4L2_PIX_FMT_VP9) 810 return hfi_buffer_line_vp9d(width, height, out_min_count, is_opb, 811 num_vpp_pipes); 812 else if (inst->codec == V4L2_PIX_FMT_AV1) 813 return hfi_buffer_line_av1d(width, height, is_opb, num_vpp_pipes); 814 815 return 0; 816 } 817 818 static u32 iris_vpu_dec_scratch1_size(struct iris_inst *inst) 819 { 820 return iris_vpu_dec_comv_size(inst) + 821 iris_vpu_dec_non_comv_size(inst) + 822 iris_vpu_dec_line_size(inst); 823 } 824 825 static inline u32 size_bin_bitstream_enc(u32 width, u32 height, 826 u32 rc_type) 827 { 828 u32 aligned_height = ALIGN(height, 32); 829 u32 aligned_width = ALIGN(width, 32); 830 u32 frame_size = width * height * 3; 831 u32 mbs_per_frame; 832 833 /* 834 * Encoder output size calculation: 32 Align width/height 835 * For resolution < 720p : YUVsize * 4 836 * For resolution > 720p & <= 4K : YUVsize / 2 837 * For resolution > 4k : YUVsize / 4 838 * Initially frame_size = YUVsize * 2; 839 */ 840 841 mbs_per_frame = (ALIGN(aligned_height, 16) * ALIGN(aligned_width, 16)) / 256; 842 843 if (mbs_per_frame < NUM_MBS_720P) 844 frame_size = frame_size << 1; 845 else if (mbs_per_frame <= NUM_MBS_4K) 846 frame_size = frame_size >> 2; 847 else 848 frame_size = frame_size >> 3; 849 850 if (rc_type == HFI_RATE_CONTROL_OFF || rc_type == HFI_RATE_CONTROL_CQ || 851 rc_type == HFI_RC_OFF || rc_type == HFI_RC_CQ) 852 frame_size = frame_size << 1; 853 854 /* 855 * In case of opaque color format bitdepth will be known 856 * with first ETB, buffers allocated already with 8 bit 857 * won't be sufficient for 10 bit 858 * calculate size considering 10-bit by default 859 * For 10-bit cases size = size * 1.25 860 */ 861 frame_size *= 5; 862 frame_size /= 4; 863 864 return ALIGN(frame_size, SZ_4K); 865 } 866 867 static inline u32 hfi_buffer_bin_enc(u32 width, u32 height, 868 u32 work_mode, u32 lcu_size, 869 u32 num_vpp_pipes, u32 rc_type) 870 { 871 u32 sao_bin_buffer_size, padded_bin_size, bitstream_size; 872 u32 total_bitbin_buffers, size_single_pipe, bitbin_size; 873 u32 aligned_height = ALIGN(height, lcu_size); 874 u32 aligned_width = ALIGN(width, lcu_size); 875 876 bitstream_size = size_bin_bitstream_enc(width, height, rc_type); 877 bitstream_size = ALIGN(bitstream_size, 256); 878 879 if (work_mode == STAGE_2) { 880 total_bitbin_buffers = 3; 881 bitbin_size = bitstream_size * 17 / 10; 882 bitbin_size = ALIGN(bitbin_size, 256); 883 } else { 884 total_bitbin_buffers = 1; 885 bitstream_size = aligned_width * aligned_height * 3; 886 bitbin_size = ALIGN(bitstream_size, 256); 887 } 888 889 if (num_vpp_pipes > 2) 890 size_single_pipe = bitbin_size / 2; 891 else 892 size_single_pipe = bitbin_size; 893 894 size_single_pipe = ALIGN(size_single_pipe, 256); 895 sao_bin_buffer_size = (64 * (((width + 32) * (height + 32)) >> 10)) + 384; 896 padded_bin_size = ALIGN(size_single_pipe, 256); 897 size_single_pipe = sao_bin_buffer_size + padded_bin_size; 898 size_single_pipe = ALIGN(size_single_pipe, 256); 899 bitbin_size = size_single_pipe * num_vpp_pipes; 900 901 return ALIGN(bitbin_size, 256) * total_bitbin_buffers + 512; 902 } 903 904 static u32 iris_vpu_enc_bin_size(struct iris_inst *inst) 905 { 906 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 907 u32 stage = inst->fw_caps[STAGE].value; 908 struct v4l2_format *f = inst->fmt_dst; 909 u32 height = f->fmt.pix_mp.height; 910 u32 width = f->fmt.pix_mp.width; 911 u32 lcu_size; 912 913 if (inst->codec == V4L2_PIX_FMT_HEVC) 914 lcu_size = 32; 915 else 916 lcu_size = 16; 917 918 return hfi_buffer_bin_enc(width, height, stage, lcu_size, 919 num_vpp_pipes, inst->hfi_rc_type); 920 } 921 922 static u32 iris_vpu_dec_partial_size(struct iris_inst *inst) 923 { 924 struct v4l2_format *f = inst->fmt_src; 925 u32 height = f->fmt.pix_mp.height; 926 u32 width = f->fmt.pix_mp.width; 927 928 return hfi_buffer_ibc_av1d(width, height); 929 } 930 931 static inline 932 u32 hfi_buffer_comv_enc(u32 frame_width, u32 frame_height, u32 lcu_size, 933 u32 num_recon, u32 standard) 934 { 935 u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size); 936 u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size); 937 u32 num_lcu_in_frame = width_in_lcus * height_in_lcus; 938 u32 mb_height = ((frame_height) + 15) >> 4; 939 u32 mb_width = ((frame_width) + 15) >> 4; 940 u32 size_colloc_mv, size_colloc_rc; 941 942 size_colloc_mv = (standard == HFI_CODEC_ENCODE_HEVC) ? 943 (16 * ((num_lcu_in_frame << 2) + 32)) : 944 (3 * 16 * (width_in_lcus * height_in_lcus + 32)); 945 size_colloc_mv = ALIGN(size_colloc_mv, 256) * num_recon; 946 size_colloc_rc = (((mb_width + 7) >> 3) * 16 * 2 * mb_height); 947 size_colloc_rc = ALIGN(size_colloc_rc, 256) * HFI_MAX_COL_FRAME; 948 949 return size_colloc_mv + size_colloc_rc; 950 } 951 952 static u32 iris_vpu_enc_comv_size(struct iris_inst *inst) 953 { 954 struct v4l2_format *f = inst->fmt_dst; 955 u32 height = f->fmt.pix_mp.height; 956 u32 width = f->fmt.pix_mp.width; 957 u32 num_recon = 1; 958 u32 lcu_size = 16; 959 960 if (inst->codec == V4L2_PIX_FMT_HEVC) { 961 lcu_size = 32; 962 return hfi_buffer_comv_enc(width, height, lcu_size, 963 num_recon + 1, HFI_CODEC_ENCODE_HEVC); 964 } 965 966 return hfi_buffer_comv_enc(width, height, lcu_size, 967 num_recon + 1, HFI_CODEC_ENCODE_AVC); 968 } 969 970 static inline 971 u32 size_frame_rc_buf_size(u32 standard, u32 frame_height_coded, 972 u32 num_vpp_pipes_enc) 973 { 974 u32 size = 0; 975 976 size = (standard == HFI_CODEC_ENCODE_HEVC) ? 977 (256 + 16 * (14 + ((((frame_height_coded) >> 5) + 7) >> 3))) : 978 (256 + 16 * (14 + ((((frame_height_coded) >> 4) + 7) >> 3))); 979 size *= 11; 980 981 if (num_vpp_pipes_enc > 1) 982 size = ALIGN(size, 256) * num_vpp_pipes_enc; 983 984 return ALIGN(size, 512) * HFI_MAX_COL_FRAME; 985 } 986 987 static inline 988 u32 size_enc_slice_info_buf(u32 num_lcu_in_frame) 989 { 990 return ALIGN((256 + (num_lcu_in_frame << 4)), 256); 991 } 992 993 static inline u32 enc_bitcnt_buf_size(u32 num_lcu_in_frame) 994 { 995 return ALIGN((256 + (4 * (num_lcu_in_frame))), 256); 996 } 997 998 static inline u32 enc_bitmap_buf_size(u32 num_lcu_in_frame) 999 { 1000 return ALIGN((256 + ((num_lcu_in_frame) >> 3)), 256); 1001 } 1002 1003 static inline u32 size_override_buf(u32 num_lcumb) 1004 { 1005 return ALIGN(((16 * (((num_lcumb) + 7) >> 3))), 256) * 2; 1006 } 1007 1008 static inline u32 size_ir_buf(u32 num_lcu_in_frame) 1009 { 1010 return ALIGN((((((num_lcu_in_frame) << 1) + 7) & (~7)) * 3), 256); 1011 } 1012 1013 static inline 1014 u32 size_linebuff_data(bool is_ten_bit, u32 frame_width_coded) 1015 { 1016 return is_ten_bit ? 1017 (((((10 * (frame_width_coded) + 1024) + (256 - 1)) & 1018 (~(256 - 1))) * 1) + 1019 (((((10 * (frame_width_coded) + 1024) >> 1) + (256 - 1)) & 1020 (~(256 - 1))) * 2)) : 1021 (((((8 * (frame_width_coded) + 1024) + (256 - 1)) & 1022 (~(256 - 1))) * 1) + 1023 (((((8 * (frame_width_coded) + 1024) >> 1) + (256 - 1)) & 1024 (~(256 - 1))) * 2)); 1025 } 1026 1027 static inline 1028 u32 size_left_linebuff_ctrl(u32 standard, u32 frame_height_coded, 1029 u32 num_vpp_pipes_enc) 1030 { 1031 u32 size = 0; 1032 1033 size = standard == HFI_CODEC_ENCODE_HEVC ? 1034 (((frame_height_coded) + 1035 (32)) / 32 * 4 * 16) : 1036 (((frame_height_coded) + 15) / 16 * 5 * 16); 1037 1038 if ((num_vpp_pipes_enc) > 1) { 1039 size += 512; 1040 size = ALIGN(size, 512) * 1041 num_vpp_pipes_enc; 1042 } 1043 1044 return ALIGN(size, 256); 1045 } 1046 1047 static inline 1048 u32 size_left_linebuff_recon_pix(bool is_ten_bit, u32 frame_height_coded, 1049 u32 num_vpp_pipes_enc) 1050 { 1051 return (((is_ten_bit + 1) * 2 * (frame_height_coded) + 256) + 1052 (256 << (num_vpp_pipes_enc - 1)) - 1) & 1053 (~((256 << (num_vpp_pipes_enc - 1)) - 1)) * 1; 1054 } 1055 1056 static inline 1057 u32 size_top_linebuff_ctrl_fe(u32 frame_width_coded, u32 standard) 1058 { 1059 return standard == HFI_CODEC_ENCODE_HEVC ? 1060 ALIGN((64 * ((frame_width_coded) >> 5)), 256) : 1061 ALIGN((256 + 16 * ((frame_width_coded) >> 4)), 256); 1062 } 1063 1064 static inline 1065 u32 size_left_linebuff_ctrl_fe(u32 frame_height_coded, u32 num_vpp_pipes_enc) 1066 { 1067 return (((256 + 64 * ((frame_height_coded) >> 4)) + 1068 (256 << (num_vpp_pipes_enc - 1)) - 1) & 1069 (~((256 << (num_vpp_pipes_enc - 1)) - 1)) * 1) * 1070 num_vpp_pipes_enc; 1071 } 1072 1073 static inline 1074 u32 size_left_linebuff_metadata_recon_y(u32 frame_height_coded, 1075 bool is_ten_bit, 1076 u32 num_vpp_pipes_enc) 1077 { 1078 return ALIGN(((256 + 64 * ((frame_height_coded) / 1079 (8 * (is_ten_bit ? 4 : 8))))), 256) * num_vpp_pipes_enc; 1080 } 1081 1082 static inline 1083 u32 size_left_linebuff_metadata_recon_uv(u32 frame_height_coded, 1084 bool is_ten_bit, 1085 u32 num_vpp_pipes_enc) 1086 { 1087 return ALIGN(((256 + 64 * ((frame_height_coded) / 1088 (4 * (is_ten_bit ? 4 : 8))))), 256) * num_vpp_pipes_enc; 1089 } 1090 1091 static inline 1092 u32 size_linebuff_recon_pix(bool is_ten_bit, u32 frame_width_coded) 1093 { 1094 return ALIGN(((is_ten_bit ? 3 : 2) * (frame_width_coded)), 256); 1095 } 1096 1097 static inline 1098 u32 size_line_buf_ctrl(u32 frame_width_coded) 1099 { 1100 return ALIGN(frame_width_coded, 256); 1101 } 1102 1103 static inline 1104 u32 size_line_buf_ctrl_id2(u32 frame_width_coded) 1105 { 1106 return ALIGN(frame_width_coded, 256); 1107 } 1108 1109 static inline u32 size_line_buf_sde(u32 frame_width_coded) 1110 { 1111 return ALIGN((256 + (16 * ((frame_width_coded) >> 4))), 256); 1112 } 1113 1114 static inline 1115 u32 size_vpss_line_buf(u32 num_vpp_pipes_enc, u32 frame_height_coded, 1116 u32 frame_width_coded) 1117 { 1118 return ALIGN(((((((8192) >> 2) << 5) * (num_vpp_pipes_enc)) + 64) + 1119 (((((max_t(u32, (frame_width_coded), 1120 (frame_height_coded)) + 3) >> 2) << 5) + 256) * 16)), 256); 1121 } 1122 static inline 1123 u32 size_vpss_line_buf_vpu33(u32 num_vpp_pipes_enc, u32 frame_height_coded, 1124 u32 frame_width_coded) 1125 { 1126 u32 vpss_4tap_top, vpss_4tap_left, vpss_div2_top; 1127 u32 vpss_div2_left, vpss_top_lb, vpss_left_lb; 1128 u32 size_left, size_top; 1129 u32 max_width_height; 1130 1131 max_width_height = max_t(u32, frame_width_coded, frame_height_coded); 1132 vpss_4tap_top = ((((max_width_height * 2) + 3) >> 2) << 4) + 256; 1133 vpss_4tap_left = (((8192 + 3) >> 2) << 5) + 64; 1134 vpss_div2_top = (((max_width_height + 3) >> 2) << 4) + 256; 1135 vpss_div2_left = ((((max_width_height * 2) + 3) >> 2) << 5) + 64; 1136 vpss_top_lb = (frame_width_coded + 1) << 3; 1137 vpss_left_lb = (frame_height_coded << 3) * num_vpp_pipes_enc; 1138 size_left = (vpss_4tap_left + vpss_div2_left) * 2 * num_vpp_pipes_enc; 1139 size_top = (vpss_4tap_top + vpss_div2_top) * 2; 1140 1141 return ALIGN(size_left + size_top + vpss_top_lb + vpss_left_lb, DMA_ALIGNMENT); 1142 } 1143 1144 static inline 1145 u32 size_top_line_buf_first_stg_sao(u32 frame_width_coded) 1146 { 1147 return ALIGN((16 * ((frame_width_coded) >> 5)), 256); 1148 } 1149 1150 static inline 1151 u32 size_enc_ref_buffer(u32 frame_width, u32 frame_height) 1152 { 1153 u32 u_chroma_buffer_height = ALIGN(frame_height >> 1, 32); 1154 u32 u_buffer_height = ALIGN(frame_height, 32); 1155 u32 u_buffer_width = ALIGN(frame_width, 32); 1156 1157 return (u_buffer_height + u_chroma_buffer_height) * u_buffer_width; 1158 } 1159 1160 static inline 1161 u32 size_enc_ten_bit_ref_buffer(u32 frame_width, u32 frame_height) 1162 { 1163 u32 ref_luma_stride_in_bytes = ((frame_width + SYSTEM_LAL_TILE10 - 1) / SYSTEM_LAL_TILE10) * 1164 SYSTEM_LAL_TILE10; 1165 u32 ref_buf_height = (frame_height + (32 - 1)) & (~(32 - 1)); 1166 u32 u_ref_stride, luma_size; 1167 u32 ref_chrm_height_in_bytes; 1168 u32 chroma_size; 1169 1170 u_ref_stride = 4 * (ref_luma_stride_in_bytes / 3); 1171 u_ref_stride = (u_ref_stride + (128 - 1)) & (~(128 - 1)); 1172 luma_size = ref_buf_height * u_ref_stride; 1173 luma_size = (luma_size + (4096 - 1)) & (~(4096 - 1)); 1174 1175 ref_chrm_height_in_bytes = (((frame_height + 1) >> 1) + (32 - 1)) & (~(32 - 1)); 1176 chroma_size = u_ref_stride * ref_chrm_height_in_bytes; 1177 chroma_size = (chroma_size + (4096 - 1)) & (~(4096 - 1)); 1178 1179 return luma_size + chroma_size; 1180 } 1181 1182 static inline 1183 u32 hfi_ubwc_calc_metadata_plane_stride(u32 frame_width, 1184 u32 metadata_stride_multiple, 1185 u32 tile_width_in_pels) 1186 { 1187 return ALIGN(((frame_width + (tile_width_in_pels - 1)) / tile_width_in_pels), 1188 metadata_stride_multiple); 1189 } 1190 1191 static inline 1192 u32 hfi_ubwc_metadata_plane_bufheight(u32 frame_height, 1193 u32 metadata_height_multiple, 1194 u32 tile_height_in_pels) 1195 { 1196 return ALIGN(((frame_height + (tile_height_in_pels - 1)) / tile_height_in_pels), 1197 metadata_height_multiple); 1198 } 1199 1200 static inline 1201 u32 hfi_ubwc_metadata_plane_buffer_size(u32 _metadata_tride, u32 _metadata_buf_height) 1202 { 1203 return ALIGN(_metadata_tride * _metadata_buf_height, 4096); 1204 } 1205 1206 static inline 1207 u32 hfi_buffer_non_comv_enc(u32 frame_width, u32 frame_height, 1208 u32 num_vpp_pipes_enc, u32 lcu_size, u32 standard) 1209 { 1210 u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size); 1211 u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size); 1212 u32 num_lcu_in_frame = width_in_lcus * height_in_lcus; 1213 u32 frame_height_coded = height_in_lcus * (lcu_size); 1214 u32 frame_width_coded = width_in_lcus * (lcu_size); 1215 u32 num_lcumb, frame_rc_buf_size; 1216 1217 num_lcumb = (frame_height_coded / lcu_size) * 1218 ((frame_width_coded + lcu_size * 8) / lcu_size); 1219 frame_rc_buf_size = size_frame_rc_buf_size(standard, frame_height_coded, 1220 num_vpp_pipes_enc); 1221 return size_enc_slice_info_buf(num_lcu_in_frame) + 1222 SIZE_SLICE_CMD_BUFFER + 1223 SIZE_SPS_PPS_SLICE_HDR + 1224 frame_rc_buf_size + 1225 enc_bitcnt_buf_size(num_lcu_in_frame) + 1226 enc_bitmap_buf_size(num_lcu_in_frame) + 1227 SIZE_BSE_SLICE_CMD_BUF + 1228 SIZE_LAMBDA_LUT + 1229 size_override_buf(num_lcumb) + 1230 size_ir_buf(num_lcu_in_frame); 1231 } 1232 1233 static u32 iris_vpu_enc_non_comv_size(struct iris_inst *inst) 1234 { 1235 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 1236 struct v4l2_format *f = inst->fmt_dst; 1237 u32 height = f->fmt.pix_mp.height; 1238 u32 width = f->fmt.pix_mp.width; 1239 u32 lcu_size = 16; 1240 1241 if (inst->codec == V4L2_PIX_FMT_HEVC) { 1242 lcu_size = 32; 1243 return hfi_buffer_non_comv_enc(width, height, num_vpp_pipes, 1244 lcu_size, HFI_CODEC_ENCODE_HEVC) + 1245 SIZE_ONE_SLICE_BUF; 1246 } 1247 1248 return hfi_buffer_non_comv_enc(width, height, num_vpp_pipes, 1249 lcu_size, HFI_CODEC_ENCODE_AVC); 1250 } 1251 1252 static inline 1253 u32 hfi_buffer_line_enc_base(u32 frame_width, u32 frame_height, bool is_ten_bit, 1254 u32 num_vpp_pipes_enc, u32 lcu_size, u32 standard) 1255 { 1256 u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size); 1257 u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size); 1258 u32 frame_height_coded = height_in_lcus * (lcu_size); 1259 u32 frame_width_coded = width_in_lcus * (lcu_size); 1260 u32 line_buff_data_size, left_line_buff_ctrl_size; 1261 u32 left_line_buff_metadata_recon__uv__size; 1262 u32 left_line_buff_metadata_recon__y__size; 1263 u32 left_line_buff_recon_pix_size; 1264 u32 top_line_buff_ctrl_fe_size; 1265 u32 line_buff_recon_pix_size; 1266 1267 line_buff_data_size = size_linebuff_data(is_ten_bit, frame_width_coded); 1268 left_line_buff_ctrl_size = 1269 size_left_linebuff_ctrl(standard, frame_height_coded, num_vpp_pipes_enc); 1270 left_line_buff_recon_pix_size = 1271 size_left_linebuff_recon_pix(is_ten_bit, frame_height_coded, 1272 num_vpp_pipes_enc); 1273 top_line_buff_ctrl_fe_size = 1274 size_top_linebuff_ctrl_fe(frame_width_coded, standard); 1275 left_line_buff_metadata_recon__y__size = 1276 size_left_linebuff_metadata_recon_y(frame_height_coded, is_ten_bit, 1277 num_vpp_pipes_enc); 1278 left_line_buff_metadata_recon__uv__size = 1279 size_left_linebuff_metadata_recon_uv(frame_height_coded, is_ten_bit, 1280 num_vpp_pipes_enc); 1281 line_buff_recon_pix_size = size_linebuff_recon_pix(is_ten_bit, frame_width_coded); 1282 1283 return size_line_buf_ctrl(frame_width_coded) + 1284 size_line_buf_ctrl_id2(frame_width_coded) + 1285 line_buff_data_size + 1286 left_line_buff_ctrl_size + 1287 left_line_buff_recon_pix_size + 1288 top_line_buff_ctrl_fe_size + 1289 left_line_buff_metadata_recon__y__size + 1290 left_line_buff_metadata_recon__uv__size + 1291 line_buff_recon_pix_size + 1292 size_left_linebuff_ctrl_fe(frame_height_coded, num_vpp_pipes_enc) + 1293 size_line_buf_sde(frame_width_coded) + 1294 size_top_line_buf_first_stg_sao(frame_width_coded); 1295 } 1296 1297 static inline 1298 u32 hfi_buffer_line_enc(u32 frame_width, u32 frame_height, bool is_ten_bit, 1299 u32 num_vpp_pipes_enc, u32 lcu_size, u32 standard) 1300 { 1301 u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size); 1302 u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size); 1303 u32 frame_height_coded = height_in_lcus * (lcu_size); 1304 u32 frame_width_coded = width_in_lcus * (lcu_size); 1305 1306 return hfi_buffer_line_enc_base(frame_width, frame_height, is_ten_bit, 1307 num_vpp_pipes_enc, lcu_size, standard) + 1308 size_vpss_line_buf(num_vpp_pipes_enc, frame_height_coded, frame_width_coded); 1309 } 1310 1311 static inline 1312 u32 hfi_buffer_line_enc_vpu33(u32 frame_width, u32 frame_height, bool is_ten_bit, 1313 u32 num_vpp_pipes_enc, u32 lcu_size, u32 standard) 1314 { 1315 u32 width_in_lcus = ((frame_width) + (lcu_size) - 1) / (lcu_size); 1316 u32 height_in_lcus = ((frame_height) + (lcu_size) - 1) / (lcu_size); 1317 u32 frame_height_coded = height_in_lcus * (lcu_size); 1318 u32 frame_width_coded = width_in_lcus * (lcu_size); 1319 1320 return hfi_buffer_line_enc_base(frame_width, frame_height, is_ten_bit, 1321 num_vpp_pipes_enc, lcu_size, standard) + 1322 size_vpss_line_buf_vpu33(num_vpp_pipes_enc, frame_height_coded, 1323 frame_width_coded); 1324 } 1325 1326 static u32 iris_vpu_enc_line_size(struct iris_inst *inst) 1327 { 1328 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 1329 struct v4l2_format *f = inst->fmt_dst; 1330 u32 height = f->fmt.pix_mp.height; 1331 u32 width = f->fmt.pix_mp.width; 1332 u32 lcu_size = 16; 1333 1334 if (inst->codec == V4L2_PIX_FMT_HEVC) { 1335 lcu_size = 32; 1336 return hfi_buffer_line_enc(width, height, 0, num_vpp_pipes, 1337 lcu_size, HFI_CODEC_ENCODE_HEVC); 1338 } 1339 1340 return hfi_buffer_line_enc(width, height, 0, num_vpp_pipes, 1341 lcu_size, HFI_CODEC_ENCODE_AVC); 1342 } 1343 1344 static u32 iris_vpu33_enc_line_size(struct iris_inst *inst) 1345 { 1346 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 1347 struct v4l2_format *f = inst->fmt_dst; 1348 u32 height = f->fmt.pix_mp.height; 1349 u32 width = f->fmt.pix_mp.width; 1350 u32 lcu_size = 16; 1351 1352 if (inst->codec == V4L2_PIX_FMT_HEVC) { 1353 lcu_size = 32; 1354 return hfi_buffer_line_enc_vpu33(width, height, 0, num_vpp_pipes, 1355 lcu_size, HFI_CODEC_ENCODE_HEVC); 1356 } 1357 1358 return hfi_buffer_line_enc_vpu33(width, height, 0, num_vpp_pipes, 1359 lcu_size, HFI_CODEC_ENCODE_AVC); 1360 } 1361 1362 static inline 1363 u32 hfi_buffer_dpb_enc(u32 frame_width, u32 frame_height, bool is_ten_bit) 1364 { 1365 u32 metadata_stride, metadata_buf_height, meta_size_y, meta_size_c; 1366 u32 ten_bit_ref_buf_size = 0, ref_buf_size = 0; 1367 u32 size; 1368 1369 if (!is_ten_bit) { 1370 ref_buf_size = size_enc_ref_buffer(frame_width, frame_height); 1371 metadata_stride = 1372 hfi_ubwc_calc_metadata_plane_stride(frame_width, 64, 1373 HFI_COL_FMT_NV12C_Y_TILE_WIDTH); 1374 metadata_buf_height = 1375 hfi_ubwc_metadata_plane_bufheight(frame_height, 16, 1376 HFI_COL_FMT_NV12C_Y_TILE_HEIGHT); 1377 meta_size_y = 1378 hfi_ubwc_metadata_plane_buffer_size(metadata_stride, metadata_buf_height); 1379 meta_size_c = 1380 hfi_ubwc_metadata_plane_buffer_size(metadata_stride, metadata_buf_height); 1381 size = ref_buf_size + meta_size_y + meta_size_c; 1382 } else { 1383 ten_bit_ref_buf_size = size_enc_ten_bit_ref_buffer(frame_width, frame_height); 1384 metadata_stride = 1385 hfi_ubwc_calc_metadata_plane_stride(frame_width, 1386 IRIS_METADATA_STRIDE_MULTIPLE, 1387 HFI_COL_FMT_TP10C_Y_TILE_WIDTH); 1388 metadata_buf_height = 1389 hfi_ubwc_metadata_plane_bufheight(frame_height, 1390 IRIS_METADATA_HEIGHT_MULTIPLE, 1391 HFI_COL_FMT_TP10C_Y_TILE_HEIGHT); 1392 meta_size_y = 1393 hfi_ubwc_metadata_plane_buffer_size(metadata_stride, metadata_buf_height); 1394 meta_size_c = 1395 hfi_ubwc_metadata_plane_buffer_size(metadata_stride, metadata_buf_height); 1396 size = ten_bit_ref_buf_size + meta_size_y + meta_size_c; 1397 } 1398 1399 return size; 1400 } 1401 1402 static u32 iris_vpu_enc_arp_size(struct iris_inst *inst) 1403 { 1404 return HFI_BUFFER_ARP_ENC; 1405 } 1406 1407 inline bool is_scaling_enabled(struct iris_inst *inst) 1408 { 1409 return inst->crop.left != inst->compose.left || 1410 inst->crop.top != inst->compose.top || 1411 inst->crop.width != inst->compose.width || 1412 inst->crop.height != inst->compose.height; 1413 } 1414 1415 static inline 1416 u32 hfi_buffer_vpss_enc(u32 dswidth, u32 dsheight, bool ds_enable, 1417 u32 blur, bool is_ten_bit) 1418 { 1419 if (ds_enable || blur) 1420 return hfi_buffer_dpb_enc(dswidth, dsheight, is_ten_bit); 1421 1422 return 0; 1423 } 1424 1425 static inline u32 hfi_buffer_scratch1_enc(u32 frame_width, u32 frame_height, 1426 u32 lcu_size, u32 num_ref, 1427 bool ten_bit, u32 num_vpp_pipes, 1428 bool is_h265) 1429 { 1430 u32 line_buf_ctrl_size, line_buf_data_size, leftline_buf_ctrl_size; 1431 u32 line_buf_sde_size, sps_pps_slice_hdr, topline_buf_ctrl_size_FE; 1432 u32 leftline_buf_ctrl_size_FE, line_buf_recon_pix_size; 1433 u32 leftline_buf_recon_pix_size, lambda_lut_size, override_buffer_size; 1434 u32 col_mv_buf_size, vpp_reg_buffer_size, ir_buffer_size; 1435 u32 vpss_line_buf, leftline_buf_meta_recony, h265e_colrcbuf_size; 1436 u32 h265e_framerc_bufsize, h265e_lcubitcnt_bufsize; 1437 u32 h265e_lcubitmap_bufsize, se_stats_bufsize; 1438 u32 bse_reg_buffer_size, bse_slice_cmd_buffer_size, slice_info_bufsize; 1439 u32 line_buf_ctrl_size_buffid2, slice_cmd_buffer_size; 1440 u32 width_lcu_num, height_lcu_num, width_coded, height_coded; 1441 u32 frame_num_lcu, linebuf_meta_recon_uv, topline_bufsize_fe_1stg_sao; 1442 u32 vpss_line_buffer_size_1; 1443 u32 bit_depth, num_lcu_mb; 1444 1445 width_lcu_num = (frame_width + lcu_size - 1) / lcu_size; 1446 height_lcu_num = (frame_height + lcu_size - 1) / lcu_size; 1447 frame_num_lcu = width_lcu_num * height_lcu_num; 1448 width_coded = width_lcu_num * lcu_size; 1449 height_coded = height_lcu_num * lcu_size; 1450 num_lcu_mb = (height_coded / lcu_size) * 1451 ((width_coded + lcu_size * 8) / lcu_size); 1452 slice_info_bufsize = 256 + (frame_num_lcu << 4); 1453 slice_info_bufsize = ALIGN(slice_info_bufsize, 256); 1454 line_buf_ctrl_size = ALIGN(width_coded, 256); 1455 line_buf_ctrl_size_buffid2 = ALIGN(width_coded, 256); 1456 1457 bit_depth = ten_bit ? 10 : 8; 1458 line_buf_data_size = 1459 (((((bit_depth * width_coded + 1024) + (256 - 1)) & 1460 (~(256 - 1))) * 1) + 1461 (((((bit_depth * width_coded + 1024) >> 1) + (256 - 1)) & 1462 (~(256 - 1))) * 2)); 1463 1464 leftline_buf_ctrl_size = is_h265 ? ((height_coded + 32) / 32 * 4 * 16) : 1465 ((height_coded + 15) / 16 * 5 * 16); 1466 1467 if (num_vpp_pipes > 1) { 1468 leftline_buf_ctrl_size += 512; 1469 leftline_buf_ctrl_size = 1470 ALIGN(leftline_buf_ctrl_size, 512) * num_vpp_pipes; 1471 } 1472 1473 leftline_buf_ctrl_size = ALIGN(leftline_buf_ctrl_size, 256); 1474 leftline_buf_recon_pix_size = 1475 (((ten_bit + 1) * 2 * (height_coded) + 256) + 1476 (256 << (num_vpp_pipes - 1)) - 1) & 1477 (~((256 << (num_vpp_pipes - 1)) - 1)) * 1; 1478 1479 topline_buf_ctrl_size_FE = is_h265 ? (64 * (width_coded >> 5)) : 1480 (256 + 16 * (width_coded >> 4)); 1481 topline_buf_ctrl_size_FE = ALIGN(topline_buf_ctrl_size_FE, 256); 1482 leftline_buf_ctrl_size_FE = 1483 (((256 + 64 * (height_coded >> 4)) + 1484 (256 << (num_vpp_pipes - 1)) - 1) & 1485 (~((256 << (num_vpp_pipes - 1)) - 1)) * 1) * 1486 num_vpp_pipes; 1487 leftline_buf_meta_recony = 1488 (256 + 64 * ((height_coded) / (8 * (ten_bit ? 4 : 8)))); 1489 leftline_buf_meta_recony = ALIGN(leftline_buf_meta_recony, 256); 1490 leftline_buf_meta_recony = leftline_buf_meta_recony * num_vpp_pipes; 1491 linebuf_meta_recon_uv = 1492 (256 + 64 * ((height_coded) / (4 * (ten_bit ? 4 : 8)))); 1493 linebuf_meta_recon_uv = ALIGN(linebuf_meta_recon_uv, 256); 1494 linebuf_meta_recon_uv = linebuf_meta_recon_uv * num_vpp_pipes; 1495 line_buf_recon_pix_size = ((ten_bit ? 3 : 2) * width_coded); 1496 line_buf_recon_pix_size = ALIGN(line_buf_recon_pix_size, 256); 1497 slice_cmd_buffer_size = ALIGN(20480, 256); 1498 sps_pps_slice_hdr = 2048 + 4096; 1499 col_mv_buf_size = 1500 is_h265 ? (16 * ((frame_num_lcu << 2) + 32)) : 1501 (3 * 16 * (width_lcu_num * height_lcu_num + 32)); 1502 col_mv_buf_size = ALIGN(col_mv_buf_size, 256) * (num_ref + 1); 1503 h265e_colrcbuf_size = 1504 (((width_lcu_num + 7) >> 3) * 16 * 2 * height_lcu_num); 1505 if (num_vpp_pipes > 1) 1506 h265e_colrcbuf_size = 1507 ALIGN(h265e_colrcbuf_size, 256) * num_vpp_pipes; 1508 1509 h265e_colrcbuf_size = 1510 ALIGN(h265e_colrcbuf_size, 256) * HFI_MAX_COL_FRAME; 1511 h265e_framerc_bufsize = 1512 (is_h265) ? 1513 (256 + 16 * (14 + (((height_coded >> 5) + 7) >> 3))) : 1514 (256 + 16 * (14 + (((height_coded >> 4) + 7) >> 3))); 1515 h265e_framerc_bufsize *= 6; 1516 if (num_vpp_pipes > 1) 1517 h265e_framerc_bufsize = 1518 ALIGN(h265e_framerc_bufsize, 256) * num_vpp_pipes; 1519 1520 h265e_framerc_bufsize = 1521 ALIGN(h265e_framerc_bufsize, 512) * HFI_MAX_COL_FRAME; 1522 h265e_lcubitcnt_bufsize = 256 + 4 * frame_num_lcu; 1523 h265e_lcubitcnt_bufsize = ALIGN(h265e_lcubitcnt_bufsize, 256); 1524 h265e_lcubitmap_bufsize = 256 + (frame_num_lcu >> 3); 1525 h265e_lcubitmap_bufsize = ALIGN(h265e_lcubitmap_bufsize, 256); 1526 line_buf_sde_size = 256 + 16 * (width_coded >> 4); 1527 line_buf_sde_size = ALIGN(line_buf_sde_size, 256); 1528 if ((width_coded * height_coded) > (4096 * 2160)) 1529 se_stats_bufsize = 0; 1530 else if ((width_coded * height_coded) > (1920 * 1088)) 1531 se_stats_bufsize = (40 * 4 * frame_num_lcu + 256 + 256); 1532 else 1533 se_stats_bufsize = (1024 * frame_num_lcu + 256 + 256); 1534 1535 se_stats_bufsize = ALIGN(se_stats_bufsize, 256) * 2; 1536 bse_slice_cmd_buffer_size = (((8192 << 2) + 7) & (~7)) * 6; 1537 bse_reg_buffer_size = (((512 << 3) + 7) & (~7)) * 4; 1538 vpp_reg_buffer_size = (((2048 << 3) + 31) & (~31)) * 10; 1539 lambda_lut_size = 256 * 11; 1540 override_buffer_size = 16 * ((num_lcu_mb + 7) >> 3); 1541 override_buffer_size = ALIGN(override_buffer_size, 256) * 2; 1542 ir_buffer_size = (((frame_num_lcu << 1) + 7) & (~7)) * 3; 1543 vpss_line_buffer_size_1 = (((8192 >> 2) << 5) * num_vpp_pipes) + 64; 1544 vpss_line_buf = 1545 (((((max(width_coded, height_coded) + 3) >> 2) << 5) + 256) * 1546 16) + 1547 vpss_line_buffer_size_1; 1548 topline_bufsize_fe_1stg_sao = 16 * (width_coded >> 5); 1549 topline_bufsize_fe_1stg_sao = ALIGN(topline_bufsize_fe_1stg_sao, 256); 1550 1551 return line_buf_ctrl_size + line_buf_data_size + 1552 line_buf_ctrl_size_buffid2 + leftline_buf_ctrl_size + 1553 vpss_line_buf + col_mv_buf_size + topline_buf_ctrl_size_FE + 1554 leftline_buf_ctrl_size_FE + line_buf_recon_pix_size + 1555 leftline_buf_recon_pix_size + leftline_buf_meta_recony + 1556 linebuf_meta_recon_uv + h265e_colrcbuf_size + 1557 h265e_framerc_bufsize + h265e_lcubitcnt_bufsize + 1558 h265e_lcubitmap_bufsize + line_buf_sde_size + 1559 topline_bufsize_fe_1stg_sao + override_buffer_size + 1560 bse_reg_buffer_size + vpp_reg_buffer_size + sps_pps_slice_hdr + 1561 slice_cmd_buffer_size + bse_slice_cmd_buffer_size + 1562 ir_buffer_size + slice_info_bufsize + lambda_lut_size + 1563 se_stats_bufsize + 1024; 1564 } 1565 1566 static u32 iris_vpu_enc_scratch1_size(struct iris_inst *inst) 1567 { 1568 u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe; 1569 struct v4l2_format *f = inst->fmt_dst; 1570 u32 frame_height = f->fmt.pix_mp.height; 1571 u32 frame_width = f->fmt.pix_mp.width; 1572 u32 num_ref = 1; 1573 u32 lcu_size; 1574 bool is_h265; 1575 1576 if (inst->codec == V4L2_PIX_FMT_H264) { 1577 lcu_size = 16; 1578 is_h265 = false; 1579 } else if (inst->codec == V4L2_PIX_FMT_HEVC) { 1580 lcu_size = 32; 1581 is_h265 = true; 1582 } else { 1583 return 0; 1584 } 1585 1586 return hfi_buffer_scratch1_enc(frame_width, frame_height, lcu_size, 1587 num_ref, false, num_vpp_pipes, is_h265); 1588 } 1589 1590 static inline u32 ubwc_metadata_plane_stride(u32 width, 1591 u32 metadata_stride_multi, 1592 u32 tile_width_pels) 1593 { 1594 return ALIGN(((width + (tile_width_pels - 1)) / tile_width_pels), 1595 metadata_stride_multi); 1596 } 1597 1598 static inline u32 ubwc_metadata_plane_bufheight(u32 height, 1599 u32 metadata_height_multi, 1600 u32 tile_height_pels) 1601 { 1602 return ALIGN(((height + (tile_height_pels - 1)) / tile_height_pels), 1603 metadata_height_multi); 1604 } 1605 1606 static inline u32 ubwc_metadata_plane_buffer_size(u32 metadata_stride, 1607 u32 metadata_buf_height) 1608 { 1609 return ALIGN(metadata_stride * metadata_buf_height, SZ_4K); 1610 } 1611 1612 static inline u32 hfi_buffer_scratch2_enc(u32 frame_width, u32 frame_height, 1613 u32 num_ref, bool ten_bit) 1614 { 1615 u32 aligned_width, aligned_height, chroma_height, ref_buf_height; 1616 u32 metadata_stride, meta_buf_height, meta_size_y, meta_size_c; 1617 u32 ref_luma_stride_bytes, ref_chroma_height_bytes; 1618 u32 ref_buf_size, ref_stride; 1619 u32 luma_size, chroma_size; 1620 u32 size; 1621 1622 if (!ten_bit) { 1623 aligned_height = ALIGN(frame_height, 32); 1624 chroma_height = frame_height >> 1; 1625 chroma_height = ALIGN(chroma_height, 32); 1626 aligned_width = ALIGN(frame_width, 128); 1627 metadata_stride = 1628 ubwc_metadata_plane_stride(frame_width, 64, 32); 1629 meta_buf_height = 1630 ubwc_metadata_plane_bufheight(frame_height, 16, 8); 1631 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride, 1632 meta_buf_height); 1633 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride, 1634 meta_buf_height); 1635 size = (aligned_height + chroma_height) * aligned_width + 1636 meta_size_y + meta_size_c; 1637 size = (size * (num_ref + 3)) + 4096; 1638 } else { 1639 ref_buf_height = (frame_height + (32 - 1)) & (~(32 - 1)); 1640 ref_luma_stride_bytes = ((frame_width + 192 - 1) / 192) * 192; 1641 ref_stride = 4 * (ref_luma_stride_bytes / 3); 1642 ref_stride = (ref_stride + (128 - 1)) & (~(128 - 1)); 1643 luma_size = ref_buf_height * ref_stride; 1644 ref_chroma_height_bytes = 1645 (((frame_height + 1) >> 1) + (32 - 1)) & (~(32 - 1)); 1646 chroma_size = ref_stride * ref_chroma_height_bytes; 1647 luma_size = (luma_size + (SZ_4K - 1)) & (~(SZ_4K - 1)); 1648 chroma_size = (chroma_size + (SZ_4K - 1)) & (~(SZ_4K - 1)); 1649 ref_buf_size = luma_size + chroma_size; 1650 metadata_stride = 1651 ubwc_metadata_plane_stride(frame_width, 64, 48); 1652 meta_buf_height = 1653 ubwc_metadata_plane_bufheight(frame_height, 16, 4); 1654 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride, 1655 meta_buf_height); 1656 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride, 1657 meta_buf_height); 1658 size = ref_buf_size + meta_size_y + meta_size_c; 1659 size = (size * (num_ref + 3)) + 4096; 1660 } 1661 1662 return size; 1663 } 1664 1665 static u32 iris_vpu_enc_scratch2_size(struct iris_inst *inst) 1666 { 1667 struct v4l2_format *f = inst->fmt_dst; 1668 u32 frame_width = f->fmt.pix_mp.width; 1669 u32 frame_height = f->fmt.pix_mp.height; 1670 u32 num_ref = 1; 1671 1672 return hfi_buffer_scratch2_enc(frame_width, frame_height, num_ref, 1673 false); 1674 } 1675 1676 static u32 iris_vpu_enc_vpss_size(struct iris_inst *inst) 1677 { 1678 u32 ds_enable = is_scaling_enabled(inst); 1679 struct v4l2_format *f = inst->fmt_dst; 1680 u32 height = f->fmt.pix_mp.height; 1681 u32 width = f->fmt.pix_mp.width; 1682 1683 return hfi_buffer_vpss_enc(width, height, ds_enable, 0, 0); 1684 } 1685 1686 static int output_min_count(struct iris_inst *inst) 1687 { 1688 int output_min_count = 4; 1689 1690 /* fw_min_count > 0 indicates reconfig event has already arrived */ 1691 if (inst->fw_min_count) { 1692 if (iris_split_mode_enabled(inst) && 1693 (inst->codec == V4L2_PIX_FMT_VP9 || 1694 inst->codec == V4L2_PIX_FMT_AV1)) 1695 return min_t(u32, 4, inst->fw_min_count); 1696 else 1697 return inst->fw_min_count; 1698 } 1699 1700 if (inst->codec == V4L2_PIX_FMT_VP9) 1701 output_min_count = 9; 1702 else if (inst->codec == V4L2_PIX_FMT_AV1) 1703 output_min_count = 11; 1704 1705 return output_min_count; 1706 } 1707 1708 struct iris_vpu_buf_type_handle { 1709 enum iris_buffer_type type; 1710 u32 (*handle)(struct iris_inst *inst); 1711 }; 1712 1713 u32 iris_vpu_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type) 1714 { 1715 const struct iris_vpu_buf_type_handle *buf_type_handle_arr = NULL; 1716 u32 size = 0, buf_type_handle_size = 0, i; 1717 1718 static const struct iris_vpu_buf_type_handle dec_internal_buf_type_handle[] = { 1719 {BUF_BIN, iris_vpu_dec_bin_size }, 1720 {BUF_COMV, iris_vpu_dec_comv_size }, 1721 {BUF_NON_COMV, iris_vpu_dec_non_comv_size }, 1722 {BUF_LINE, iris_vpu_dec_line_size }, 1723 {BUF_PERSIST, iris_vpu_dec_persist_size }, 1724 {BUF_DPB, iris_vpu_dec_dpb_size }, 1725 {BUF_SCRATCH_1, iris_vpu_dec_scratch1_size }, 1726 {BUF_PARTIAL, iris_vpu_dec_partial_size }, 1727 }; 1728 1729 static const struct iris_vpu_buf_type_handle enc_internal_buf_type_handle[] = { 1730 {BUF_BIN, iris_vpu_enc_bin_size }, 1731 {BUF_COMV, iris_vpu_enc_comv_size }, 1732 {BUF_NON_COMV, iris_vpu_enc_non_comv_size }, 1733 {BUF_LINE, iris_vpu_enc_line_size }, 1734 {BUF_ARP, iris_vpu_enc_arp_size }, 1735 {BUF_VPSS, iris_vpu_enc_vpss_size }, 1736 {BUF_SCRATCH_1, iris_vpu_enc_scratch1_size }, 1737 {BUF_SCRATCH_2, iris_vpu_enc_scratch2_size }, 1738 }; 1739 1740 if (inst->domain == DECODER) { 1741 buf_type_handle_size = ARRAY_SIZE(dec_internal_buf_type_handle); 1742 buf_type_handle_arr = dec_internal_buf_type_handle; 1743 } else if (inst->domain == ENCODER) { 1744 buf_type_handle_size = ARRAY_SIZE(enc_internal_buf_type_handle); 1745 buf_type_handle_arr = enc_internal_buf_type_handle; 1746 } 1747 1748 for (i = 0; i < buf_type_handle_size; i++) { 1749 if (buf_type_handle_arr[i].type == buffer_type) { 1750 size = buf_type_handle_arr[i].handle(inst); 1751 break; 1752 } 1753 } 1754 1755 return size; 1756 } 1757 1758 u32 iris_vpu33_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type) 1759 { 1760 u32 size = 0, i; 1761 1762 static const struct iris_vpu_buf_type_handle enc_internal_buf_type_handle[] = { 1763 {BUF_BIN, iris_vpu_enc_bin_size }, 1764 {BUF_COMV, iris_vpu_enc_comv_size }, 1765 {BUF_NON_COMV, iris_vpu_enc_non_comv_size }, 1766 {BUF_LINE, iris_vpu33_enc_line_size }, 1767 {BUF_ARP, iris_vpu_enc_arp_size }, 1768 {BUF_VPSS, iris_vpu_enc_vpss_size }, 1769 {BUF_SCRATCH_1, iris_vpu_enc_scratch1_size }, 1770 {BUF_SCRATCH_2, iris_vpu_enc_scratch2_size }, 1771 }; 1772 1773 if (inst->domain == DECODER) 1774 return iris_vpu_buf_size(inst, buffer_type); 1775 1776 for (i = 0; i < ARRAY_SIZE(enc_internal_buf_type_handle); i++) { 1777 if (enc_internal_buf_type_handle[i].type == buffer_type) { 1778 size = enc_internal_buf_type_handle[i].handle(inst); 1779 break; 1780 } 1781 } 1782 1783 return size; 1784 } 1785 1786 static u32 internal_buffer_count(struct iris_inst *inst, 1787 enum iris_buffer_type buffer_type) 1788 { 1789 if (buffer_type == BUF_BIN || buffer_type == BUF_LINE || 1790 buffer_type == BUF_PERSIST) { 1791 return 1; 1792 } else if (buffer_type == BUF_COMV || buffer_type == BUF_NON_COMV) { 1793 if (inst->codec == V4L2_PIX_FMT_H264 || 1794 inst->codec == V4L2_PIX_FMT_HEVC || 1795 inst->codec == V4L2_PIX_FMT_AV1) 1796 return 1; 1797 } 1798 1799 return 0; 1800 } 1801 1802 static inline int iris_vpu_dpb_count(struct iris_inst *inst) 1803 { 1804 if (inst->codec == V4L2_PIX_FMT_AV1) 1805 return 11; 1806 1807 if (iris_split_mode_enabled(inst)) { 1808 return inst->fw_min_count ? 1809 inst->fw_min_count : inst->buffers[BUF_OUTPUT].min_count; 1810 } 1811 1812 return 0; 1813 } 1814 1815 int iris_vpu_buf_count(struct iris_inst *inst, enum iris_buffer_type buffer_type) 1816 { 1817 switch (buffer_type) { 1818 case BUF_INPUT: 1819 return MIN_BUFFERS; 1820 case BUF_OUTPUT: 1821 if (inst->domain == ENCODER) 1822 return MIN_BUFFERS; 1823 else 1824 return output_min_count(inst); 1825 case BUF_NON_COMV: 1826 if (inst->codec == V4L2_PIX_FMT_AV1) 1827 return 0; 1828 else 1829 return 1; 1830 case BUF_BIN: 1831 case BUF_COMV: 1832 case BUF_LINE: 1833 case BUF_PERSIST: 1834 return internal_buffer_count(inst, buffer_type); 1835 case BUF_SCRATCH_1: 1836 case BUF_SCRATCH_2: 1837 case BUF_VPSS: 1838 case BUF_ARP: 1839 case BUF_PARTIAL: 1840 return 1; /* internal buffer count needed by firmware is 1 */ 1841 case BUF_DPB: 1842 return iris_vpu_dpb_count(inst); 1843 default: 1844 return 0; 1845 } 1846 } 1847