1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) STMicroelectronics SA 2015 4 * Authors: Yannick Fertre <yannick.fertre@st.com> 5 * Hugues Fruchet <hugues.fruchet@st.com> 6 */ 7 8 #include "hva.h" 9 #include "hva-hw.h" 10 11 #define MAX_SPS_PPS_SIZE 128 12 13 #define BITSTREAM_OFFSET_MASK 0x7F 14 15 /* video max size*/ 16 #define H264_MAX_SIZE_W 1920 17 #define H264_MAX_SIZE_H 1920 18 19 /* macroBlocs number (width & height) */ 20 #define MB_W(w) ((w + 0xF) / 0x10) 21 #define MB_H(h) ((h + 0xF) / 0x10) 22 23 /* formula to get temporal or spatial data size */ 24 #define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16) 25 26 #define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2) 27 #define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16) 28 #define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8) 29 #define SLICE_HEADER_SIZE (4 * 16) 30 #define BRC_DATA_SIZE (5 * 16) 31 32 /* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */ 33 #define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2) 34 35 /* 36 * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB 37 * for deblocking with size=4*16*MBx*2 38 */ 39 #define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2) 40 41 /* factor for bitrate and cpb buffer size max values if profile >= high */ 42 #define H264_FACTOR_HIGH 1200 43 44 /* factor for bitrate and cpb buffer size max values if profile < high */ 45 #define H264_FACTOR_BASELINE 1000 46 47 /* number of bytes for NALU_TYPE_FILLER_DATA header and footer */ 48 #define H264_FILLER_DATA_SIZE 6 49 50 struct h264_profile { 51 enum v4l2_mpeg_video_h264_level level; 52 u32 max_mb_per_seconds; 53 u32 max_frame_size; 54 u32 max_bitrate; 55 u32 max_cpb_size; 56 u32 min_comp_ratio; 57 }; 58 59 static const struct h264_profile h264_infos_list[] = { 60 {V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2}, 61 {V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2}, 62 {V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2}, 63 {V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2}, 64 {V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2}, 65 {V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2}, 66 {V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2}, 67 {V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2}, 68 {V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2}, 69 {V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4}, 70 {V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4}, 71 {V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4}, 72 {V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2}, 73 {V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2}, 74 {V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2}, 75 {V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2} 76 }; 77 78 enum hva_brc_type { 79 BRC_TYPE_NONE = 0, 80 BRC_TYPE_CBR = 1, 81 BRC_TYPE_VBR = 2, 82 BRC_TYPE_VBR_LOW_DELAY = 3 83 }; 84 85 enum hva_entropy_coding_mode { 86 CAVLC = 0, 87 CABAC = 1 88 }; 89 90 enum hva_picture_coding_type { 91 PICTURE_CODING_TYPE_I = 0, 92 PICTURE_CODING_TYPE_P = 1, 93 PICTURE_CODING_TYPE_B = 2 94 }; 95 96 enum hva_h264_sampling_mode { 97 SAMPLING_MODE_NV12 = 0, 98 SAMPLING_MODE_UYVY = 1, 99 SAMPLING_MODE_RGB3 = 3, 100 SAMPLING_MODE_XRGB4 = 4, 101 SAMPLING_MODE_NV21 = 8, 102 SAMPLING_MODE_VYUY = 9, 103 SAMPLING_MODE_BGR3 = 11, 104 SAMPLING_MODE_XBGR4 = 12, 105 SAMPLING_MODE_RGBX4 = 20, 106 SAMPLING_MODE_BGRX4 = 28 107 }; 108 109 enum hva_h264_nalu_type { 110 NALU_TYPE_UNKNOWN = 0, 111 NALU_TYPE_SLICE = 1, 112 NALU_TYPE_SLICE_DPA = 2, 113 NALU_TYPE_SLICE_DPB = 3, 114 NALU_TYPE_SLICE_DPC = 4, 115 NALU_TYPE_SLICE_IDR = 5, 116 NALU_TYPE_SEI = 6, 117 NALU_TYPE_SPS = 7, 118 NALU_TYPE_PPS = 8, 119 NALU_TYPE_AU_DELIMITER = 9, 120 NALU_TYPE_SEQ_END = 10, 121 NALU_TYPE_STREAM_END = 11, 122 NALU_TYPE_FILLER_DATA = 12, 123 NALU_TYPE_SPS_EXT = 13, 124 NALU_TYPE_PREFIX_UNIT = 14, 125 NALU_TYPE_SUBSET_SPS = 15, 126 NALU_TYPE_SLICE_AUX = 19, 127 NALU_TYPE_SLICE_EXT = 20 128 }; 129 130 enum hva_h264_sei_payload_type { 131 SEI_BUFFERING_PERIOD = 0, 132 SEI_PICTURE_TIMING = 1, 133 SEI_STEREO_VIDEO_INFO = 21, 134 SEI_FRAME_PACKING_ARRANGEMENT = 45 135 }; 136 137 /* 138 * stereo Video Info struct 139 */ 140 struct hva_h264_stereo_video_sei { 141 u8 field_views_flag; 142 u8 top_field_is_left_view_flag; 143 u8 current_frame_is_left_view_flag; 144 u8 next_frame_is_second_view_flag; 145 u8 left_view_self_contained_flag; 146 u8 right_view_self_contained_flag; 147 }; 148 149 /* 150 * struct hva_h264_td 151 * 152 * @frame_width: width in pixels of the buffer containing the input frame 153 * @frame_height: height in pixels of the buffer containing the input frame 154 * @frame_num: the parameter to be written in the slice header 155 * @picture_coding_type: type I, P or B 156 * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2 157 * @first_picture_in_sequence: flag telling to encoder that this is the 158 * first picture in a video sequence. 159 * Used for VBR 160 * @slice_size_type: 0 = no constraint to close the slice 161 * 1= a slice is closed as soon as the slice_mb_size limit 162 * is reached 163 * 2= a slice is closed as soon as the slice_byte_size limit 164 * is reached 165 * 3= a slice is closed as soon as either the slice_byte_size 166 * limit or the slice_mb_size limit is reached 167 * @slice_mb_size: defines the slice size in number of macroblocks 168 * (used when slice_size_type=1 or slice_size_type=3) 169 * @ir_param_option: defines the number of macroblocks per frame to be 170 * refreshed by AIR algorithm OR the refresh period 171 * by CIR algorithm 172 * @intra_refresh_type: enables the adaptive intra refresh algorithm. 173 * Disable=0 / Adaptative=1 and Cycle=2 as intra refresh 174 * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS 175 * @transform_mode: controls the use of 4x4/8x8 transform mode 176 * @disable_deblocking_filter_idc: 177 * 0: specifies that all luma and chroma block edges of 178 * the slice are filtered. 179 * 1: specifies that deblocking is disabled for all block 180 * edges of the slice. 181 * 2: specifies that all luma and chroma block edges of 182 * the slice are filtered with exception of the block edges 183 * that coincide with slice boundaries 184 * @slice_alpha_c0_offset_div2: to be written in slice header, 185 * controls deblocking 186 * @slice_beta_offset_div2: to be written in slice header, 187 * controls deblocking 188 * @encoder_complexity: encoder complexity control (IME). 189 * 0 = I_16x16, P_16x16, Full ME Complexity 190 * 1 = I_16x16, I_NxN, P_16x16, Full ME Complexity 191 * 2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity 192 * 4 = I_16x16, P_16x16, Reduced ME Complexity 193 * 5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity 194 * 6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity 195 * @chroma_qp_index_offset: coming from picture parameter set 196 * (PPS see [H.264 STD] 7.4.2.2) 197 * @entropy_coding_mode: entropy coding mode. 198 * 0 = CAVLC 199 * 1 = CABAC 200 * @brc_type: selects the bit-rate control algorithm 201 * 0 = constant Qp, (no BRC) 202 * 1 = CBR 203 * 2 = VBR 204 * @quant: Quantization param used in case of fix QP encoding (no BRC) 205 * @non_VCL_NALU_Size: size of non-VCL NALUs (SPS, PPS, filler), 206 * used by BRC 207 * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC 208 * @bit_rate: target bitrate, for BRC 209 * @qp_min: min QP threshold 210 * @qp_max: max QP threshold 211 * @framerate_num: target framerate numerator , used by BRC 212 * @framerate_den: target framerate denomurator , used by BRC 213 * @delay: End-to-End Initial Delay 214 * @strict_HRD_compliancy: flag for HDR compliancy (1) 215 * May impact quality encoding 216 * @addr_source_buffer: address of input frame buffer for current frame 217 * @addr_fwd_Ref_Buffer: address of reference frame buffer 218 * @addr_rec_buffer: address of reconstructed frame buffer 219 * @addr_output_bitstream_start: output bitstream start address 220 * @addr_output_bitstream_end: output bitstream end address 221 * @addr_external_sw : address of external search window 222 * @addr_lctx : address of context picture buffer 223 * @addr_local_rec_buffer: address of local reconstructed buffer 224 * @addr_spatial_context: address of spatial context buffer 225 * @bitstream_offset: offset in bits between aligned bitstream start 226 * address and first bit to be written by HVA. 227 * Range value is [0..63] 228 * @sampling_mode: Input picture format . 229 * 0: YUV420 semi_planar Interleaved 230 * 1: YUV422 raster Interleaved 231 * @addr_param_out: address of output parameters structure 232 * @addr_scaling_matrix: address to the coefficient of 233 * the inverse scaling matrix 234 * @addr_scaling_matrix_dir: address to the coefficient of 235 * the direct scaling matrix 236 * @addr_cabac_context_buffer: address of cabac context buffer 237 * @GmvX: Input information about the horizontal global displacement of 238 * the encoded frame versus the previous one 239 * @GmvY: Input information about the vertical global displacement of 240 * the encoded frame versus the previous one 241 * @window_width: width in pixels of the window to be encoded inside 242 * the input frame 243 * @window_height: width in pixels of the window to be encoded inside 244 * the input frame 245 * @window_horizontal_offset: horizontal offset in pels for input window 246 * within input frame 247 * @window_vertical_offset: vertical offset in pels for input window 248 * within input frame 249 * @addr_roi: Map of QP offset for the Region of Interest algorithm and 250 * also used for Error map. 251 * Bit 0-6 used for qp offset (value -64 to 63). 252 * Bit 7 used to force intra 253 * @addr_slice_header: address to slice header 254 * @slice_header_size_in_bits: size in bits of the Slice header 255 * @slice_header_offset0: Slice header offset where to insert 256 * first_Mb_in_slice 257 * @slice_header_offset1: Slice header offset where to insert 258 * slice_qp_delta 259 * @slice_header_offset2: Slice header offset where to insert 260 * num_MBs_in_slice 261 * @slice_synchro_enable: enable "slice ready" interrupt after each slice 262 * @max_slice_number: Maximum number of slice in a frame 263 * (0 is strictly forbidden) 264 * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to 265 * YUV for the Y component. 266 * Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0) 267 * @rgb2_yuv_u_coeff: four coefficients (C0C1C2C3) to convert from RGB to 268 * YUV for the Y component. 269 * Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0) 270 * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to 271 * YUV for the U (Cb) component. 272 * U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0) 273 * @slice_byte_size: maximum slice size in bytes 274 * (used when slice_size_type=2 or slice_size_type=3) 275 * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame 276 * for the AIR algorithm 277 * @brc_no_skip: Disable skipping in the Bitrate Controller 278 * @addr_brc_in_out_parameter: address of static buffer for BRC parameters 279 */ 280 struct hva_h264_td { 281 u16 frame_width; 282 u16 frame_height; 283 u32 frame_num; 284 u16 picture_coding_type; 285 u16 reserved1; 286 u16 pic_order_cnt_type; 287 u16 first_picture_in_sequence; 288 u16 slice_size_type; 289 u16 reserved2; 290 u32 slice_mb_size; 291 u16 ir_param_option; 292 u16 intra_refresh_type; 293 u16 use_constrained_intra_flag; 294 u16 transform_mode; 295 u16 disable_deblocking_filter_idc; 296 s16 slice_alpha_c0_offset_div2; 297 s16 slice_beta_offset_div2; 298 u16 encoder_complexity; 299 s16 chroma_qp_index_offset; 300 u16 entropy_coding_mode; 301 u16 brc_type; 302 u16 quant; 303 u32 non_vcl_nalu_size; 304 u32 cpb_buffer_size; 305 u32 bit_rate; 306 u16 qp_min; 307 u16 qp_max; 308 u16 framerate_num; 309 u16 framerate_den; 310 u16 delay; 311 u16 strict_hrd_compliancy; 312 u32 addr_source_buffer; 313 u32 addr_fwd_ref_buffer; 314 u32 addr_rec_buffer; 315 u32 addr_output_bitstream_start; 316 u32 addr_output_bitstream_end; 317 u32 addr_external_sw; 318 u32 addr_lctx; 319 u32 addr_local_rec_buffer; 320 u32 addr_spatial_context; 321 u16 bitstream_offset; 322 u16 sampling_mode; 323 u32 addr_param_out; 324 u32 addr_scaling_matrix; 325 u32 addr_scaling_matrix_dir; 326 u32 addr_cabac_context_buffer; 327 u32 reserved3; 328 u32 reserved4; 329 s16 gmv_x; 330 s16 gmv_y; 331 u16 window_width; 332 u16 window_height; 333 u16 window_horizontal_offset; 334 u16 window_vertical_offset; 335 u32 addr_roi; 336 u32 addr_slice_header; 337 u16 slice_header_size_in_bits; 338 u16 slice_header_offset0; 339 u16 slice_header_offset1; 340 u16 slice_header_offset2; 341 u32 reserved5; 342 u32 reserved6; 343 u16 reserved7; 344 u16 reserved8; 345 u16 slice_synchro_enable; 346 u16 max_slice_number; 347 u32 rgb2_yuv_y_coeff; 348 u32 rgb2_yuv_u_coeff; 349 u32 rgb2_yuv_v_coeff; 350 u32 slice_byte_size; 351 u16 max_air_intra_mb_nb; 352 u16 brc_no_skip; 353 u32 addr_temporal_context; 354 u32 addr_brc_in_out_parameter; 355 }; 356 357 /* 358 * struct hva_h264_slice_po 359 * 360 * @ slice_size: slice size 361 * @ slice_start_time: start time 362 * @ slice_stop_time: stop time 363 * @ slice_num: slice number 364 */ 365 struct hva_h264_slice_po { 366 u32 slice_size; 367 u32 slice_start_time; 368 u32 slice_end_time; 369 u32 slice_num; 370 }; 371 372 /* 373 * struct hva_h264_po 374 * 375 * @ bitstream_size: bitstream size 376 * @ dct_bitstream_size: dtc bitstream size 377 * @ stuffing_bits: number of stuffing bits inserted by the encoder 378 * @ removal_time: removal time of current frame (nb of ticks 1/framerate) 379 * @ hvc_start_time: hvc start time 380 * @ hvc_stop_time: hvc stop time 381 * @ slice_count: slice count 382 */ 383 struct hva_h264_po { 384 u32 bitstream_size; 385 u32 dct_bitstream_size; 386 u32 stuffing_bits; 387 u32 removal_time; 388 u32 hvc_start_time; 389 u32 hvc_stop_time; 390 u32 slice_count; 391 u32 reserved0; 392 struct hva_h264_slice_po slice_params[16]; 393 }; 394 395 struct hva_h264_task { 396 struct hva_h264_td td; 397 struct hva_h264_po po; 398 }; 399 400 /* 401 * struct hva_h264_ctx 402 * 403 * @seq_info: sequence information buffer 404 * @ref_frame: reference frame buffer 405 * @rec_frame: reconstructed frame buffer 406 * @task: task descriptor 407 */ 408 struct hva_h264_ctx { 409 struct hva_buffer *seq_info; 410 struct hva_buffer *ref_frame; 411 struct hva_buffer *rec_frame; 412 struct hva_buffer *task; 413 }; 414 415 static int hva_h264_fill_slice_header(struct hva_ctx *pctx, 416 u8 *slice_header_addr, 417 struct hva_controls *ctrls, 418 int frame_num, 419 u16 *header_size, 420 u16 *header_offset0, 421 u16 *header_offset1, 422 u16 *header_offset2) 423 { 424 /* 425 * with this HVA hardware version, part of the slice header is computed 426 * on host and part by hardware. 427 * The part of host is precomputed and available through this array. 428 */ 429 struct device *dev = ctx_to_dev(pctx); 430 int cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC; 431 static const unsigned char slice_header[] = { 432 0x00, 0x00, 0x00, 0x01, 433 0x41, 0x34, 0x07, 0x00 434 }; 435 int idr_pic_id = frame_num % 2; 436 enum hva_picture_coding_type type; 437 u32 frame_order = frame_num % ctrls->gop_size; 438 439 if (!(frame_num % ctrls->gop_size)) 440 type = PICTURE_CODING_TYPE_I; 441 else 442 type = PICTURE_CODING_TYPE_P; 443 444 memcpy(slice_header_addr, slice_header, sizeof(slice_header)); 445 446 *header_size = 56; 447 *header_offset0 = 40; 448 *header_offset1 = 13; 449 *header_offset2 = 0; 450 451 if (type == PICTURE_CODING_TYPE_I) { 452 slice_header_addr[4] = 0x65; 453 slice_header_addr[5] = 0x11; 454 455 /* toggle the I frame */ 456 if ((frame_num / ctrls->gop_size) % 2) { 457 *header_size += 4; 458 *header_offset1 += 4; 459 slice_header_addr[6] = 0x04; 460 slice_header_addr[7] = 0x70; 461 462 } else { 463 *header_size += 2; 464 *header_offset1 += 2; 465 slice_header_addr[6] = 0x09; 466 slice_header_addr[7] = 0xC0; 467 } 468 } else { 469 if (ctrls->entropy_mode == cabac) { 470 *header_size += 1; 471 *header_offset1 += 1; 472 slice_header_addr[7] = 0x80; 473 } 474 /* 475 * update slice header with P frame order 476 * frame order is limited to 16 (coded on 4bits only) 477 */ 478 slice_header_addr[5] += ((frame_order & 0x0C) >> 2); 479 slice_header_addr[6] += ((frame_order & 0x03) << 6); 480 } 481 482 dev_dbg(dev, 483 "%s %s slice header order %d idrPicId %d header size %d\n", 484 pctx->name, __func__, frame_order, idr_pic_id, *header_size); 485 return 0; 486 } 487 488 static int hva_h264_fill_data_nal(struct hva_ctx *pctx, 489 unsigned int stuffing_bytes, u8 *addr, 490 unsigned int stream_size, unsigned int *size) 491 { 492 struct device *dev = ctx_to_dev(pctx); 493 static const u8 start[] = { 0x00, 0x00, 0x00, 0x01 }; 494 495 dev_dbg(dev, "%s %s stuffing bytes %d\n", pctx->name, __func__, 496 stuffing_bytes); 497 498 if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) { 499 dev_dbg(dev, "%s %s too many stuffing bytes %d\n", 500 pctx->name, __func__, stuffing_bytes); 501 return 0; 502 } 503 504 /* start code */ 505 memcpy(addr + *size, start, sizeof(start)); 506 *size += sizeof(start); 507 508 /* nal_unit_type */ 509 addr[*size] = NALU_TYPE_FILLER_DATA; 510 *size += 1; 511 512 memset(addr + *size, 0xff, stuffing_bytes); 513 *size += stuffing_bytes; 514 515 addr[*size] = 0x80; 516 *size += 1; 517 518 return 0; 519 } 520 521 static int hva_h264_fill_sei_nal(struct hva_ctx *pctx, 522 enum hva_h264_sei_payload_type type, 523 u8 *addr, u32 *size) 524 { 525 struct device *dev = ctx_to_dev(pctx); 526 static const u8 start[] = { 0x00, 0x00, 0x00, 0x01 }; 527 struct hva_h264_stereo_video_sei info; 528 u8 offset = 7; 529 u8 msg = 0; 530 531 /* start code */ 532 memcpy(addr + *size, start, sizeof(start)); 533 *size += sizeof(start); 534 535 /* nal_unit_type */ 536 addr[*size] = NALU_TYPE_SEI; 537 *size += 1; 538 539 /* payload type */ 540 addr[*size] = type; 541 *size += 1; 542 543 switch (type) { 544 case SEI_STEREO_VIDEO_INFO: 545 memset(&info, 0, sizeof(info)); 546 547 /* set to top/bottom frame packing arrangement */ 548 info.field_views_flag = 1; 549 info.top_field_is_left_view_flag = 1; 550 551 /* payload size */ 552 addr[*size] = 1; 553 *size += 1; 554 555 /* payload */ 556 msg = info.field_views_flag << offset--; 557 558 if (info.field_views_flag) { 559 msg |= info.top_field_is_left_view_flag << 560 offset--; 561 } else { 562 msg |= info.current_frame_is_left_view_flag << 563 offset--; 564 msg |= info.next_frame_is_second_view_flag << 565 offset--; 566 } 567 msg |= info.left_view_self_contained_flag << offset--; 568 msg |= info.right_view_self_contained_flag << offset--; 569 570 addr[*size] = msg; 571 *size += 1; 572 573 addr[*size] = 0x80; 574 *size += 1; 575 576 return 0; 577 case SEI_BUFFERING_PERIOD: 578 case SEI_PICTURE_TIMING: 579 case SEI_FRAME_PACKING_ARRANGEMENT: 580 default: 581 dev_err(dev, "%s sei nal type not supported %d\n", 582 pctx->name, type); 583 return -EINVAL; 584 } 585 } 586 587 static int hva_h264_prepare_task(struct hva_ctx *pctx, 588 struct hva_h264_task *task, 589 struct hva_frame *frame, 590 struct hva_stream *stream) 591 { 592 struct hva_dev *hva = ctx_to_hdev(pctx); 593 struct device *dev = ctx_to_dev(pctx); 594 struct hva_h264_ctx *ctx = pctx->priv; 595 struct hva_buffer *seq_info = ctx->seq_info; 596 struct hva_buffer *fwd_ref_frame = ctx->ref_frame; 597 struct hva_buffer *loc_rec_frame = ctx->rec_frame; 598 struct hva_h264_td *td = &task->td; 599 struct hva_controls *ctrls = &pctx->ctrls; 600 struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame; 601 int cavlc = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC; 602 u32 frame_num = pctx->stream_num; 603 u32 addr_esram = hva->esram_addr; 604 enum v4l2_mpeg_video_h264_level level; 605 dma_addr_t paddr = 0; 606 u8 *slice_header_vaddr; 607 u32 frame_width = frame->info.aligned_width; 608 u32 frame_height = frame->info.aligned_height; 609 u32 max_cpb_buffer_size; 610 unsigned int payload = stream->bytesused; 611 u32 max_bitrate; 612 613 /* check width and height parameters */ 614 if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) || 615 (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) { 616 dev_err(dev, 617 "%s width(%d) or height(%d) exceeds limits (%dx%d)\n", 618 pctx->name, frame_width, frame_height, 619 H264_MAX_SIZE_W, H264_MAX_SIZE_H); 620 pctx->frame_errors++; 621 return -EINVAL; 622 } 623 624 level = ctrls->level; 625 626 memset(td, 0, sizeof(struct hva_h264_td)); 627 628 td->frame_width = frame_width; 629 td->frame_height = frame_height; 630 631 /* set frame alignment */ 632 td->window_width = frame_width; 633 td->window_height = frame_height; 634 td->window_horizontal_offset = 0; 635 td->window_vertical_offset = 0; 636 637 td->first_picture_in_sequence = (!frame_num) ? 1 : 0; 638 639 /* pic_order_cnt_type hard coded to '2' as only I & P frames */ 640 td->pic_order_cnt_type = 2; 641 642 /* useConstrainedIntraFlag set to false for better coding efficiency */ 643 td->use_constrained_intra_flag = false; 644 td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) 645 ? BRC_TYPE_CBR : BRC_TYPE_VBR; 646 647 td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC : 648 CABAC; 649 650 td->bit_rate = ctrls->bitrate; 651 652 /* set framerate, framerate = 1 n/ time per frame */ 653 if (time_per_frame->numerator >= 536) { 654 /* 655 * due to a hardware bug, framerate denominator can't exceed 656 * 536 (BRC overflow). Compute nearest framerate 657 */ 658 td->framerate_den = 1; 659 td->framerate_num = (time_per_frame->denominator + 660 (time_per_frame->numerator >> 1) - 1) / 661 time_per_frame->numerator; 662 663 /* 664 * update bitrate to introduce a correction due to 665 * the new framerate 666 * new bitrate = (old bitrate * new framerate) / old framerate 667 */ 668 td->bit_rate /= time_per_frame->numerator; 669 td->bit_rate *= time_per_frame->denominator; 670 td->bit_rate /= td->framerate_num; 671 } else { 672 td->framerate_den = time_per_frame->numerator; 673 td->framerate_num = time_per_frame->denominator; 674 } 675 676 /* compute maximum bitrate depending on profile */ 677 if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH) 678 max_bitrate = h264_infos_list[level].max_bitrate * 679 H264_FACTOR_HIGH; 680 else 681 max_bitrate = h264_infos_list[level].max_bitrate * 682 H264_FACTOR_BASELINE; 683 684 /* check if bitrate doesn't exceed max size */ 685 if (td->bit_rate > max_bitrate) { 686 dev_dbg(dev, 687 "%s bitrate (%d) larger than level and profile allow, clip to %d\n", 688 pctx->name, td->bit_rate, max_bitrate); 689 td->bit_rate = max_bitrate; 690 } 691 692 /* convert cpb_buffer_size in bits */ 693 td->cpb_buffer_size = ctrls->cpb_size * 8000; 694 695 /* compute maximum cpb buffer size depending on profile */ 696 if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH) 697 max_cpb_buffer_size = 698 h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH; 699 else 700 max_cpb_buffer_size = 701 h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE; 702 703 /* check if cpb buffer size doesn't exceed max size */ 704 if (td->cpb_buffer_size > max_cpb_buffer_size) { 705 dev_dbg(dev, 706 "%s cpb size larger than level %d allows, clip to %d\n", 707 pctx->name, td->cpb_buffer_size, max_cpb_buffer_size); 708 td->cpb_buffer_size = max_cpb_buffer_size; 709 } 710 711 /* enable skipping in the Bitrate Controller */ 712 td->brc_no_skip = 0; 713 714 /* initial delay */ 715 if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) && 716 td->bit_rate) 717 td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate); 718 else 719 td->delay = 0; 720 721 switch (frame->info.pixelformat) { 722 case V4L2_PIX_FMT_NV12: 723 td->sampling_mode = SAMPLING_MODE_NV12; 724 break; 725 case V4L2_PIX_FMT_NV21: 726 td->sampling_mode = SAMPLING_MODE_NV21; 727 break; 728 default: 729 dev_err(dev, "%s invalid source pixel format\n", 730 pctx->name); 731 pctx->frame_errors++; 732 return -EINVAL; 733 } 734 735 /* 736 * fill matrix color converter (RGB to YUV) 737 * Y = 0,299 R + 0,587 G + 0,114 B 738 * Cb = -0,1687 R -0,3313 G + 0,5 B + 128 739 * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128 740 */ 741 td->rgb2_yuv_y_coeff = 0x12031008; 742 td->rgb2_yuv_u_coeff = 0x800EF7FB; 743 td->rgb2_yuv_v_coeff = 0x80FEF40E; 744 745 /* enable/disable transform mode */ 746 td->transform_mode = ctrls->dct8x8; 747 748 /* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */ 749 td->encoder_complexity = 2; 750 751 /* quant fix to 28, default VBR value */ 752 td->quant = 28; 753 754 if (td->framerate_den == 0) { 755 dev_err(dev, "%s invalid framerate\n", pctx->name); 756 pctx->frame_errors++; 757 return -EINVAL; 758 } 759 760 /* if automatic framerate, deactivate bitrate controller */ 761 if (td->framerate_num == 0) 762 td->brc_type = 0; 763 764 /* compliancy fix to true */ 765 td->strict_hrd_compliancy = 1; 766 767 /* set minimum & maximum quantizers */ 768 td->qp_min = clamp_val(ctrls->qpmin, 0, 51); 769 td->qp_max = clamp_val(ctrls->qpmax, 0, 51); 770 771 td->addr_source_buffer = frame->paddr; 772 td->addr_fwd_ref_buffer = fwd_ref_frame->paddr; 773 td->addr_rec_buffer = loc_rec_frame->paddr; 774 775 td->addr_output_bitstream_end = (u32)stream->paddr + stream->size; 776 777 td->addr_output_bitstream_start = (u32)stream->paddr; 778 td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) & 779 BITSTREAM_OFFSET_MASK; 780 781 td->addr_param_out = (u32)ctx->task->paddr + 782 offsetof(struct hva_h264_task, po); 783 784 /* swap spatial and temporal context */ 785 if (frame_num % 2) { 786 paddr = seq_info->paddr; 787 td->addr_spatial_context = ALIGN(paddr, 0x100); 788 paddr = seq_info->paddr + DATA_SIZE(frame_width, 789 frame_height); 790 td->addr_temporal_context = ALIGN(paddr, 0x100); 791 } else { 792 paddr = seq_info->paddr; 793 td->addr_temporal_context = ALIGN(paddr, 0x100); 794 paddr = seq_info->paddr + DATA_SIZE(frame_width, 795 frame_height); 796 td->addr_spatial_context = ALIGN(paddr, 0x100); 797 } 798 799 paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height); 800 801 td->addr_brc_in_out_parameter = ALIGN(paddr, 0x100); 802 803 paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE; 804 td->addr_slice_header = ALIGN(paddr, 0x100); 805 td->addr_external_sw = ALIGN(addr_esram, 0x100); 806 807 addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width); 808 td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100); 809 810 addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width); 811 td->addr_lctx = ALIGN(addr_esram, 0x100); 812 813 addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)); 814 td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100); 815 816 if (!(frame_num % ctrls->gop_size)) { 817 td->picture_coding_type = PICTURE_CODING_TYPE_I; 818 stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME; 819 } else { 820 td->picture_coding_type = PICTURE_CODING_TYPE_P; 821 stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME; 822 } 823 824 /* fill the slice header part */ 825 slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header - 826 seq_info->paddr); 827 828 hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num, 829 &td->slice_header_size_in_bits, 830 &td->slice_header_offset0, 831 &td->slice_header_offset1, 832 &td->slice_header_offset2); 833 834 td->chroma_qp_index_offset = 2; 835 td->slice_synchro_enable = 0; 836 td->max_slice_number = 1; 837 838 /* 839 * check the sps/pps header size for key frame only 840 * sps/pps header was previously fill by libv4l 841 * during qbuf of stream buffer 842 */ 843 if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) && 844 (payload > MAX_SPS_PPS_SIZE)) { 845 dev_err(dev, "%s invalid sps/pps size %d\n", pctx->name, 846 payload); 847 pctx->frame_errors++; 848 return -EINVAL; 849 } 850 851 if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME) 852 payload = 0; 853 854 /* add SEI nal (video stereo info) */ 855 if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO, 856 (u8 *)stream->vaddr, 857 &payload)) { 858 dev_err(dev, "%s fail to get SEI nal\n", pctx->name); 859 pctx->frame_errors++; 860 return -EINVAL; 861 } 862 863 /* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */ 864 td->non_vcl_nalu_size = payload * 8; 865 866 /* compute bitstream offset & new start address of bitstream */ 867 td->addr_output_bitstream_start += ((payload >> 4) << 4); 868 td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8; 869 870 stream->bytesused = payload; 871 872 return 0; 873 } 874 875 static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task) 876 { 877 struct hva_h264_po *po = &task->po; 878 879 return po->bitstream_size; 880 } 881 882 static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task) 883 { 884 struct hva_h264_po *po = &task->po; 885 886 return po->stuffing_bits >> 3; 887 } 888 889 static int hva_h264_open(struct hva_ctx *pctx) 890 { 891 struct device *dev = ctx_to_dev(pctx); 892 struct hva_h264_ctx *ctx; 893 struct hva_dev *hva = ctx_to_hdev(pctx); 894 u32 frame_width = pctx->frameinfo.aligned_width; 895 u32 frame_height = pctx->frameinfo.aligned_height; 896 u32 size; 897 int ret; 898 899 /* check esram size necessary to encode a frame */ 900 size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) + 901 LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) + 902 CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) + 903 CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width); 904 905 if (hva->esram_size < size) { 906 dev_err(dev, "%s not enough esram (max:%d request:%d)\n", 907 pctx->name, hva->esram_size, size); 908 ret = -EINVAL; 909 goto err; 910 } 911 912 /* allocate context for codec */ 913 ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); 914 if (!ctx) { 915 ret = -ENOMEM; 916 goto err; 917 } 918 919 /* allocate sequence info buffer */ 920 ret = hva_mem_alloc(pctx, 921 2 * DATA_SIZE(frame_width, frame_height) + 922 SLICE_HEADER_SIZE + 923 BRC_DATA_SIZE, 924 "hva sequence info", 925 &ctx->seq_info); 926 if (ret) { 927 dev_err(dev, 928 "%s failed to allocate sequence info buffer\n", 929 pctx->name); 930 goto err_ctx; 931 } 932 933 /* allocate reference frame buffer */ 934 ret = hva_mem_alloc(pctx, 935 frame_width * frame_height * 3 / 2, 936 "hva reference frame", 937 &ctx->ref_frame); 938 if (ret) { 939 dev_err(dev, "%s failed to allocate reference frame buffer\n", 940 pctx->name); 941 goto err_seq_info; 942 } 943 944 /* allocate reconstructed frame buffer */ 945 ret = hva_mem_alloc(pctx, 946 frame_width * frame_height * 3 / 2, 947 "hva reconstructed frame", 948 &ctx->rec_frame); 949 if (ret) { 950 dev_err(dev, 951 "%s failed to allocate reconstructed frame buffer\n", 952 pctx->name); 953 goto err_ref_frame; 954 } 955 956 /* allocate task descriptor */ 957 ret = hva_mem_alloc(pctx, 958 sizeof(struct hva_h264_task), 959 "hva task descriptor", 960 &ctx->task); 961 if (ret) { 962 dev_err(dev, 963 "%s failed to allocate task descriptor\n", 964 pctx->name); 965 goto err_rec_frame; 966 } 967 968 pctx->priv = (void *)ctx; 969 970 return 0; 971 972 err_rec_frame: 973 hva_mem_free(pctx, ctx->rec_frame); 974 err_ref_frame: 975 hva_mem_free(pctx, ctx->ref_frame); 976 err_seq_info: 977 hva_mem_free(pctx, ctx->seq_info); 978 err_ctx: 979 devm_kfree(dev, ctx); 980 err: 981 pctx->sys_errors++; 982 return ret; 983 } 984 985 static int hva_h264_close(struct hva_ctx *pctx) 986 { 987 struct hva_h264_ctx *ctx = pctx->priv; 988 struct device *dev = ctx_to_dev(pctx); 989 990 if (ctx->seq_info) 991 hva_mem_free(pctx, ctx->seq_info); 992 993 if (ctx->ref_frame) 994 hva_mem_free(pctx, ctx->ref_frame); 995 996 if (ctx->rec_frame) 997 hva_mem_free(pctx, ctx->rec_frame); 998 999 if (ctx->task) 1000 hva_mem_free(pctx, ctx->task); 1001 1002 devm_kfree(dev, ctx); 1003 1004 return 0; 1005 } 1006 1007 static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame, 1008 struct hva_stream *stream) 1009 { 1010 struct hva_h264_ctx *ctx = pctx->priv; 1011 struct hva_h264_task *task = ctx->task->vaddr; 1012 u32 stuffing_bytes = 0; 1013 int ret = 0; 1014 1015 ret = hva_h264_prepare_task(pctx, task, frame, stream); 1016 if (ret) 1017 goto err; 1018 1019 ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task); 1020 if (ret) 1021 goto err; 1022 1023 pctx->stream_num++; 1024 stream->bytesused += hva_h264_get_stream_size(task); 1025 1026 stuffing_bytes = hva_h264_get_stuffing_bytes(task); 1027 1028 if (stuffing_bytes) 1029 hva_h264_fill_data_nal(pctx, stuffing_bytes, 1030 (u8 *)stream->vaddr, 1031 stream->size, 1032 &stream->bytesused); 1033 1034 /* switch reference & reconstructed frame */ 1035 swap(ctx->ref_frame, ctx->rec_frame); 1036 1037 return 0; 1038 err: 1039 stream->bytesused = 0; 1040 return ret; 1041 } 1042 1043 const struct hva_enc nv12h264enc = { 1044 .name = "H264(NV12)", 1045 .pixelformat = V4L2_PIX_FMT_NV12, 1046 .streamformat = V4L2_PIX_FMT_H264, 1047 .max_width = H264_MAX_SIZE_W, 1048 .max_height = H264_MAX_SIZE_H, 1049 .open = hva_h264_open, 1050 .close = hva_h264_close, 1051 .encode = hva_h264_encode, 1052 }; 1053 1054 const struct hva_enc nv21h264enc = { 1055 .name = "H264(NV21)", 1056 .pixelformat = V4L2_PIX_FMT_NV21, 1057 .streamformat = V4L2_PIX_FMT_H264, 1058 .max_width = H264_MAX_SIZE_W, 1059 .max_height = H264_MAX_SIZE_H, 1060 .open = hva_h264_open, 1061 .close = hva_h264_close, 1062 .encode = hva_h264_encode, 1063 }; 1064