xref: /linux/drivers/media/platform/st/sti/hva/hva-h264.c (revision cdd5b5a9761fd66d17586e4f4ba6588c70e640ea)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) STMicroelectronics SA 2015
4  * Authors: Yannick Fertre <yannick.fertre@st.com>
5  *          Hugues Fruchet <hugues.fruchet@st.com>
6  */
7 
8 #include "hva.h"
9 #include "hva-hw.h"
10 
/* maximum accepted size in bytes of the SPS/PPS headers of a key frame */
#define MAX_SPS_PPS_SIZE 128

/* mask applied to the bit offset derived from the stream start address */
#define BITSTREAM_OFFSET_MASK 0x7F

/* video max size */
#define H264_MAX_SIZE_W 1920
#define H264_MAX_SIZE_H 1920

/*
 * macroblocks number (width & height), rounded up to a whole 16-pixel
 * macroblock. The argument is parenthesized so that any expression
 * (shift, conditional, ...) can be passed without precedence surprises.
 */
#define MB_W(w) (((w) + 0xF)  / 0x10)
#define MB_H(h) (((h) + 0xF)  / 0x10)

/* formula to get temporal or spatial data size */
#define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16)

#define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2)
#define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16)
#define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8)
#define SLICE_HEADER_SIZE (4 * 16)
#define BRC_DATA_SIZE (5 * 16)

/* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */
#define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2)

/*
 * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB
 * for deblocking with size=4*16*MBx*2
 */
#define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2)

/* factor for bitrate and cpb buffer size max values if profile >= high */
#define H264_FACTOR_HIGH 1200

/* factor for bitrate and cpb buffer size max values if profile < high */
#define H264_FACTOR_BASELINE 1000

/* number of bytes for NALU_TYPE_FILLER_DATA header and footer */
#define H264_FILLER_DATA_SIZE 6
49 
/*
 * struct h264_profile - limits attached to one H.264 level
 *
 * @level: V4L2 level value the entry describes
 * @max_mb_per_seconds: maximum macroblock rate (not read in the part of the
 *			file visible here)
 * @max_frame_size: maximum frame size (not read in the part of the file
 *		    visible here; presumably in macroblocks - TODO confirm)
 * @max_bitrate: maximum bitrate; multiplied by H264_FACTOR_HIGH or
 *		 H264_FACTOR_BASELINE before being compared to the target
 *		 bitrate
 * @max_cpb_size: maximum coded picture buffer size; scaled by the same
 *		  factors before being compared to the cpb buffer size
 * @min_comp_ratio: minimum compression ratio (not read in the part of the
 *		    file visible here)
 */
struct h264_profile {
	enum v4l2_mpeg_video_h264_level level;
	u32 max_mb_per_seconds;
	u32 max_frame_size;
	u32 max_bitrate;
	u32 max_cpb_size;
	u32 min_comp_ratio;
};
58 
59 static const struct h264_profile h264_infos_list[] = {
60 	{V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2},
61 	{V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2},
62 	{V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2},
63 	{V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2},
64 	{V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2},
65 	{V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2},
66 	{V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2},
67 	{V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2},
68 	{V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2},
69 	{V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4},
70 	{V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4},
71 	{V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4},
72 	{V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2},
73 	{V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2},
74 	{V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2},
75 	{V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2}
76 };
77 
/*
 * Bit-rate control algorithm, as stored in the brc_type member of the
 * task descriptor.
 */
enum hva_brc_type {
	BRC_TYPE_NONE		= 0,	/* constant QP, no bit-rate control */
	BRC_TYPE_CBR		= 1,
	BRC_TYPE_VBR		= 2,
	BRC_TYPE_VBR_LOW_DELAY	= 3,
};
84 
/*
 * Entropy coding mode, as stored in the entropy_coding_mode member of the
 * task descriptor.
 */
enum hva_entropy_coding_mode {
	CAVLC	= 0,
	CABAC	= 1,
};
89 
/*
 * Picture coding type, as stored in the picture_coding_type member of the
 * task descriptor.
 */
enum hva_picture_coding_type {
	PICTURE_CODING_TYPE_I	= 0,
	PICTURE_CODING_TYPE_P	= 1,
	PICTURE_CODING_TYPE_B	= 2,
};
95 
/*
 * Input picture format, as stored in the sampling_mode member of the task
 * descriptor. Values are written in hexadecimal; they are numerically the
 * same as the decimal ones used before.
 */
enum hva_h264_sampling_mode {
	SAMPLING_MODE_NV12	= 0x00,
	SAMPLING_MODE_UYVY	= 0x01,
	SAMPLING_MODE_RGB3	= 0x03,
	SAMPLING_MODE_XRGB4	= 0x04,
	SAMPLING_MODE_NV21	= 0x08,
	SAMPLING_MODE_VYUY	= 0x09,
	SAMPLING_MODE_BGR3	= 0x0B,
	SAMPLING_MODE_XBGR4	= 0x0C,
	SAMPLING_MODE_RGBX4	= 0x14,
	SAMPLING_MODE_BGRX4	= 0x1C,
};
108 
/*
 * NAL unit type values. The value is written as-is into the byte that
 * follows the start code (see the SEI and filler data NAL writers).
 */
enum hva_h264_nalu_type {
	NALU_TYPE_UNKNOWN	= 0,
	NALU_TYPE_SLICE		= 1,
	NALU_TYPE_SLICE_DPA	= 2,
	NALU_TYPE_SLICE_DPB	= 3,
	NALU_TYPE_SLICE_DPC	= 4,
	NALU_TYPE_SLICE_IDR	= 5,
	NALU_TYPE_SEI		= 6,
	NALU_TYPE_SPS		= 7,
	NALU_TYPE_PPS		= 8,
	NALU_TYPE_AU_DELIMITER	= 9,
	NALU_TYPE_SEQ_END	= 10,
	NALU_TYPE_STREAM_END	= 11,
	NALU_TYPE_FILLER_DATA	= 12,
	NALU_TYPE_SPS_EXT	= 13,
	NALU_TYPE_PREFIX_UNIT	= 14,
	NALU_TYPE_SUBSET_SPS	= 15,
	/* values 16 to 18 are not defined here */
	NALU_TYPE_SLICE_AUX	= 19,
	NALU_TYPE_SLICE_EXT	= 20,
};
129 
/*
 * SEI payload type values. hva_h264_fill_sei_nal() only knows how to build
 * SEI_STEREO_VIDEO_INFO; the other types are rejected there.
 */
enum hva_h264_sei_payload_type {
	SEI_BUFFERING_PERIOD		= 0,
	SEI_PICTURE_TIMING		= 1,
	SEI_STEREO_VIDEO_INFO		= 21,
	SEI_FRAME_PACKING_ARRANGEMENT	= 45,
};
136 
/*
 * struct hva_h264_stereo_video_sei - flags of the stereo video info SEI
 *
 * Each member holds one flag. hva_h264_fill_sei_nal() packs them, most
 * significant bit first, into the single payload byte of the SEI message:
 * field_views_flag comes first, then either top_field_is_left_view_flag
 * (when field_views_flag is set) or current_frame_is_left_view_flag and
 * next_frame_is_second_view_flag, then the two *_self_contained_flag.
 */
struct hva_h264_stereo_video_sei {
	u8 field_views_flag;
	u8 top_field_is_left_view_flag;
	u8 current_frame_is_left_view_flag;
	u8 next_frame_is_second_view_flag;
	u8 left_view_self_contained_flag;
	u8 right_view_self_contained_flag;
};
148 
149 /*
150  * struct hva_h264_td
151  *
152  * @frame_width: width in pixels of the buffer containing the input frame
153  * @frame_height: height in pixels of the buffer containing the input frame
154  * @frame_num: the parameter to be written in the slice header
155  * @picture_coding_type: type I, P or B
156  * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2
157  * @first_picture_in_sequence: flag telling to encoder that this is the
158  *			       first picture in a video sequence.
159  *			       Used for VBR
160  * @slice_size_type: 0 = no constraint to close the slice
161  *		     1= a slice is closed as soon as the slice_mb_size limit
162  *			is reached
163  *		     2= a slice is closed as soon as the slice_byte_size limit
164  *			is reached
165  *		     3= a slice is closed as soon as either the slice_byte_size
166  *			limit or the slice_mb_size limit is reached
167  * @slice_mb_size: defines the slice size in number of macroblocks
168  *		   (used when slice_size_type=1 or slice_size_type=3)
169  * @ir_param_option: defines the number of macroblocks per frame to be
170  *		     refreshed by AIR algorithm OR the refresh period
171  *		     by CIR algorithm
172  * @intra_refresh_type: enables the adaptive intra refresh algorithm.
173  *			Disable=0 / Adaptive=1 and Cycle=2 as intra refresh
174  * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS
175  * @transform_mode: controls the use of 4x4/8x8 transform mode
176  * @disable_deblocking_filter_idc:
177  *		     0: specifies that all luma and chroma block edges of
178  *			the slice are filtered.
179  *		     1: specifies that deblocking is disabled for all block
180  *			edges of the slice.
181  *		     2: specifies that all luma and chroma block edges of
182  *			the slice are filtered with exception of the block edges
183  *			that coincide with slice boundaries
184  * @slice_alpha_c0_offset_div2: to be written in slice header,
185  *				controls deblocking
186  * @slice_beta_offset_div2: to be written in slice header,
187  *			    controls deblocking
188  * @encoder_complexity: encoder complexity control (IME).
189  *		     0 = I_16x16, P_16x16, Full ME Complexity
190  *		     1 = I_16x16, I_NxN, P_16x16, Full ME Complexity
191  *		     2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity
192  *		     4 = I_16x16, P_16x16, Reduced ME Complexity
193  *		     5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity
194  *		     6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity
195  *  @chroma_qp_index_offset: coming from picture parameter set
196  *			     (PPS see [H.264 STD] 7.4.2.2)
197  *  @entropy_coding_mode: entropy coding mode.
198  *			  0 = CAVLC
199  *			  1 = CABAC
200  * @brc_type: selects the bit-rate control algorithm
201  *		     0 = constant Qp, (no BRC)
202  *		     1 = CBR
203  *		     2 = VBR
204  * @quant: Quantization param used in case of fix QP encoding (no BRC)
205  * @non_vcl_nalu_size: size of non-VCL NALUs (SPS, PPS, filler),
206  *		       used by BRC
207  * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC
208  * @bit_rate: target bitrate, for BRC
209  * @qp_min: min QP threshold
210  * @qp_max: max QP threshold
211  * @framerate_num: target framerate numerator , used by BRC
212  * @framerate_den: target framerate denominator, used by BRC
213  * @delay: End-to-End Initial Delay
214  * @strict_hrd_compliancy: flag for HRD compliancy (1)
215  *			   May impact quality encoding
216  * @addr_source_buffer: address of input frame buffer for current frame
217  * @addr_fwd_ref_buffer: address of reference frame buffer
218  * @addr_rec_buffer: address of reconstructed frame buffer
219  * @addr_output_bitstream_start: output bitstream start address
220  * @addr_output_bitstream_end: output bitstream end address
221  * @addr_external_sw : address of external search window
222  * @addr_lctx : address of context picture buffer
223  * @addr_local_rec_buffer: address of local reconstructed buffer
224  * @addr_spatial_context: address of spatial context buffer
225  * @bitstream_offset: offset in bits between aligned bitstream start
226  *		      address and first bit to be written by HVA.
227  *		      Range value is [0..63]
228  * @sampling_mode: Input picture format .
229  *		     0: YUV420 semi_planar Interleaved
230  *		     1: YUV422 raster Interleaved
231  * @addr_param_out: address of output parameters structure
232  * @addr_scaling_matrix: address to the coefficient of
233  *			 the inverse scaling matrix
234  * @addr_scaling_matrix_dir: address to the coefficient of
235  *			     the direct scaling matrix
236  * @addr_cabac_context_buffer: address of cabac context buffer
237  * @gmv_x: Input information about the horizontal global displacement of
238  *	   the encoded frame versus the previous one
239  * @gmv_y: Input information about the vertical global displacement of
240  *	   the encoded frame versus the previous one
241  * @window_width: width in pixels of the window to be encoded inside
242  *		  the input frame
243  * @window_height: height in pixels of the window to be encoded inside
244  *		   the input frame
245  * @window_horizontal_offset: horizontal offset in pels for input window
246  *			      within input frame
247  * @window_vertical_offset: vertical offset in pels for input window
248  *			    within input frame
249  * @addr_roi: Map of QP offset for the Region of Interest algorithm and
250  *	      also used for Error map.
251  *	      Bit 0-6 used for qp offset (value -64 to 63).
252  *	      Bit 7 used to force intra
253  * @addr_slice_header: address to slice header
254  * @slice_header_size_in_bits: size in bits of the Slice header
255  * @slice_header_offset0: Slice header offset where to insert
256  *			  first_Mb_in_slice
257  * @slice_header_offset1: Slice header offset where to insert
258  *			  slice_qp_delta
259  * @slice_header_offset2: Slice header offset where to insert
260  *			  num_MBs_in_slice
261  * @slice_synchro_enable: enable "slice ready" interrupt after each slice
262  * @max_slice_number: Maximum number of slice in a frame
263  *		      (0 is strictly forbidden)
264  * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
265  *		      YUV for the Y component.
266  *		      Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
267  * @rgb2_yuv_u_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
268  *		      YUV for the U (Cb) component.
269  *		      U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
270  * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
271  *		      YUV for the V (Cr) component.
272  *		      V = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
273  * @slice_byte_size: maximum slice size in bytes
274  *		     (used when slice_size_type=2 or slice_size_type=3)
275  * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame
276  *			 for the AIR algorithm
277  * @brc_no_skip: Disable skipping in the Bitrate Controller
278  * @addr_brc_in_out_parameter: address of static buffer for BRC parameters
279  */
struct hva_h264_td {
	u16 frame_width;
	u16 frame_height;
	u32 frame_num;
	u16 picture_coding_type;
	u16 reserved1;	/* never written; zeroed with the whole descriptor */
	u16 pic_order_cnt_type;
	u16 first_picture_in_sequence;
	u16 slice_size_type;
	u16 reserved2;
	u32 slice_mb_size;
	u16 ir_param_option;
	u16 intra_refresh_type;
	u16 use_constrained_intra_flag;
	u16 transform_mode;
	u16 disable_deblocking_filter_idc;
	s16 slice_alpha_c0_offset_div2;
	s16 slice_beta_offset_div2;
	u16 encoder_complexity;
	s16 chroma_qp_index_offset;
	u16 entropy_coding_mode;
	u16 brc_type;
	u16 quant;
	u32 non_vcl_nalu_size;
	u32 cpb_buffer_size;
	u32 bit_rate;
	u16 qp_min;
	u16 qp_max;
	u16 framerate_num;
	u16 framerate_den;
	u16 delay;
	u16 strict_hrd_compliancy;
	/* every addr_* member holds an address truncated to 32 bits */
	u32 addr_source_buffer;
	u32 addr_fwd_ref_buffer;
	u32 addr_rec_buffer;
	u32 addr_output_bitstream_start;
	u32 addr_output_bitstream_end;
	u32 addr_external_sw;
	u32 addr_lctx;
	u32 addr_local_rec_buffer;
	u32 addr_spatial_context;
	u16 bitstream_offset;
	u16 sampling_mode;
	u32 addr_param_out;
	u32 addr_scaling_matrix;
	u32 addr_scaling_matrix_dir;
	u32 addr_cabac_context_buffer;
	u32 reserved3;
	u32 reserved4;
	s16 gmv_x;
	s16 gmv_y;
	u16 window_width;
	u16 window_height;
	u16 window_horizontal_offset;
	u16 window_vertical_offset;
	u32 addr_roi;
	u32 addr_slice_header;
	u16 slice_header_size_in_bits;
	u16 slice_header_offset0;
	u16 slice_header_offset1;
	u16 slice_header_offset2;
	u32 reserved5;
	u32 reserved6;
	u16 reserved7;
	u16 reserved8;
	u16 slice_synchro_enable;
	u16 max_slice_number;
	u32 rgb2_yuv_y_coeff;
	u32 rgb2_yuv_u_coeff;
	u32 rgb2_yuv_v_coeff;
	u32 slice_byte_size;
	u16 max_air_intra_mb_nb;
	u16 brc_no_skip;
	/*
	 * address of temporal context buffer; hva_h264_prepare_task() swaps
	 * it with addr_spatial_context on every other frame
	 */
	u32 addr_temporal_context;
	u32 addr_brc_in_out_parameter;
};
356 
/*
 * struct hva_h264_slice_po - per-slice output parameters
 *
 * @slice_size: slice size
 * @slice_start_time: start time
 * @slice_end_time: end time
 * @slice_num: slice number
 */
struct hva_h264_slice_po {
	u32 slice_size;
	u32 slice_start_time;
	u32 slice_end_time;
	u32 slice_num;
};
371 
/*
 * struct hva_h264_po - output parameters of an encoding task
 *
 * @bitstream_size: bitstream size
 * @dct_bitstream_size: dct bitstream size
 * @stuffing_bits: number of stuffing bits inserted by the encoder
 * @removal_time: removal time of current frame (nb of ticks 1/framerate)
 * @hvc_start_time: hvc start time
 * @hvc_stop_time: hvc stop time
 * @slice_count: slice count
 * @reserved0: reserved
 * @slice_params: per-slice output parameters, 16 entries
 */
struct hva_h264_po {
	u32 bitstream_size;
	u32 dct_bitstream_size;
	u32 stuffing_bits;
	u32 removal_time;
	u32 hvc_start_time;
	u32 hvc_stop_time;
	u32 slice_count;
	u32 reserved0;
	struct hva_h264_slice_po slice_params[16];
};
394 
/*
 * struct hva_h264_task - task descriptor followed by its output parameters
 *
 * @td: task descriptor filled by hva_h264_prepare_task()
 * @po: output parameters; td.addr_param_out is set to the address of this
 *	member (task buffer address + offsetof(struct hva_h264_task, po))
 */
struct hva_h264_task {
	struct hva_h264_td td;
	struct hva_h264_po po;
};
399 
/*
 * struct hva_h264_ctx - H.264 codec private context
 *
 * The four buffers are allocated with hva_mem_alloc() in hva_h264_open().
 *
 * @seq_info:  sequence information buffer; holds the two context data areas,
 *	       the BRC data and the slice header
 * @ref_frame: reference frame buffer
 * @rec_frame: reconstructed frame buffer
 * @task:      task descriptor
 */
struct hva_h264_ctx {
	struct hva_buffer *seq_info;
	struct hva_buffer *ref_frame;
	struct hva_buffer *rec_frame;
	struct hva_buffer *task;
};
414 
/*
 * hva_h264_fill_slice_header() - write the host part of the slice header
 *
 * @pctx: encoder context, used for tracing only
 * @slice_header_addr: destination of the 8 precomputed header bytes
 * @ctrls: encoder controls; gop_size and entropy_mode are read
 * @frame_num: index of the frame in the stream
 * @header_size: returns the size in bits of the host part of the header
 * @header_offset0: returns the first header offset, in bits (always 40)
 * @header_offset1: returns the second header offset, in bits
 * @header_offset2: returns the third header offset (always 0)
 *
 * A frame whose index is a multiple of gop_size gets an I slice header, any
 * other frame a P slice header. Two I slice header variants alternate from
 * one I frame to the next.
 *
 * Return: always 0
 */
static int hva_h264_fill_slice_header(struct hva_ctx *pctx,
				      u8 *slice_header_addr,
				      struct hva_controls *ctrls,
				      int frame_num,
				      u16 *header_size,
				      u16 *header_offset0,
				      u16 *header_offset1,
				      u16 *header_offset2)
{
	/*
	 * with this HVA hardware version, part of the slice header is computed
	 * on host and part by hardware.
	 * The part of host is precomputed and available through this array.
	 */
	static const unsigned char slice_header[] = {
		0x00, 0x00, 0x00, 0x01,
		0x41, 0x34, 0x07, 0x00
	};
	struct device *dev = ctx_to_dev(pctx);
	int cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC;
	/* position of the frame inside its group of pictures */
	u32 frame_order = frame_num % ctrls->gop_size;
	/*
	 * parity of the I frame count; it selects the I slice header variant
	 * and is the value reported in the trace below
	 */
	int idr_pic_id = (frame_num / ctrls->gop_size) % 2;
	enum hva_picture_coding_type type;

	type = frame_order ? PICTURE_CODING_TYPE_P : PICTURE_CODING_TYPE_I;

	memcpy(slice_header_addr, slice_header, sizeof(slice_header));

	*header_size = 56;
	*header_offset0 = 40;
	*header_offset1 = 13;
	*header_offset2 = 0;

	if (type == PICTURE_CODING_TYPE_I) {
		slice_header_addr[4] = 0x65;
		slice_header_addr[5] = 0x11;

		/* toggle the I frame */
		if (idr_pic_id) {
			*header_size += 4;
			*header_offset1 += 4;
			slice_header_addr[6] = 0x04;
			slice_header_addr[7] = 0x70;
		} else {
			*header_size += 2;
			*header_offset1 += 2;
			slice_header_addr[6] = 0x09;
			slice_header_addr[7] = 0xC0;
		}
	} else {
		if (ctrls->entropy_mode == cabac) {
			*header_size += 1;
			*header_offset1 += 1;
			slice_header_addr[7] = 0x80;
		}
		/*
		 * update slice header with P frame order
		 * frame order is limited to 16 (coded on 4bits only):
		 * the two upper bits go to byte 5, the two lower to byte 6
		 */
		slice_header_addr[5] += ((frame_order & 0x0C) >> 2);
		slice_header_addr[6] += ((frame_order & 0x03) << 6);
	}

	dev_dbg(dev,
		"%s   %s slice header order %d idrPicId %d header size %d\n",
		pctx->name, __func__, frame_order, idr_pic_id, *header_size);
	return 0;
}
487 
/*
 * hva_h264_fill_data_nal() - append a filler data NAL unit to the stream
 *
 * @pctx: encoder context, used for tracing only
 * @stuffing_bytes: number of 0xff filler bytes to insert
 * @addr: start of the stream buffer
 * @stream_size: total size in bytes of the stream buffer
 * @size: in: number of bytes already used in the stream buffer;
 *	  out: updated with the size of the appended NAL unit
 *
 * The NAL unit is made of a 4-byte start code, the NAL type byte,
 * @stuffing_bytes filler bytes and a trailing 0x80 byte. When it does not
 * fit in the remaining space the buffer is left untouched; this is traced
 * but not reported as an error.
 *
 * Return: always 0
 */
static int hva_h264_fill_data_nal(struct hva_ctx *pctx,
				  unsigned int stuffing_bytes, u8 *addr,
				  unsigned int stream_size, unsigned int *size)
{
	struct device *dev = ctx_to_dev(pctx);
	static const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };

	dev_dbg(dev, "%s   %s stuffing bytes %d\n", pctx->name, __func__,
		stuffing_bytes);

	/*
	 * same test as
	 * *size + stuffing_bytes + H264_FILLER_DATA_SIZE > stream_size
	 * but written with subtractions only, so that a very large
	 * stuffing_bytes cannot wrap the sum around and pass the check
	 */
	if (*size > stream_size ||
	    stream_size - *size < H264_FILLER_DATA_SIZE ||
	    stuffing_bytes > stream_size - *size - H264_FILLER_DATA_SIZE) {
		dev_dbg(dev, "%s   %s too many stuffing bytes %d\n",
			pctx->name, __func__, stuffing_bytes);
		return 0;
	}

	/* start code */
	memcpy(addr + *size, start, sizeof(start));
	*size += sizeof(start);

	/* nal_unit_type */
	addr[*size] = NALU_TYPE_FILLER_DATA;
	*size += 1;

	memset(addr + *size, 0xff, stuffing_bytes);
	*size += stuffing_bytes;

	addr[*size] = 0x80;
	*size += 1;

	return 0;
}
520 
/*
 * hva_h264_fill_sei_nal() - append an SEI NAL unit to the stream
 *
 * @pctx: encoder context, used for tracing only
 * @type: SEI payload type; only SEI_STEREO_VIDEO_INFO is supported
 * @addr: start of the stream buffer
 * @size: in: number of bytes already used in the stream buffer;
 *	  out: updated with the size of the appended NAL unit
 *
 * The payload type is checked before anything is written, so neither the
 * buffer nor @size is modified when the type is rejected.
 *
 * Return: 0 on success, -EINVAL if @type is not supported
 */
static int hva_h264_fill_sei_nal(struct hva_ctx *pctx,
				 enum hva_h264_sei_payload_type type,
				 u8 *addr, u32 *size)
{
	struct device *dev = ctx_to_dev(pctx);
	static const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
	struct hva_h264_stereo_video_sei info;
	u8 offset = 7;
	u8 msg = 0;

	switch (type) {
	case SEI_STEREO_VIDEO_INFO:
		break;
	case SEI_BUFFERING_PERIOD:
	case SEI_PICTURE_TIMING:
	case SEI_FRAME_PACKING_ARRANGEMENT:
	default:
		dev_err(dev, "%s   sei nal type not supported %d\n",
			pctx->name, type);
		return -EINVAL;
	}

	/* start code */
	memcpy(addr + *size, start, sizeof(start));
	*size += sizeof(start);

	/* nal_unit_type */
	addr[*size] = NALU_TYPE_SEI;
	*size += 1;

	/* payload type */
	addr[*size] = type;
	*size += 1;

	memset(&info, 0, sizeof(info));

	/* set to top/bottom frame packing arrangement */
	info.field_views_flag = 1;
	info.top_field_is_left_view_flag = 1;

	/* payload size */
	addr[*size] = 1;
	*size += 1;

	/* payload: flags are packed from bit 7 downwards */
	msg = info.field_views_flag << offset--;

	if (info.field_views_flag) {
		msg |= info.top_field_is_left_view_flag << offset--;
	} else {
		msg |= info.current_frame_is_left_view_flag << offset--;
		msg |= info.next_frame_is_second_view_flag << offset--;
	}
	msg |= info.left_view_self_contained_flag << offset--;
	msg |= info.right_view_self_contained_flag << offset--;

	addr[*size] = msg;
	*size += 1;

	/* trailing byte closing the NAL unit */
	addr[*size] = 0x80;
	*size += 1;

	return 0;
}
586 
hva_h264_prepare_task(struct hva_ctx * pctx,struct hva_h264_task * task,struct hva_frame * frame,struct hva_stream * stream)587 static int hva_h264_prepare_task(struct hva_ctx *pctx,
588 				 struct hva_h264_task *task,
589 				 struct hva_frame *frame,
590 				 struct hva_stream *stream)
591 {
592 	struct hva_dev *hva = ctx_to_hdev(pctx);
593 	struct device *dev = ctx_to_dev(pctx);
594 	struct hva_h264_ctx *ctx = pctx->priv;
595 	struct hva_buffer *seq_info = ctx->seq_info;
596 	struct hva_buffer *fwd_ref_frame = ctx->ref_frame;
597 	struct hva_buffer *loc_rec_frame = ctx->rec_frame;
598 	struct hva_h264_td *td = &task->td;
599 	struct hva_controls *ctrls = &pctx->ctrls;
600 	struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame;
601 	int cavlc =  V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC;
602 	u32 frame_num = pctx->stream_num;
603 	u32 addr_esram = hva->esram_addr;
604 	enum v4l2_mpeg_video_h264_level level;
605 	dma_addr_t paddr = 0;
606 	u8 *slice_header_vaddr;
607 	u32 frame_width = frame->info.aligned_width;
608 	u32 frame_height = frame->info.aligned_height;
609 	u32 max_cpb_buffer_size;
610 	unsigned int payload = stream->bytesused;
611 	u32 max_bitrate;
612 
613 	/* check width and height parameters */
614 	if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) ||
615 	    (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) {
616 		dev_err(dev,
617 			"%s   width(%d) or height(%d) exceeds limits (%dx%d)\n",
618 			pctx->name, frame_width, frame_height,
619 			H264_MAX_SIZE_W, H264_MAX_SIZE_H);
620 		pctx->frame_errors++;
621 		return -EINVAL;
622 	}
623 
624 	level = ctrls->level;
625 
626 	memset(td, 0, sizeof(struct hva_h264_td));
627 
628 	td->frame_width = frame_width;
629 	td->frame_height = frame_height;
630 
631 	/* set frame alignment */
632 	td->window_width =  frame_width;
633 	td->window_height = frame_height;
634 	td->window_horizontal_offset = 0;
635 	td->window_vertical_offset = 0;
636 
637 	td->first_picture_in_sequence = (!frame_num) ? 1 : 0;
638 
639 	/* pic_order_cnt_type hard coded to '2' as only I & P frames */
640 	td->pic_order_cnt_type = 2;
641 
642 	/* useConstrainedIntraFlag set to false for better coding efficiency */
643 	td->use_constrained_intra_flag = false;
644 	td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
645 			? BRC_TYPE_CBR : BRC_TYPE_VBR;
646 
647 	td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC :
648 				  CABAC;
649 
650 	td->bit_rate = ctrls->bitrate;
651 
652 	/* set framerate, framerate = 1 n/ time per frame */
653 	if (time_per_frame->numerator >= 536) {
654 		/*
655 		 * due to a hardware bug, framerate denominator can't exceed
656 		 * 536 (BRC overflow). Compute nearest framerate
657 		 */
658 		td->framerate_den = 1;
659 		td->framerate_num = (time_per_frame->denominator +
660 				    (time_per_frame->numerator >> 1) - 1) /
661 				    time_per_frame->numerator;
662 
663 		/*
664 		 * update bitrate to introduce a correction due to
665 		 * the new framerate
666 		 * new bitrate = (old bitrate * new framerate) / old framerate
667 		 */
668 		td->bit_rate /= time_per_frame->numerator;
669 		td->bit_rate *= time_per_frame->denominator;
670 		td->bit_rate /= td->framerate_num;
671 	} else {
672 		td->framerate_den = time_per_frame->numerator;
673 		td->framerate_num = time_per_frame->denominator;
674 	}
675 
676 	/* compute maximum bitrate depending on profile */
677 	if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
678 		max_bitrate = h264_infos_list[level].max_bitrate *
679 			      H264_FACTOR_HIGH;
680 	else
681 		max_bitrate = h264_infos_list[level].max_bitrate *
682 			      H264_FACTOR_BASELINE;
683 
684 	/* check if bitrate doesn't exceed max size */
685 	if (td->bit_rate > max_bitrate) {
686 		dev_dbg(dev,
687 			"%s   bitrate (%d) larger than level and profile allow, clip to %d\n",
688 			pctx->name, td->bit_rate, max_bitrate);
689 		td->bit_rate = max_bitrate;
690 	}
691 
692 	/* convert cpb_buffer_size in bits */
693 	td->cpb_buffer_size = ctrls->cpb_size * 8000;
694 
695 	/* compute maximum cpb buffer size depending on profile */
696 	if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
697 		max_cpb_buffer_size =
698 		    h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH;
699 	else
700 		max_cpb_buffer_size =
701 		    h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE;
702 
703 	/* check if cpb buffer size doesn't exceed max size */
704 	if (td->cpb_buffer_size > max_cpb_buffer_size) {
705 		dev_dbg(dev,
706 			"%s   cpb size larger than level %d allows, clip to %d\n",
707 			pctx->name, td->cpb_buffer_size, max_cpb_buffer_size);
708 		td->cpb_buffer_size = max_cpb_buffer_size;
709 	}
710 
711 	/* enable skipping in the Bitrate Controller */
712 	td->brc_no_skip = 0;
713 
714 	/* initial delay */
715 	if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) &&
716 	    td->bit_rate)
717 		td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate);
718 	else
719 		td->delay = 0;
720 
721 	switch (frame->info.pixelformat) {
722 	case V4L2_PIX_FMT_NV12:
723 		td->sampling_mode = SAMPLING_MODE_NV12;
724 		break;
725 	case V4L2_PIX_FMT_NV21:
726 		td->sampling_mode = SAMPLING_MODE_NV21;
727 		break;
728 	default:
729 		dev_err(dev, "%s   invalid source pixel format\n",
730 			pctx->name);
731 		pctx->frame_errors++;
732 		return -EINVAL;
733 	}
734 
735 	/*
736 	 * fill matrix color converter (RGB to YUV)
737 	 * Y = 0,299 R + 0,587 G + 0,114 B
738 	 * Cb = -0,1687 R -0,3313 G + 0,5 B + 128
739 	 * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128
740 	 */
741 	td->rgb2_yuv_y_coeff = 0x12031008;
742 	td->rgb2_yuv_u_coeff = 0x800EF7FB;
743 	td->rgb2_yuv_v_coeff = 0x80FEF40E;
744 
745 	/* enable/disable transform mode */
746 	td->transform_mode = ctrls->dct8x8;
747 
748 	/* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */
749 	td->encoder_complexity = 2;
750 
751 	/* quant fix to 28, default VBR value */
752 	td->quant = 28;
753 
754 	if (td->framerate_den == 0) {
755 		dev_err(dev, "%s   invalid framerate\n", pctx->name);
756 		pctx->frame_errors++;
757 		return -EINVAL;
758 	}
759 
760 	/* if automatic framerate, deactivate bitrate controller */
761 	if (td->framerate_num == 0)
762 		td->brc_type = 0;
763 
764 	/* compliancy fix to true */
765 	td->strict_hrd_compliancy = 1;
766 
767 	/* set minimum & maximum quantizers */
768 	td->qp_min = clamp_val(ctrls->qpmin, 0, 51);
769 	td->qp_max = clamp_val(ctrls->qpmax, 0, 51);
770 
771 	td->addr_source_buffer = frame->paddr;
772 	td->addr_fwd_ref_buffer = fwd_ref_frame->paddr;
773 	td->addr_rec_buffer = loc_rec_frame->paddr;
774 
775 	td->addr_output_bitstream_end = (u32)stream->paddr + stream->size;
776 
777 	td->addr_output_bitstream_start = (u32)stream->paddr;
778 	td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) &
779 			       BITSTREAM_OFFSET_MASK;
780 
781 	td->addr_param_out = (u32)ctx->task->paddr +
782 			     offsetof(struct hva_h264_task, po);
783 
784 	/* swap spatial and temporal context */
785 	if (frame_num % 2) {
786 		paddr = seq_info->paddr;
787 		td->addr_spatial_context =  ALIGN(paddr, 0x100);
788 		paddr = seq_info->paddr + DATA_SIZE(frame_width,
789 							frame_height);
790 		td->addr_temporal_context = ALIGN(paddr, 0x100);
791 	} else {
792 		paddr = seq_info->paddr;
793 		td->addr_temporal_context = ALIGN(paddr, 0x100);
794 		paddr = seq_info->paddr + DATA_SIZE(frame_width,
795 							frame_height);
796 		td->addr_spatial_context =  ALIGN(paddr, 0x100);
797 	}
798 
799 	paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height);
800 
801 	td->addr_brc_in_out_parameter =  ALIGN(paddr, 0x100);
802 
803 	paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE;
804 	td->addr_slice_header =  ALIGN(paddr, 0x100);
805 	td->addr_external_sw =  ALIGN(addr_esram, 0x100);
806 
807 	addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width);
808 	td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100);
809 
810 	addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width);
811 	td->addr_lctx = ALIGN(addr_esram, 0x100);
812 
813 	addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height));
814 	td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100);
815 
816 	if (!(frame_num % ctrls->gop_size)) {
817 		td->picture_coding_type = PICTURE_CODING_TYPE_I;
818 		stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME;
819 	} else {
820 		td->picture_coding_type = PICTURE_CODING_TYPE_P;
821 		stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
822 	}
823 
824 	/* fill the slice header part */
825 	slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header -
826 			     seq_info->paddr);
827 
828 	hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num,
829 				   &td->slice_header_size_in_bits,
830 				   &td->slice_header_offset0,
831 				   &td->slice_header_offset1,
832 				   &td->slice_header_offset2);
833 
834 	td->chroma_qp_index_offset = 2;
835 	td->slice_synchro_enable = 0;
836 	td->max_slice_number = 1;
837 
838 	/*
839 	 * check the sps/pps header size for key frame only
840 	 * sps/pps header was previously fill by libv4l
841 	 * during qbuf of stream buffer
842 	 */
843 	if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) &&
844 	    (payload > MAX_SPS_PPS_SIZE)) {
845 		dev_err(dev, "%s   invalid sps/pps size %d\n", pctx->name,
846 			payload);
847 		pctx->frame_errors++;
848 		return -EINVAL;
849 	}
850 
851 	if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME)
852 		payload = 0;
853 
854 	/* add SEI nal (video stereo info) */
855 	if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO,
856 						   (u8 *)stream->vaddr,
857 						   &payload)) {
858 		dev_err(dev, "%s   fail to get SEI nal\n", pctx->name);
859 		pctx->frame_errors++;
860 		return -EINVAL;
861 	}
862 
863 	/* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */
864 	td->non_vcl_nalu_size = payload * 8;
865 
866 	/* compute bitstream offset & new start address of bitstream */
867 	td->addr_output_bitstream_start += ((payload >> 4) << 4);
868 	td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8;
869 
870 	stream->bytesused = payload;
871 
872 	return 0;
873 }
874 
hva_h264_get_stream_size(struct hva_h264_task * task)875 static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task)
876 {
877 	struct hva_h264_po *po = &task->po;
878 
879 	return po->bitstream_size;
880 }
881 
hva_h264_get_stuffing_bytes(struct hva_h264_task * task)882 static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task)
883 {
884 	struct hva_h264_po *po = &task->po;
885 
886 	return po->stuffing_bits >> 3;
887 }
888 
hva_h264_open(struct hva_ctx * pctx)889 static int hva_h264_open(struct hva_ctx *pctx)
890 {
891 	struct device *dev = ctx_to_dev(pctx);
892 	struct hva_h264_ctx *ctx;
893 	struct hva_dev *hva = ctx_to_hdev(pctx);
894 	u32 frame_width = pctx->frameinfo.aligned_width;
895 	u32 frame_height = pctx->frameinfo.aligned_height;
896 	u32 size;
897 	int ret;
898 
899 	/* check esram size necessary to encode a frame */
900 	size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) +
901 	       LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) +
902 	       CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) +
903 	       CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width);
904 
905 	if (hva->esram_size < size) {
906 		dev_err(dev, "%s   not enough esram (max:%d request:%d)\n",
907 			pctx->name, hva->esram_size, size);
908 		ret = -EINVAL;
909 		goto err;
910 	}
911 
912 	/* allocate context for codec */
913 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
914 	if (!ctx) {
915 		ret = -ENOMEM;
916 		goto err;
917 	}
918 
919 	/* allocate sequence info buffer */
920 	ret = hva_mem_alloc(pctx,
921 			    2 * DATA_SIZE(frame_width, frame_height) +
922 			    SLICE_HEADER_SIZE +
923 			    BRC_DATA_SIZE,
924 			    "hva sequence info",
925 			    &ctx->seq_info);
926 	if (ret) {
927 		dev_err(dev,
928 			"%s   failed to allocate sequence info buffer\n",
929 			pctx->name);
930 		goto err_ctx;
931 	}
932 
933 	/* allocate reference frame buffer */
934 	ret = hva_mem_alloc(pctx,
935 			    frame_width * frame_height * 3 / 2,
936 			    "hva reference frame",
937 			    &ctx->ref_frame);
938 	if (ret) {
939 		dev_err(dev, "%s   failed to allocate reference frame buffer\n",
940 			pctx->name);
941 		goto err_seq_info;
942 	}
943 
944 	/* allocate reconstructed frame buffer */
945 	ret = hva_mem_alloc(pctx,
946 			    frame_width * frame_height * 3 / 2,
947 			    "hva reconstructed frame",
948 			    &ctx->rec_frame);
949 	if (ret) {
950 		dev_err(dev,
951 			"%s   failed to allocate reconstructed frame buffer\n",
952 			pctx->name);
953 		goto err_ref_frame;
954 	}
955 
956 	/* allocate task descriptor */
957 	ret = hva_mem_alloc(pctx,
958 			    sizeof(struct hva_h264_task),
959 			    "hva task descriptor",
960 			    &ctx->task);
961 	if (ret) {
962 		dev_err(dev,
963 			"%s   failed to allocate task descriptor\n",
964 			pctx->name);
965 		goto err_rec_frame;
966 	}
967 
968 	pctx->priv = (void *)ctx;
969 
970 	return 0;
971 
972 err_rec_frame:
973 	hva_mem_free(pctx, ctx->rec_frame);
974 err_ref_frame:
975 	hva_mem_free(pctx, ctx->ref_frame);
976 err_seq_info:
977 	hva_mem_free(pctx, ctx->seq_info);
978 err_ctx:
979 	devm_kfree(dev, ctx);
980 err:
981 	pctx->sys_errors++;
982 	return ret;
983 }
984 
hva_h264_close(struct hva_ctx * pctx)985 static int hva_h264_close(struct hva_ctx *pctx)
986 {
987 	struct hva_h264_ctx *ctx = pctx->priv;
988 	struct device *dev = ctx_to_dev(pctx);
989 
990 	if (ctx->seq_info)
991 		hva_mem_free(pctx, ctx->seq_info);
992 
993 	if (ctx->ref_frame)
994 		hva_mem_free(pctx, ctx->ref_frame);
995 
996 	if (ctx->rec_frame)
997 		hva_mem_free(pctx, ctx->rec_frame);
998 
999 	if (ctx->task)
1000 		hva_mem_free(pctx, ctx->task);
1001 
1002 	devm_kfree(dev, ctx);
1003 
1004 	return 0;
1005 }
1006 
hva_h264_encode(struct hva_ctx * pctx,struct hva_frame * frame,struct hva_stream * stream)1007 static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame,
1008 			   struct hva_stream *stream)
1009 {
1010 	struct hva_h264_ctx *ctx = pctx->priv;
1011 	struct hva_h264_task *task = ctx->task->vaddr;
1012 	u32 stuffing_bytes = 0;
1013 	int ret = 0;
1014 
1015 	ret = hva_h264_prepare_task(pctx, task, frame, stream);
1016 	if (ret)
1017 		goto err;
1018 
1019 	ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task);
1020 	if (ret)
1021 		goto err;
1022 
1023 	pctx->stream_num++;
1024 	stream->bytesused += hva_h264_get_stream_size(task);
1025 
1026 	stuffing_bytes = hva_h264_get_stuffing_bytes(task);
1027 
1028 	if (stuffing_bytes)
1029 		hva_h264_fill_data_nal(pctx, stuffing_bytes,
1030 				       (u8 *)stream->vaddr,
1031 				       stream->size,
1032 				       &stream->bytesused);
1033 
1034 	/* switch reference & reconstructed frame */
1035 	swap(ctx->ref_frame, ctx->rec_frame);
1036 
1037 	return 0;
1038 err:
1039 	stream->bytesused = 0;
1040 	return ret;
1041 }
1042 
1043 const struct hva_enc nv12h264enc = {
1044 	.name = "H264(NV12)",
1045 	.pixelformat = V4L2_PIX_FMT_NV12,
1046 	.streamformat = V4L2_PIX_FMT_H264,
1047 	.max_width = H264_MAX_SIZE_W,
1048 	.max_height = H264_MAX_SIZE_H,
1049 	.open = hva_h264_open,
1050 	.close = hva_h264_close,
1051 	.encode = hva_h264_encode,
1052 };
1053 
1054 const struct hva_enc nv21h264enc = {
1055 	.name = "H264(NV21)",
1056 	.pixelformat = V4L2_PIX_FMT_NV21,
1057 	.streamformat = V4L2_PIX_FMT_H264,
1058 	.max_width = H264_MAX_SIZE_W,
1059 	.max_height = H264_MAX_SIZE_H,
1060 	.open = hva_h264_open,
1061 	.close = hva_h264_close,
1062 	.encode = hva_h264_encode,
1063 };
1064