xref: /linux/drivers/gpu/drm/imx/dcss/dcss-scaler.c (revision 8e65320d91cdc3b241d4b94855c88459b91abf66)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2019 NXP.
4  *
5  * Scaling algorithms were contributed by Dzung Hoang <dzung.hoang@nxp.com>
6  */
7 
8 #include <linux/device.h>
9 #include <linux/slab.h>
10 
11 #include "dcss-dev.h"
12 
/*
 * Scaler register map. Offsets are relative to the start of one channel's
 * register block; the indented names are bit fields of the register that
 * precedes them.
 */

/* scaler control */
#define DCSS_SCALER_CTRL			0x00
#define   SCALER_EN				BIT(0)
#define   REPEAT_EN				BIT(4)
#define   SCALE2MEM_EN				BIT(8)
#define   MEM2OFIFO_EN				BIT(12)

/* output FIFO thresholds and detection bits */
#define DCSS_SCALER_OFIFO_CTRL			0x04
#define   OFIFO_LOW_THRES_POS			0
#define   OFIFO_LOW_THRES_MASK			GENMASK(9, 0)
#define   OFIFO_HIGH_THRES_POS			16
#define   OFIFO_HIGH_THRES_MASK			GENMASK(25, 16)
#define   UNDERRUN_DETECT_CLR			BIT(26)
#define   LOW_THRES_DETECT_CLR			BIT(27)
#define   HIGH_THRES_DETECT_CLR			BIT(28)
#define   UNDERRUN_DETECT_EN			BIT(29)
#define   LOW_THRES_DETECT_EN			BIT(30)
#define   HIGH_THRES_DETECT_EN			BIT(31)

/* source data control */
#define DCSS_SCALER_SDATA_CTRL			0x08
#define   YUV_EN				BIT(0)
#define   RTRAM_8LINES				BIT(1)
#define   Y_UV_BYTE_SWAP			BIT(4)
#define   A2R10G10B10_FORMAT_POS		8
#define   A2R10G10B10_FORMAT_MASK		GENMASK(11, 8)

/* luma/chroma bit depth */
#define DCSS_SCALER_BIT_DEPTH			0x0C
#define   LUM_BIT_DEPTH_POS			0
#define   LUM_BIT_DEPTH_MASK			GENMASK(1, 0)
#define   CHR_BIT_DEPTH_POS			4
#define   CHR_BIT_DEPTH_MASK			GENMASK(5, 4)

/* source and destination buffer formats */
#define DCSS_SCALER_SRC_FORMAT			0x10
#define DCSS_SCALER_DST_FORMAT			0x14
#define   FORMAT_MASK				GENMASK(1, 0)

/* resolutions; the WIDTH and HEIGHT fields apply to all four registers */
#define DCSS_SCALER_SRC_LUM_RES			0x18
#define DCSS_SCALER_SRC_CHR_RES			0x1C
#define DCSS_SCALER_DST_LUM_RES			0x20
#define DCSS_SCALER_DST_CHR_RES			0x24
#define   WIDTH_POS				0
#define   WIDTH_MASK				GENMASK(11, 0)
#define   HEIGHT_POS				16
#define   HEIGHT_MASK				GENMASK(27, 16)

/* luma start phases and increments */
#define DCSS_SCALER_V_LUM_START			0x48
#define   V_START_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_LUM_INC			0x4C
#define   V_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_H_LUM_START			0x50
#define   H_START_MASK				GENMASK(18, 0)
#define DCSS_SCALER_H_LUM_INC			0x54
#define   H_INC_MASK				GENMASK(15, 0)

/* chroma start phases and increments */
#define DCSS_SCALER_V_CHR_START			0x58
#define DCSS_SCALER_V_CHR_INC			0x5C
#define DCSS_SCALER_H_CHR_START			0x60
#define DCSS_SCALER_H_CHR_INC			0x64

/* base offsets of the filter coefficient banks */
#define DCSS_SCALER_COEF_VLUM			0x80
#define DCSS_SCALER_COEF_HLUM			0x140
#define DCSS_SCALER_COEF_VCHR			0x200
#define DCSS_SCALER_COEF_HCHR			0x300
67 
68 struct dcss_scaler_ch {
69 	void __iomem *base_reg;
70 	u32 base_ofs;
71 	struct dcss_scaler *scl;
72 
73 	u32 sdata_ctrl;
74 	u32 scaler_ctrl;
75 
76 	bool scaler_ctrl_chgd;
77 
78 	u32 c_vstart;
79 	u32 c_hstart;
80 
81 	bool use_nn_interpolation;
82 };
83 
84 struct dcss_scaler {
85 	struct device *dev;
86 
87 	struct dcss_ctxld *ctxld;
88 	u32 ctx_id;
89 
90 	struct dcss_scaler_ch ch[3];
91 };
92 
/*
 * Constants for the scaler coefficient generator.
 */
#define PSC_FRAC_BITS 30
#define PSC_FRAC_SCALE BIT(PSC_FRAC_BITS)

/* number of filter phases, and the bits/mask needed to index them */
#define PSC_BITS_FOR_PHASE 4
#define PSC_NUM_PHASES 16
#define PSC_PHASE_MASK (PSC_NUM_PHASES - 1)

/*
 * Only phases 0..PSC_NUM_PHASES/2 are computed and stored; the remaining
 * ones are produced by mirroring when the coefficients are programmed.
 */
#define PSC_STORED_PHASES (PSC_NUM_PHASES / 2 + 1)

/* taps per phase: full filter and the reduced 5-tap variant */
#define PSC_NUM_TAPS 7
#define PSC_NUM_TAPS_RGBA 5

/* normalized coefficients of one phase sum to about 1 << PSC_COEFF_PRECISION */
#define PSC_COEFF_PRECISION 10

/* fractional bits of the start phase values */
#define PSC_PHASE_FRACTION_BITS 13

/* fractional bits of the fixed-point format used by mult_q()/div_q() */
#define PSC_Q_FRACTION 19
/* one half in that fixed-point format; also used as the rounding offset */
#define PSC_Q_ROUND_OFFSET (1 << (PSC_Q_FRACTION - 1))
106 
107 /**
108  * mult_q() - Performs fixed-point multiplication.
109  * @A: multiplier
110  * @B: multiplicand
111  */
mult_q(int A,int B)112 static int mult_q(int A, int B)
113 {
114 	int result;
115 	s64 temp;
116 
117 	temp = (int64_t)A * (int64_t)B;
118 	temp += PSC_Q_ROUND_OFFSET;
119 	result = (int)(temp >> PSC_Q_FRACTION);
120 	return result;
121 }
122 
123 /**
124  * div_q() - Performs fixed-point division.
125  * @A: dividend
126  * @B: divisor
127  */
div_q(int A,int B)128 static int div_q(int A, int B)
129 {
130 	int result;
131 	s64 temp;
132 
133 	temp = (int64_t)A << PSC_Q_FRACTION;
134 	if ((temp >= 0 && B >= 0) || (temp < 0 && B < 0))
135 		temp += B / 2;
136 	else
137 		temp -= B / 2;
138 
139 	result = div_s64(temp, B);
140 	return result;
141 }
142 
143 /**
144  * exp_approx_q() - Compute approximation to exp(x) function using Taylor
145  *		    series.
146  * @x: fixed-point argument of exp function
147  */
exp_approx_q(int x)148 static int exp_approx_q(int x)
149 {
150 	int sum = 1 << PSC_Q_FRACTION;
151 	int term = 1 << PSC_Q_FRACTION;
152 
153 	term = mult_q(term, div_q(x, 1 << PSC_Q_FRACTION));
154 	sum += term;
155 	term = mult_q(term, div_q(x, 2 << PSC_Q_FRACTION));
156 	sum += term;
157 	term = mult_q(term, div_q(x, 3 << PSC_Q_FRACTION));
158 	sum += term;
159 	term = mult_q(term, div_q(x, 4 << PSC_Q_FRACTION));
160 	sum += term;
161 
162 	return sum;
163 }
164 
165 /**
166  * dcss_scaler_gaussian_filter() - Generate gaussian prototype filter.
167  * @fc_q: fixed-point cutoff frequency normalized to range [0, 1]
168  * @use_5_taps: indicates whether to use 5 taps or 7 taps
169  * @phase0_identity: whether to override phase 0 coefficients with identity filter
170  * @coef: output filter coefficients
171  */
dcss_scaler_gaussian_filter(int fc_q,bool use_5_taps,bool phase0_identity,int coef[][PSC_NUM_TAPS])172 static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
173 					bool phase0_identity,
174 					int coef[][PSC_NUM_TAPS])
175 {
176 	int sigma_q, g0_q, g1_q, g2_q;
177 	int tap_cnt1, tap_cnt2, tap_idx, phase_cnt;
178 	int mid;
179 	int phase;
180 	int i;
181 	int taps;
182 
183 	if (use_5_taps)
184 		for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
185 			coef[phase][0] = 0;
186 			coef[phase][PSC_NUM_TAPS - 1] = 0;
187 		}
188 
189 	/* seed coefficient scanner */
190 	taps = use_5_taps ? PSC_NUM_TAPS_RGBA : PSC_NUM_TAPS;
191 	mid = (PSC_NUM_PHASES * taps) / 2 - 1;
192 	phase_cnt = (PSC_NUM_PHASES * (PSC_NUM_TAPS + 1)) / 2;
193 	tap_cnt1 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
194 	tap_cnt2 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
195 
196 	/* seed gaussian filter generator */
197 	sigma_q = div_q(PSC_Q_ROUND_OFFSET, fc_q);
198 	g0_q = 1 << PSC_Q_FRACTION;
199 	g1_q = exp_approx_q(div_q(-PSC_Q_ROUND_OFFSET,
200 				  mult_q(sigma_q, sigma_q)));
201 	g2_q = mult_q(g1_q, g1_q);
202 	coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = g0_q;
203 
204 	for (i = 0; i < mid; i++) {
205 		phase_cnt++;
206 		tap_cnt1--;
207 		tap_cnt2++;
208 
209 		g0_q = mult_q(g0_q, g1_q);
210 		g1_q = mult_q(g1_q, g2_q);
211 
212 		if ((phase_cnt & PSC_PHASE_MASK) <= 8) {
213 			tap_idx = tap_cnt1 >> PSC_BITS_FOR_PHASE;
214 			coef[phase_cnt & PSC_PHASE_MASK][tap_idx] = g0_q;
215 		}
216 		if (((-phase_cnt) & PSC_PHASE_MASK) <= 8) {
217 			tap_idx = tap_cnt2 >> PSC_BITS_FOR_PHASE;
218 			coef[(-phase_cnt) & PSC_PHASE_MASK][tap_idx] = g0_q;
219 		}
220 	}
221 
222 	phase_cnt++;
223 	tap_cnt1--;
224 	coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = 0;
225 
226 	/* override phase 0 with identity filter if specified */
227 	if (phase0_identity)
228 		for (i = 0; i < PSC_NUM_TAPS; i++)
229 			coef[0][i] = i == (PSC_NUM_TAPS >> 1) ?
230 						(1 << PSC_COEFF_PRECISION) : 0;
231 
232 	/* normalize coef */
233 	for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
234 		int sum = 0;
235 		s64 ll_temp;
236 
237 		for (i = 0; i < PSC_NUM_TAPS; i++)
238 			sum += coef[phase][i];
239 		for (i = 0; i < PSC_NUM_TAPS; i++) {
240 			ll_temp = coef[phase][i];
241 			ll_temp <<= PSC_COEFF_PRECISION;
242 			ll_temp += sum >> 1;
243 			ll_temp = div_s64(ll_temp, sum);
244 			coef[phase][i] = (int)ll_temp;
245 		}
246 	}
247 }
248 
dcss_scaler_nearest_neighbor_filter(bool use_5_taps,int coef[][PSC_NUM_TAPS])249 static void dcss_scaler_nearest_neighbor_filter(bool use_5_taps,
250 						int coef[][PSC_NUM_TAPS])
251 {
252 	int i, j;
253 
254 	for (i = 0; i < PSC_STORED_PHASES; i++)
255 		for (j = 0; j < PSC_NUM_TAPS; j++)
256 			coef[i][j] = j == PSC_NUM_TAPS >> 1 ?
257 						(1 << PSC_COEFF_PRECISION) : 0;
258 }
259 
260 /**
261  * dcss_scaler_filter_design() - Compute filter coefficients using
262  *				 Gaussian filter.
263  * @src_length: length of input
264  * @dst_length: length of output
265  * @use_5_taps: 0 for 7 taps per phase, 1 for 5 taps
266  * @phase0_identity: whether to override phase 0 coefficients with identity filter
267  * @coef: output coefficients
268  * @nn_interpolation: whether to use nearest neighbor instead of gaussian filter
269  */
dcss_scaler_filter_design(int src_length,int dst_length,bool use_5_taps,bool phase0_identity,int coef[][PSC_NUM_TAPS],bool nn_interpolation)270 static void dcss_scaler_filter_design(int src_length, int dst_length,
271 				      bool use_5_taps, bool phase0_identity,
272 				      int coef[][PSC_NUM_TAPS],
273 				      bool nn_interpolation)
274 {
275 	int fc_q;
276 
277 	/* compute cutoff frequency */
278 	if (dst_length >= src_length)
279 		fc_q = div_q(1, PSC_NUM_PHASES);
280 	else
281 		fc_q = div_q(dst_length, src_length * PSC_NUM_PHASES);
282 
283 	if (nn_interpolation)
284 		dcss_scaler_nearest_neighbor_filter(use_5_taps, coef);
285 	else
286 		/* compute gaussian filter coefficients */
287 		dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
288 }
289 
/*
 * Queue a write of @val to the channel register at offset @ofs through the
 * context loader, using the scaler's context id.
 */
static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs)
{
	dcss_ctxld_write(ch->scl->ctxld, ch->scl->ctx_id, val,
			 ch->base_ofs + ofs);
}
296 
dcss_scaler_ch_init_all(struct dcss_scaler * scl,unsigned long scaler_base)297 static int dcss_scaler_ch_init_all(struct dcss_scaler *scl,
298 				   unsigned long scaler_base)
299 {
300 	struct dcss_scaler_ch *ch;
301 	int i;
302 
303 	for (i = 0; i < 3; i++) {
304 		ch = &scl->ch[i];
305 
306 		ch->base_ofs = scaler_base + i * 0x400;
307 
308 		ch->base_reg = devm_ioremap(scl->dev, ch->base_ofs, SZ_4K);
309 		if (!ch->base_reg) {
310 			dev_err(scl->dev, "scaler: unable to remap ch base\n");
311 			return -ENOMEM;
312 		}
313 
314 		ch->scl = scl;
315 	}
316 
317 	return 0;
318 }
319 
dcss_scaler_init(struct dcss_dev * dcss,unsigned long scaler_base)320 int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base)
321 {
322 	struct dcss_scaler *scaler;
323 
324 	scaler = devm_kzalloc(dcss->dev, sizeof(*scaler), GFP_KERNEL);
325 	if (!scaler)
326 		return -ENOMEM;
327 
328 	dcss->scaler = scaler;
329 	scaler->dev = dcss->dev;
330 	scaler->ctxld = dcss->ctxld;
331 	scaler->ctx_id = CTX_SB_HP;
332 
333 	if (dcss_scaler_ch_init_all(scaler, scaler_base))
334 		return -ENOMEM;
335 
336 	return 0;
337 }
338 
dcss_scaler_exit(struct dcss_scaler * scl)339 void dcss_scaler_exit(struct dcss_scaler *scl)
340 {
341 	int ch_no;
342 
343 	for (ch_no = 0; ch_no < 3; ch_no++) {
344 		struct dcss_scaler_ch *ch = &scl->ch[ch_no];
345 
346 		dcss_writel(0, ch->base_reg + DCSS_SCALER_CTRL);
347 	}
348 }
349 
dcss_scaler_ch_enable(struct dcss_scaler * scl,int ch_num,bool en)350 void dcss_scaler_ch_enable(struct dcss_scaler *scl, int ch_num, bool en)
351 {
352 	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
353 	u32 scaler_ctrl;
354 
355 	scaler_ctrl = en ? SCALER_EN | REPEAT_EN : 0;
356 
357 	if (en)
358 		dcss_scaler_write(ch, ch->sdata_ctrl, DCSS_SCALER_SDATA_CTRL);
359 
360 	if (ch->scaler_ctrl != scaler_ctrl)
361 		ch->scaler_ctrl_chgd = true;
362 
363 	ch->scaler_ctrl = scaler_ctrl;
364 }
365 
dcss_scaler_yuv_enable(struct dcss_scaler_ch * ch,bool en)366 static void dcss_scaler_yuv_enable(struct dcss_scaler_ch *ch, bool en)
367 {
368 	ch->sdata_ctrl &= ~YUV_EN;
369 	ch->sdata_ctrl |= en ? YUV_EN : 0;
370 }
371 
dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch * ch,bool en)372 static void dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch *ch, bool en)
373 {
374 	ch->sdata_ctrl &= ~RTRAM_8LINES;
375 	ch->sdata_ctrl |= en ? RTRAM_8LINES : 0;
376 }
377 
dcss_scaler_bit_depth_set(struct dcss_scaler_ch * ch,int depth)378 static void dcss_scaler_bit_depth_set(struct dcss_scaler_ch *ch, int depth)
379 {
380 	u32 val;
381 
382 	val = depth == 30 ? 2 : 0;
383 
384 	dcss_scaler_write(ch,
385 			  ((val << CHR_BIT_DEPTH_POS) & CHR_BIT_DEPTH_MASK) |
386 			  ((val << LUM_BIT_DEPTH_POS) & LUM_BIT_DEPTH_MASK),
387 			  DCSS_SCALER_BIT_DEPTH);
388 }
389 
/*
 * Buffer formats known to the scaler. The numeric values are written as-is
 * to the source and destination format registers.
 */
enum buffer_format {
	BUF_FMT_YUV420 = 0,
	BUF_FMT_YUV422 = 1,
	BUF_FMT_ARGB8888_YUV444 = 2,
};
395 
/*
 * Position of the source chroma samples. Each name gives the horizontal and
 * vertical offset as a fraction of the chroma sample spacing.
 */
enum chroma_location {
	PSC_LOC_HORZ_0_VERT_1_OVER_4		= 0,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4	= 1,
	PSC_LOC_HORZ_0_VERT_0			= 2,
	PSC_LOC_HORZ_1_OVER_4_VERT_0		= 3,
	PSC_LOC_HORZ_0_VERT_1_OVER_2		= 4,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2	= 5,
};
404 
dcss_scaler_format_set(struct dcss_scaler_ch * ch,enum buffer_format src_fmt,enum buffer_format dst_fmt)405 static void dcss_scaler_format_set(struct dcss_scaler_ch *ch,
406 				   enum buffer_format src_fmt,
407 				   enum buffer_format dst_fmt)
408 {
409 	dcss_scaler_write(ch, src_fmt, DCSS_SCALER_SRC_FORMAT);
410 	dcss_scaler_write(ch, dst_fmt, DCSS_SCALER_DST_FORMAT);
411 }
412 
/*
 * Pack a resolution into the layout shared by the four resolution registers:
 * both dimensions are stored minus one, height in the upper field and width
 * in the lower one.
 */
static u32 dcss_scaler_pack_res(u32 xres, u32 yres)
{
	return (((yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
	       (((xres - 1) << WIDTH_POS) & WIDTH_MASK);
}

/*
 * Program the source and destination luma/chroma resolutions of @ch.
 *
 * The chroma source resolution is derived from @pix_format: halved
 * horizontally for the packed 4:2:2 formats, and halved in both directions
 * for NV12/NV21. The chroma destination width is halved for a 4:2:2
 * destination.
 */
static void dcss_scaler_res_set(struct dcss_scaler_ch *ch,
				int src_xres, int src_yres,
				int dst_xres, int dst_yres,
				u32 pix_format, enum buffer_format dst_format)
{
	u32 luma_src_w = src_xres, luma_src_h = src_yres;
	u32 chroma_src_w = src_xres, chroma_src_h = src_yres;
	u32 luma_dst_w = dst_xres, luma_dst_h = dst_yres;
	u32 chroma_dst_w = dst_xres, chroma_dst_h = dst_yres;
	bool src_is_444;

	switch (pix_format) {
	case DRM_FORMAT_UYVY:
	case DRM_FORMAT_VYUY:
	case DRM_FORMAT_YUYV:
	case DRM_FORMAT_YVYU:
		chroma_src_w >>= 1;
		src_is_444 = false;
		break;
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_NV21:
		chroma_src_w >>= 1;
		chroma_src_h >>= 1;
		src_is_444 = false;
		break;
	default:
		src_is_444 = true;
		break;
	}

	if (dst_format == BUF_FMT_YUV422) {
		chroma_dst_w >>= 1;

		/* for 4:4:4 to 4:2:2 conversion, source height should be 1 less */
		if (src_is_444) {
			luma_src_h--;
			chroma_src_h--;
		}
	}

	dcss_scaler_write(ch, dcss_scaler_pack_res(luma_src_w, luma_src_h),
			  DCSS_SCALER_SRC_LUM_RES);
	dcss_scaler_write(ch, dcss_scaler_pack_res(chroma_src_w, chroma_src_h),
			  DCSS_SCALER_SRC_CHR_RES);
	dcss_scaler_write(ch, dcss_scaler_pack_res(luma_dst_w, luma_dst_h),
			  DCSS_SCALER_DST_LUM_RES);
	dcss_scaler_write(ch, dcss_scaler_pack_res(chroma_dst_w, chroma_dst_h),
			  DCSS_SCALER_DST_CHR_RES);
}
464 
/*
 * Convert an integer scaling factor to a fixed-point ratio with @fp_pos
 * fractional bits: a downscale factor maps to the factor itself, an upscale
 * factor to its reciprocal.
 */
#define downscale_fp(factor, fp_pos)		((factor) << (fp_pos))
#define upscale_fp(factor, fp_pos)		((1 << (fp_pos)) / (factor))

/* Integer scaling factors of a scaler channel. */
struct dcss_scaler_factors {
	int downscale;
	int upscale;
};

/* Scaling factors, indexed by channel number. */
static const struct dcss_scaler_factors dcss_scaler_factors[] = {
	{ .downscale = 3, .upscale = 8 },
	{ .downscale = 5, .upscale = 8 },
	{ .downscale = 5, .upscale = 8 },
};
476 
dcss_scaler_fractions_set(struct dcss_scaler_ch * ch,int src_xres,int src_yres,int dst_xres,int dst_yres,u32 src_format,u32 dst_format,enum chroma_location src_chroma_loc)477 static void dcss_scaler_fractions_set(struct dcss_scaler_ch *ch,
478 				      int src_xres, int src_yres,
479 				      int dst_xres, int dst_yres,
480 				      u32 src_format, u32 dst_format,
481 				      enum chroma_location src_chroma_loc)
482 {
483 	int src_c_xres, src_c_yres, dst_c_xres, dst_c_yres;
484 	u32 l_vinc, l_hinc, c_vinc, c_hinc;
485 	u32 c_vstart, c_hstart;
486 
487 	src_c_xres = src_xres;
488 	src_c_yres = src_yres;
489 	dst_c_xres = dst_xres;
490 	dst_c_yres = dst_yres;
491 
492 	c_vstart = 0;
493 	c_hstart = 0;
494 
495 	/* adjustments for source chroma location */
496 	if (src_format == BUF_FMT_YUV420) {
497 		/* vertical input chroma position adjustment */
498 		switch (src_chroma_loc) {
499 		case PSC_LOC_HORZ_0_VERT_1_OVER_4:
500 		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
501 			/*
502 			 * move chroma up to first luma line
503 			 * (1/4 chroma input line spacing)
504 			 */
505 			c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
506 			break;
507 		case PSC_LOC_HORZ_0_VERT_1_OVER_2:
508 		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
509 			/*
510 			 * move chroma up to first luma line
511 			 * (1/2 chroma input line spacing)
512 			 */
513 			c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 1));
514 			break;
515 		default:
516 			break;
517 		}
518 		/* horizontal input chroma position adjustment */
519 		switch (src_chroma_loc) {
520 		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
521 		case PSC_LOC_HORZ_1_OVER_4_VERT_0:
522 		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
523 			/* move chroma left 1/4 chroma input sample spacing */
524 			c_hstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
525 			break;
526 		default:
527 			break;
528 		}
529 	}
530 
531 	/* adjustments to chroma resolution */
532 	if (src_format == BUF_FMT_YUV420) {
533 		src_c_xres >>= 1;
534 		src_c_yres >>= 1;
535 	} else if (src_format == BUF_FMT_YUV422) {
536 		src_c_xres >>= 1;
537 	}
538 
539 	if (dst_format == BUF_FMT_YUV422)
540 		dst_c_xres >>= 1;
541 
542 	l_vinc = ((src_yres << 13) + (dst_yres >> 1)) / dst_yres;
543 	c_vinc = ((src_c_yres << 13) + (dst_c_yres >> 1)) / dst_c_yres;
544 	l_hinc = ((src_xres << 13) + (dst_xres >> 1)) / dst_xres;
545 	c_hinc = ((src_c_xres << 13) + (dst_c_xres >> 1)) / dst_c_xres;
546 
547 	/* save chroma start phase */
548 	ch->c_vstart = c_vstart;
549 	ch->c_hstart = c_hstart;
550 
551 	dcss_scaler_write(ch, 0, DCSS_SCALER_V_LUM_START);
552 	dcss_scaler_write(ch, l_vinc, DCSS_SCALER_V_LUM_INC);
553 
554 	dcss_scaler_write(ch, 0, DCSS_SCALER_H_LUM_START);
555 	dcss_scaler_write(ch, l_hinc, DCSS_SCALER_H_LUM_INC);
556 
557 	dcss_scaler_write(ch, c_vstart, DCSS_SCALER_V_CHR_START);
558 	dcss_scaler_write(ch, c_vinc, DCSS_SCALER_V_CHR_INC);
559 
560 	dcss_scaler_write(ch, c_hstart, DCSS_SCALER_H_CHR_START);
561 	dcss_scaler_write(ch, c_hinc, DCSS_SCALER_H_CHR_INC);
562 }
563 
dcss_scaler_get_min_max_ratios(struct dcss_scaler * scl,int ch_num,int * min,int * max)564 int dcss_scaler_get_min_max_ratios(struct dcss_scaler *scl, int ch_num,
565 				   int *min, int *max)
566 {
567 	*min = upscale_fp(dcss_scaler_factors[ch_num].upscale, 16);
568 	*max = downscale_fp(dcss_scaler_factors[ch_num].downscale, 16);
569 
570 	return 0;
571 }
572 
dcss_scaler_program_5_coef_set(struct dcss_scaler_ch * ch,int base_addr,int coef[][PSC_NUM_TAPS])573 static void dcss_scaler_program_5_coef_set(struct dcss_scaler_ch *ch,
574 					   int base_addr,
575 					   int coef[][PSC_NUM_TAPS])
576 {
577 	int i, phase;
578 
579 	for (i = 0; i < PSC_STORED_PHASES; i++) {
580 		dcss_scaler_write(ch, ((coef[i][1] & 0xfff) << 16 |
581 				       (coef[i][2] & 0xfff) << 4  |
582 				       (coef[i][3] & 0xf00) >> 8),
583 				  base_addr + i * sizeof(u32));
584 		dcss_scaler_write(ch, ((coef[i][3] & 0x0ff) << 20 |
585 				       (coef[i][4] & 0xfff) << 8  |
586 				       (coef[i][5] & 0xff0) >> 4),
587 				  base_addr + 0x40 + i * sizeof(u32));
588 		dcss_scaler_write(ch, ((coef[i][5] & 0x00f) << 24),
589 				  base_addr + 0x80 + i * sizeof(u32));
590 	}
591 
592 	/* reverse both phase and tap orderings */
593 	for (phase = (PSC_NUM_PHASES >> 1) - 1;
594 			i < PSC_NUM_PHASES; i++, phase--) {
595 		dcss_scaler_write(ch, ((coef[phase][5] & 0xfff) << 16 |
596 				       (coef[phase][4] & 0xfff) << 4  |
597 				       (coef[phase][3] & 0xf00) >> 8),
598 				  base_addr + i * sizeof(u32));
599 		dcss_scaler_write(ch, ((coef[phase][3] & 0x0ff) << 20 |
600 				       (coef[phase][2] & 0xfff) << 8  |
601 				       (coef[phase][1] & 0xff0) >> 4),
602 				  base_addr + 0x40 + i * sizeof(u32));
603 		dcss_scaler_write(ch, ((coef[phase][1] & 0x00f) << 24),
604 				  base_addr + 0x80 + i * sizeof(u32));
605 	}
606 }
607 
dcss_scaler_program_7_coef_set(struct dcss_scaler_ch * ch,int base_addr,int coef[][PSC_NUM_TAPS])608 static void dcss_scaler_program_7_coef_set(struct dcss_scaler_ch *ch,
609 					   int base_addr,
610 					   int coef[][PSC_NUM_TAPS])
611 {
612 	int i, phase;
613 
614 	for (i = 0; i < PSC_STORED_PHASES; i++) {
615 		dcss_scaler_write(ch, ((coef[i][0] & 0xfff) << 16 |
616 				       (coef[i][1] & 0xfff) << 4  |
617 				       (coef[i][2] & 0xf00) >> 8),
618 				  base_addr + i * sizeof(u32));
619 		dcss_scaler_write(ch, ((coef[i][2] & 0x0ff) << 20 |
620 				       (coef[i][3] & 0xfff) << 8  |
621 				       (coef[i][4] & 0xff0) >> 4),
622 				  base_addr + 0x40 + i * sizeof(u32));
623 		dcss_scaler_write(ch, ((coef[i][4] & 0x00f) << 24 |
624 				       (coef[i][5] & 0xfff) << 12 |
625 				       (coef[i][6] & 0xfff)),
626 				  base_addr + 0x80 + i * sizeof(u32));
627 	}
628 
629 	/* reverse both phase and tap orderings */
630 	for (phase = (PSC_NUM_PHASES >> 1) - 1;
631 			i < PSC_NUM_PHASES; i++, phase--) {
632 		dcss_scaler_write(ch, ((coef[phase][6] & 0xfff) << 16 |
633 				       (coef[phase][5] & 0xfff) << 4  |
634 				       (coef[phase][4] & 0xf00) >> 8),
635 				  base_addr + i * sizeof(u32));
636 		dcss_scaler_write(ch, ((coef[phase][4] & 0x0ff) << 20 |
637 				       (coef[phase][3] & 0xfff) << 8  |
638 				       (coef[phase][2] & 0xff0) >> 4),
639 				  base_addr + 0x40 + i * sizeof(u32));
640 		dcss_scaler_write(ch, ((coef[phase][2] & 0x00f) << 24 |
641 				       (coef[phase][1] & 0xfff) << 12 |
642 				       (coef[phase][0] & 0xfff)),
643 				  base_addr + 0x80 + i * sizeof(u32));
644 	}
645 }
646 
dcss_scaler_yuv_coef_set(struct dcss_scaler_ch * ch,enum buffer_format src_format,enum buffer_format dst_format,bool use_5_taps,int src_xres,int src_yres,int dst_xres,int dst_yres)647 static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
648 				     enum buffer_format src_format,
649 				     enum buffer_format dst_format,
650 				     bool use_5_taps,
651 				     int src_xres, int src_yres, int dst_xres,
652 				     int dst_yres)
653 {
654 	int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
655 	bool program_5_taps = use_5_taps ||
656 			      (dst_format == BUF_FMT_YUV422 &&
657 			       src_format == BUF_FMT_ARGB8888_YUV444);
658 
659 	/* horizontal luma */
660 	dcss_scaler_filter_design(src_xres, dst_xres, false,
661 				  src_xres == dst_xres, coef,
662 				  ch->use_nn_interpolation);
663 	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
664 
665 	/* vertical luma */
666 	dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
667 				  src_yres == dst_yres, coef,
668 				  ch->use_nn_interpolation);
669 
670 	if (program_5_taps)
671 		dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
672 	else
673 		dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
674 
675 	/* adjust chroma resolution */
676 	if (src_format != BUF_FMT_ARGB8888_YUV444)
677 		src_xres >>= 1;
678 	if (src_format == BUF_FMT_YUV420)
679 		src_yres >>= 1;
680 	if (dst_format != BUF_FMT_ARGB8888_YUV444)
681 		dst_xres >>= 1;
682 	if (dst_format == BUF_FMT_YUV420) /* should not happen */
683 		dst_yres >>= 1;
684 
685 	/* horizontal chroma */
686 	dcss_scaler_filter_design(src_xres, dst_xres, false,
687 				  (src_xres == dst_xres) && (ch->c_hstart == 0),
688 				  coef, ch->use_nn_interpolation);
689 
690 	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef);
691 
692 	/* vertical chroma */
693 	dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
694 				  (src_yres == dst_yres) && (ch->c_vstart == 0),
695 				  coef, ch->use_nn_interpolation);
696 	if (program_5_taps)
697 		dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
698 	else
699 		dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
700 }
701 
dcss_scaler_rgb_coef_set(struct dcss_scaler_ch * ch,int src_xres,int src_yres,int dst_xres,int dst_yres)702 static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch,
703 				     int src_xres, int src_yres, int dst_xres,
704 				     int dst_yres)
705 {
706 	int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
707 
708 	/* horizontal RGB */
709 	dcss_scaler_filter_design(src_xres, dst_xres, false,
710 				  src_xres == dst_xres, coef,
711 				  ch->use_nn_interpolation);
712 	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
713 
714 	/* vertical RGB */
715 	dcss_scaler_filter_design(src_yres, dst_yres, false,
716 				  src_yres == dst_yres, coef,
717 				  ch->use_nn_interpolation);
718 	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
719 }
720 
dcss_scaler_set_rgb10_order(struct dcss_scaler_ch * ch,const struct drm_format_info * format)721 static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch,
722 					const struct drm_format_info *format)
723 {
724 	u32 a2r10g10b10_format;
725 
726 	if (format->is_yuv)
727 		return;
728 
729 	ch->sdata_ctrl &= ~A2R10G10B10_FORMAT_MASK;
730 
731 	if (format->depth != 30)
732 		return;
733 
734 	switch (format->format) {
735 	case DRM_FORMAT_ARGB2101010:
736 	case DRM_FORMAT_XRGB2101010:
737 		a2r10g10b10_format = 0;
738 		break;
739 
740 	case DRM_FORMAT_ABGR2101010:
741 	case DRM_FORMAT_XBGR2101010:
742 		a2r10g10b10_format = 5;
743 		break;
744 
745 	case DRM_FORMAT_RGBA1010102:
746 	case DRM_FORMAT_RGBX1010102:
747 		a2r10g10b10_format = 6;
748 		break;
749 
750 	case DRM_FORMAT_BGRA1010102:
751 	case DRM_FORMAT_BGRX1010102:
752 		a2r10g10b10_format = 11;
753 		break;
754 
755 	default:
756 		a2r10g10b10_format = 0;
757 		break;
758 	}
759 
760 	ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS;
761 }
762 
dcss_scaler_set_filter(struct dcss_scaler * scl,int ch_num,enum drm_scaling_filter scaling_filter)763 void dcss_scaler_set_filter(struct dcss_scaler *scl, int ch_num,
764 			    enum drm_scaling_filter scaling_filter)
765 {
766 	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
767 
768 	ch->use_nn_interpolation = scaling_filter == DRM_SCALING_FILTER_NEAREST_NEIGHBOR;
769 }
770 
/*
 * Configure scaler channel @ch_num for scaling a @src_xres x @src_yres
 * source in the given @format to a @dst_xres x @dst_yres destination.
 *
 * NV12/NV21 are handled as 4:2:0 with RTRAM_8LINES enabled; the packed YUV
 * formats are handled as 4:2:2 with 5-tap vertical filters. RGB formats
 * program their pixel depth. The destination format is always
 * BUF_FMT_ARGB8888_YUV444. @vrefresh_hz is not used.
 */
void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
		       const struct drm_format_info *format,
		       int src_xres, int src_yres, int dst_xres, int dst_yres,
		       u32 vrefresh_hz)
{
	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
	const u32 pix_format = format->format;
	const enum buffer_format dst_format = BUF_FMT_ARGB8888_YUV444;
	enum buffer_format src_format = BUF_FMT_ARGB8888_YUV444;
	unsigned int pixel_depth = 0;
	bool rtr_8line_en = false;
	bool use_5_taps = false;

	dcss_scaler_yuv_enable(ch, format->is_yuv);

	if (format->is_yuv) {
		switch (pix_format) {
		case DRM_FORMAT_NV12:
		case DRM_FORMAT_NV21:
			rtr_8line_en = true;
			src_format = BUF_FMT_YUV420;
			break;
		case DRM_FORMAT_UYVY:
		case DRM_FORMAT_VYUY:
		case DRM_FORMAT_YUYV:
		case DRM_FORMAT_YVYU:
			src_format = BUF_FMT_YUV422;
			break;
		default:
			break;
		}

		use_5_taps = !rtr_8line_en;
	} else {
		pixel_depth = format->depth;
	}

	dcss_scaler_fractions_set(ch, src_xres, src_yres, dst_xres,
				  dst_yres, src_format, dst_format,
				  PSC_LOC_HORZ_0_VERT_1_OVER_4);

	/* the chroma start phases saved above feed the YUV filter design */
	if (format->is_yuv)
		dcss_scaler_yuv_coef_set(ch, src_format, dst_format,
					 use_5_taps, src_xres, src_yres,
					 dst_xres, dst_yres);
	else
		dcss_scaler_rgb_coef_set(ch, src_xres, src_yres,
					 dst_xres, dst_yres);

	dcss_scaler_rtr_8lines_enable(ch, rtr_8line_en);
	dcss_scaler_bit_depth_set(ch, pixel_depth);
	dcss_scaler_set_rgb10_order(ch, format);
	dcss_scaler_format_set(ch, src_format, dst_format);
	dcss_scaler_res_set(ch, src_xres, src_yres, dst_xres, dst_yres,
			    pix_format, dst_format);
}
824 
825 /* This function will be called from interrupt context. */
dcss_scaler_write_sclctrl(struct dcss_scaler * scl)826 void dcss_scaler_write_sclctrl(struct dcss_scaler *scl)
827 {
828 	int chnum;
829 
830 	dcss_ctxld_assert_locked(scl->ctxld);
831 
832 	for (chnum = 0; chnum < 3; chnum++) {
833 		struct dcss_scaler_ch *ch = &scl->ch[chnum];
834 
835 		if (ch->scaler_ctrl_chgd) {
836 			dcss_ctxld_write_irqsafe(scl->ctxld, scl->ctx_id,
837 						 ch->scaler_ctrl,
838 						 ch->base_ofs +
839 						 DCSS_SCALER_CTRL);
840 			ch->scaler_ctrl_chgd = false;
841 		}
842 	}
843 }
844