xref: /linux/drivers/gpu/drm/vkms/vkms_formats.c (revision 3f2a5ba784b808109cac0aac921213e43143a216)
1 // SPDX-License-Identifier: GPL-2.0+
2 
3 #include <linux/kernel.h>
4 #include <linux/minmax.h>
5 
6 #include <drm/drm_blend.h>
7 #include <drm/drm_rect.h>
8 #include <drm/drm_fixed.h>
9 
10 #include <kunit/visibility.h>
11 
12 #include "vkms_formats.h"
13 
14 /**
15  * packed_pixels_offset() - Get the offset of the block containing the pixel at coordinates x/y
16  *
17  * @frame_info: Buffer metadata
18  * @x: The x coordinate of the wanted pixel in the buffer
19  * @y: The y coordinate of the wanted pixel in the buffer
20  * @plane_index: The index of the plane to use
21  * @offset: The returned offset inside the buffer of the block
22  * @rem_x: The returned X coordinate of the requested pixel in the block
23  * @rem_y: The returned Y coordinate of the requested pixel in the block
24  *
25  * As some pixel formats store multiple pixels in a block (DRM_FORMAT_R* for example), some
26  * pixels are not individually addressable. This function return 3 values: the offset of the
27  * whole block, and the coordinate of the requested pixel inside this block.
28  * For example, if the format is DRM_FORMAT_R1 and the requested coordinate is 13,5, the offset
29  * will point to the byte 5*pitches + 13/8 (second byte of the 5th line), and the rem_x/rem_y
30  * coordinates will be (13 % 8, 5 % 1) = (5, 0)
31  *
32  * With this function, the caller just have to extract the correct pixel from the block.
33  */
34 static void packed_pixels_offset(const struct vkms_frame_info *frame_info, int x, int y,
35 				 int plane_index, int *offset, int *rem_x, int *rem_y)
36 {
37 	struct drm_framebuffer *fb = frame_info->fb;
38 	const struct drm_format_info *format = frame_info->fb->format;
39 	/* Directly using x and y to multiply pitches and format->ccp is not sufficient because
40 	 * in some formats a block can represent multiple pixels.
41 	 *
42 	 * Dividing x and y by the block size allows to extract the correct offset of the block
43 	 * containing the pixel.
44 	 */
45 
46 	int block_x = x / drm_format_info_block_width(format, plane_index);
47 	int block_y = y / drm_format_info_block_height(format, plane_index);
48 	int block_pitch = fb->pitches[plane_index] * drm_format_info_block_height(format,
49 										  plane_index);
50 	*rem_x = x % drm_format_info_block_width(format, plane_index);
51 	*rem_y = y % drm_format_info_block_height(format, plane_index);
52 	*offset = fb->offsets[plane_index] +
53 		  block_y * block_pitch +
54 		  block_x * format->char_per_block[plane_index];
55 }
56 
57 /**
58  * packed_pixels_addr() - Get the pointer to the block containing the pixel at the given
59  * coordinates
60  *
61  * @frame_info: Buffer metadata
62  * @x: The x (width) coordinate inside the plane
63  * @y: The y (height) coordinate inside the plane
64  * @plane_index: The index of the plane
65  * @addr: The returned pointer
66  * @rem_x: The returned X coordinate of the requested pixel in the block
67  * @rem_y: The returned Y coordinate of the requested pixel in the block
68  *
69  * Takes the information stored in the frame_info, a pair of coordinates, and returns the address
70  * of the block containing this pixel and the pixel position inside this block.
71  *
72  * See @packed_pixels_offset for details about rem_x/rem_y behavior.
73  */
74 static void packed_pixels_addr(const struct vkms_frame_info *frame_info,
75 			       int x, int y, int plane_index, u8 **addr, int *rem_x,
76 			       int *rem_y)
77 {
78 	int offset;
79 
80 	packed_pixels_offset(frame_info, x, y, plane_index, &offset, rem_x, rem_y);
81 	*addr = (u8 *)frame_info->map[0].vaddr + offset;
82 }
83 
84 /**
85  * get_block_step_bytes() - Common helper to compute the correct step value between each pixel block
86  * to read in a certain direction.
87  *
88  * @fb: Framebuffer to iter on
89  * @direction: Direction of the reading
90  * @plane_index: Plane to get the step from
91  *
92  * As the returned count is the number of bytes between two consecutive blocks in a direction,
93  * the caller may have to read multiple pixels before using the next one (for example, to read from
94  * left to right in a DRM_FORMAT_R1 plane, each block contains 8 pixels, so the step must be used
95  * only every 8 pixels).
96  */
97 static int get_block_step_bytes(struct drm_framebuffer *fb, enum pixel_read_direction direction,
98 				int plane_index)
99 {
100 	switch (direction) {
101 	case READ_LEFT_TO_RIGHT:
102 		return fb->format->char_per_block[plane_index];
103 	case READ_RIGHT_TO_LEFT:
104 		return -fb->format->char_per_block[plane_index];
105 	case READ_TOP_TO_BOTTOM:
106 		return (int)fb->pitches[plane_index] * drm_format_info_block_width(fb->format,
107 										   plane_index);
108 	case READ_BOTTOM_TO_TOP:
109 		return -(int)fb->pitches[plane_index] * drm_format_info_block_width(fb->format,
110 										    plane_index);
111 	}
112 
113 	return 0;
114 }
115 
116 /**
117  * packed_pixels_addr_1x1() - Get the pointer to the block containing the pixel at the given
118  * coordinates
119  *
120  * @frame_info: Buffer metadata
121  * @x: The x (width) coordinate inside the plane
122  * @y: The y (height) coordinate inside the plane
123  * @plane_index: The index of the plane
124  * @addr: The returned pointer
125  *
126  * This function can only be used with format where block_h == block_w == 1.
127  */
128 static void packed_pixels_addr_1x1(const struct vkms_frame_info *frame_info,
129 				   int x, int y, int plane_index, u8 **addr)
130 {
131 	int offset, rem_x, rem_y;
132 
133 	WARN_ONCE(drm_format_info_block_width(frame_info->fb->format,
134 					      plane_index) != 1,
135 		"%s() only support formats with block_w == 1", __func__);
136 	WARN_ONCE(drm_format_info_block_height(frame_info->fb->format,
137 					       plane_index) != 1,
138 		"%s() only support formats with block_h == 1", __func__);
139 
140 	packed_pixels_offset(frame_info, x, y, plane_index, &offset, &rem_x,
141 			     &rem_y);
142 	*addr = (u8 *)frame_info->map[0].vaddr + offset;
143 }
144 
145 /**
146  * get_subsampling() - Get the subsampling divisor value on a specific direction
147  *
148  * @format: format to extarct the subsampling from
149  * @direction: direction of the subsampling requested
150  */
151 static int get_subsampling(const struct drm_format_info *format,
152 			   enum pixel_read_direction direction)
153 {
154 	switch (direction) {
155 	case READ_BOTTOM_TO_TOP:
156 	case READ_TOP_TO_BOTTOM:
157 		return format->vsub;
158 	case READ_RIGHT_TO_LEFT:
159 	case READ_LEFT_TO_RIGHT:
160 		return format->hsub;
161 	}
162 	WARN_ONCE(true, "Invalid direction for pixel reading: %d\n", direction);
163 	return 1;
164 }
165 
166 /**
167  * get_subsampling_offset() - An offset for keeping the chroma siting consistent regardless of
168  * x_start and y_start values
169  *
170  * @direction: direction of the reading to properly compute this offset
171  * @x_start: x coordinate of the starting point of the readed line
172  * @y_start: y coordinate of the starting point of the readed line
173  */
174 static int get_subsampling_offset(enum pixel_read_direction direction, int x_start, int y_start)
175 {
176 	switch (direction) {
177 	case READ_BOTTOM_TO_TOP:
178 		return -y_start - 1;
179 	case READ_TOP_TO_BOTTOM:
180 		return y_start;
181 	case READ_RIGHT_TO_LEFT:
182 		return -x_start - 1;
183 	case READ_LEFT_TO_RIGHT:
184 		return x_start;
185 	}
186 	WARN_ONCE(true, "Invalid direction for pixel reading: %d\n", direction);
187 	return 0;
188 }
189 
190 /*
191  * The following functions take pixel data (a, r, g, b, pixel, ...) and convert them to
192  * &struct pixel_argb_u16
193  *
194  * They are used in the `read_line`s functions to avoid duplicate work for some pixel formats.
195  */
196 
197 static struct pixel_argb_u16 argb_u16_from_u8888(u8 a, u8 r, u8 g, u8 b)
198 {
199 	struct pixel_argb_u16 out_pixel;
200 	/*
201 	 * The 257 is the "conversion ratio". This number is obtained by the
202 	 * (2^16 - 1) / (2^8 - 1) division. Which, in this case, tries to get
203 	 * the best color value in a pixel format with more possibilities.
204 	 * A similar idea applies to others RGB color conversions.
205 	 */
206 	out_pixel.a = (u16)a * 257;
207 	out_pixel.r = (u16)r * 257;
208 	out_pixel.g = (u16)g * 257;
209 	out_pixel.b = (u16)b * 257;
210 
211 	return out_pixel;
212 }
213 
214 static struct pixel_argb_u16 argb_u16_from_u16161616(u16 a, u16 r, u16 g, u16 b)
215 {
216 	struct pixel_argb_u16 out_pixel;
217 
218 	out_pixel.a = a;
219 	out_pixel.r = r;
220 	out_pixel.g = g;
221 	out_pixel.b = b;
222 
223 	return out_pixel;
224 }
225 
226 static struct pixel_argb_u16 argb_u16_from_le16161616(__le16 a, __le16 r, __le16 g, __le16 b)
227 {
228 	return argb_u16_from_u16161616(le16_to_cpu(a), le16_to_cpu(r), le16_to_cpu(g),
229 				       le16_to_cpu(b));
230 }
231 
232 static struct pixel_argb_u16 argb_u16_from_RGB565(const __le16 *pixel)
233 {
234 	struct pixel_argb_u16 out_pixel;
235 
236 	s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31));
237 	s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63));
238 
239 	u16 rgb_565 = le16_to_cpu(*pixel);
240 	s64 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f);
241 	s64 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f);
242 	s64 fp_b = drm_int2fixp(rgb_565 & 0x1f);
243 
244 	out_pixel.a = (u16)0xffff;
245 	out_pixel.r = drm_fixp2int_round(drm_fixp_mul(fp_r, fp_rb_ratio));
246 	out_pixel.g = drm_fixp2int_round(drm_fixp_mul(fp_g, fp_g_ratio));
247 	out_pixel.b = drm_fixp2int_round(drm_fixp_mul(fp_b, fp_rb_ratio));
248 
249 	return out_pixel;
250 }
251 
252 static struct pixel_argb_u16 argb_u16_from_gray8(u8 gray)
253 {
254 	return argb_u16_from_u8888(255, gray, gray, gray);
255 }
256 
257 static struct pixel_argb_u16 argb_u16_from_grayu16(u16 gray)
258 {
259 	return argb_u16_from_u16161616(0xFFFF, gray, gray, gray);
260 }
261 
262 VISIBLE_IF_KUNIT struct pixel_argb_u16 argb_u16_from_yuv888(u8 y, u8 channel_1, u8 channel_2,
263 							    const struct conversion_matrix *matrix)
264 {
265 	u16 r, g, b;
266 	s64 fp_y, fp_channel_1, fp_channel_2;
267 	s64 fp_r, fp_g, fp_b;
268 
269 	fp_y = drm_int2fixp(((int)y - matrix->y_offset) * 257);
270 	fp_channel_1 = drm_int2fixp(((int)channel_1 - 128) * 257);
271 	fp_channel_2 = drm_int2fixp(((int)channel_2 - 128) * 257);
272 
273 	fp_r = drm_fixp_mul(matrix->matrix[0][0], fp_y) +
274 	       drm_fixp_mul(matrix->matrix[0][1], fp_channel_1) +
275 	       drm_fixp_mul(matrix->matrix[0][2], fp_channel_2);
276 	fp_g = drm_fixp_mul(matrix->matrix[1][0], fp_y) +
277 	       drm_fixp_mul(matrix->matrix[1][1], fp_channel_1) +
278 	       drm_fixp_mul(matrix->matrix[1][2], fp_channel_2);
279 	fp_b = drm_fixp_mul(matrix->matrix[2][0], fp_y) +
280 	       drm_fixp_mul(matrix->matrix[2][1], fp_channel_1) +
281 	       drm_fixp_mul(matrix->matrix[2][2], fp_channel_2);
282 
283 	fp_r = drm_fixp2int_round(fp_r);
284 	fp_g = drm_fixp2int_round(fp_g);
285 	fp_b = drm_fixp2int_round(fp_b);
286 
287 	r = clamp(fp_r, 0, 0xffff);
288 	g = clamp(fp_g, 0, 0xffff);
289 	b = clamp(fp_b, 0, 0xffff);
290 
291 	return argb_u16_from_u16161616(0xffff, r, g, b);
292 }
293 EXPORT_SYMBOL_IF_KUNIT(argb_u16_from_yuv888);
294 
295 /*
296  * The following functions are read_line function for each pixel format supported by VKMS.
297  *
298  * They read a line starting at the point @x_start,@y_start following the @direction. The result
299  * is stored in @out_pixel and in a 64 bits format, see struct pixel_argb_u16.
300  *
301  * These functions are very repetitive, but the innermost pixel loops must be kept inside these
302  * functions for performance reasons. Some benchmarking was done in [1] where having the innermost
303  * loop factored out of these functions showed a slowdown by a factor of three.
304  *
305  * [1]: https://lore.kernel.org/dri-devel/d258c8dc-78e9-4509-9037-a98f7f33b3a3@riseup.net/
306  */
307 
308 static void Rx_read_line(const struct vkms_plane_state *plane, int x_start,
309 			 int y_start, enum pixel_read_direction direction, int count,
310 			 struct pixel_argb_u16 out_pixel[])
311 {
312 	struct pixel_argb_u16 *end = out_pixel + count;
313 	int bits_per_pixel = drm_format_info_bpp(plane->frame_info->fb->format, 0);
314 	u8 *src_pixels;
315 	int rem_x, rem_y;
316 
317 	WARN_ONCE(drm_format_info_block_height(plane->frame_info->fb->format, 0) != 1,
318 		  "%s() only support formats with block_h == 1", __func__);
319 
320 	packed_pixels_addr(plane->frame_info, x_start, y_start, 0, &src_pixels, &rem_x, &rem_y);
321 	int bit_offset = (8 - bits_per_pixel) - rem_x * bits_per_pixel;
322 	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
323 	int mask = (0x1 << bits_per_pixel) - 1;
324 	int lum_per_level = 0xFFFF / mask;
325 
326 	if (direction == READ_LEFT_TO_RIGHT || direction == READ_RIGHT_TO_LEFT) {
327 		int restart_bit_offset;
328 		int step_bit_offset;
329 
330 		if (direction == READ_LEFT_TO_RIGHT) {
331 			restart_bit_offset = 8 - bits_per_pixel;
332 			step_bit_offset = -bits_per_pixel;
333 		} else {
334 			restart_bit_offset = 0;
335 			step_bit_offset = bits_per_pixel;
336 		}
337 
338 		while (out_pixel < end) {
339 			u8 val = ((*src_pixels) >> bit_offset) & mask;
340 
341 			*out_pixel = argb_u16_from_grayu16((int)val * lum_per_level);
342 
343 			bit_offset += step_bit_offset;
344 			if (bit_offset < 0 || 8 <= bit_offset) {
345 				bit_offset = restart_bit_offset;
346 				src_pixels += step;
347 			}
348 			out_pixel += 1;
349 		}
350 	} else if (direction == READ_TOP_TO_BOTTOM || direction == READ_BOTTOM_TO_TOP) {
351 		while (out_pixel < end) {
352 			u8 val = (*src_pixels >> bit_offset) & mask;
353 			*out_pixel = argb_u16_from_grayu16((int)val * lum_per_level);
354 			src_pixels += step;
355 			out_pixel += 1;
356 		}
357 	}
358 }
359 
/*
 * read_line callback returned for DRM_FORMAT_R1 by get_pixel_read_line_function().
 * Forwards every argument unchanged to Rx_read_line().
 */
static void R1_read_line(const struct vkms_plane_state *plane, int x_start,
			 int y_start, enum pixel_read_direction direction, int count,
			 struct pixel_argb_u16 out_pixel[])
{
	Rx_read_line(plane, x_start, y_start, direction, count, out_pixel);
}
366 
/*
 * read_line callback returned for DRM_FORMAT_R2 by get_pixel_read_line_function().
 * Forwards every argument unchanged to Rx_read_line().
 */
static void R2_read_line(const struct vkms_plane_state *plane, int x_start,
			 int y_start, enum pixel_read_direction direction, int count,
			 struct pixel_argb_u16 out_pixel[])
{
	Rx_read_line(plane, x_start, y_start, direction, count, out_pixel);
}
373 
/*
 * read_line callback returned for DRM_FORMAT_R4 by get_pixel_read_line_function().
 * Forwards every argument unchanged to Rx_read_line().
 */
static void R4_read_line(const struct vkms_plane_state *plane, int x_start,
			 int y_start, enum pixel_read_direction direction, int count,
			 struct pixel_argb_u16 out_pixel[])
{
	Rx_read_line(plane, x_start, y_start, direction, count, out_pixel);
}
380 
381 static void R8_read_line(const struct vkms_plane_state *plane, int x_start,
382 			 int y_start, enum pixel_read_direction direction, int count,
383 			 struct pixel_argb_u16 out_pixel[])
384 {
385 	struct pixel_argb_u16 *end = out_pixel + count;
386 	u8 *src_pixels;
387 	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
388 
389 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);
390 
391 	while (out_pixel < end) {
392 		*out_pixel = argb_u16_from_gray8(*src_pixels);
393 		src_pixels += step;
394 		out_pixel += 1;
395 	}
396 }
397 
398 static void ARGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start,
399 			       enum pixel_read_direction direction, int count,
400 			       struct pixel_argb_u16 out_pixel[])
401 {
402 	struct pixel_argb_u16 *end = out_pixel + count;
403 	u8 *src_pixels;
404 
405 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);
406 
407 	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
408 
409 	while (out_pixel < end) {
410 		u8 *px = (u8 *)src_pixels;
411 		*out_pixel = argb_u16_from_u8888(px[3], px[2], px[1], px[0]);
412 		out_pixel += 1;
413 		src_pixels += step;
414 	}
415 }
416 
417 static void XRGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start,
418 			       enum pixel_read_direction direction, int count,
419 			       struct pixel_argb_u16 out_pixel[])
420 {
421 	struct pixel_argb_u16 *end = out_pixel + count;
422 	u8 *src_pixels;
423 
424 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);
425 
426 	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
427 
428 	while (out_pixel < end) {
429 		u8 *px = (u8 *)src_pixels;
430 		*out_pixel = argb_u16_from_u8888(255, px[2], px[1], px[0]);
431 		out_pixel += 1;
432 		src_pixels += step;
433 	}
434 }
435 
436 static void ABGR8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start,
437 			       enum pixel_read_direction direction, int count,
438 			       struct pixel_argb_u16 out_pixel[])
439 {
440 	struct pixel_argb_u16 *end = out_pixel + count;
441 	u8 *src_pixels;
442 
443 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);
444 
445 	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
446 
447 	while (out_pixel < end) {
448 		u8 *px = (u8 *)src_pixels;
449 		/* Switch blue and red pixels. */
450 		*out_pixel = argb_u16_from_u8888(px[3], px[0], px[1], px[2]);
451 		out_pixel += 1;
452 		src_pixels += step;
453 	}
454 }
455 
456 static void ARGB16161616_read_line(const struct vkms_plane_state *plane, int x_start,
457 				   int y_start, enum pixel_read_direction direction, int count,
458 				   struct pixel_argb_u16 out_pixel[])
459 {
460 	struct pixel_argb_u16 *end = out_pixel + count;
461 	u8 *src_pixels;
462 
463 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);
464 
465 	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
466 
467 	while (out_pixel < end) {
468 		u16 *px = (u16 *)src_pixels;
469 		*out_pixel = argb_u16_from_u16161616(px[3], px[2], px[1], px[0]);
470 		out_pixel += 1;
471 		src_pixels += step;
472 	}
473 }
474 
475 static void XRGB16161616_read_line(const struct vkms_plane_state *plane, int x_start,
476 				   int y_start, enum pixel_read_direction direction, int count,
477 				   struct pixel_argb_u16 out_pixel[])
478 {
479 	struct pixel_argb_u16 *end = out_pixel + count;
480 	u8 *src_pixels;
481 
482 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);
483 
484 	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
485 
486 	while (out_pixel < end) {
487 		__le16 *px = (__le16 *)src_pixels;
488 		*out_pixel = argb_u16_from_le16161616(cpu_to_le16(0xFFFF), px[2], px[1], px[0]);
489 		out_pixel += 1;
490 		src_pixels += step;
491 	}
492 }
493 
494 static void RGB565_read_line(const struct vkms_plane_state *plane, int x_start,
495 			     int y_start, enum pixel_read_direction direction, int count,
496 			     struct pixel_argb_u16 out_pixel[])
497 {
498 	struct pixel_argb_u16 *end = out_pixel + count;
499 	u8 *src_pixels;
500 
501 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);
502 
503 	int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
504 
505 	while (out_pixel < end) {
506 		__le16 *px = (__le16 *)src_pixels;
507 
508 		*out_pixel = argb_u16_from_RGB565(px);
509 		out_pixel += 1;
510 		src_pixels += step;
511 	}
512 }
513 
514 /*
515  * This callback can be used for YUV formats where U and V values are
516  * stored in the same plane (often called semi-planar formats). It will
517  * correctly handle subsampling as described in the drm_format_info of the plane.
518  *
519  * The conversion matrix stored in the @plane is used to:
520  * - Apply the correct color range and encoding
521  * - Convert YUV and YVU with the same function (a column swap is needed when setting up
522  * plane->conversion_matrix)
523  */
524 static void semi_planar_yuv_read_line(const struct vkms_plane_state *plane, int x_start,
525 				      int y_start, enum pixel_read_direction direction, int count,
526 				      struct pixel_argb_u16 out_pixel[])
527 {
528 	u8 *y_plane;
529 	u8 *uv_plane;
530 
531 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0,
532 			       &y_plane);
533 	packed_pixels_addr_1x1(plane->frame_info,
534 			       x_start / plane->frame_info->fb->format->hsub,
535 			       y_start / plane->frame_info->fb->format->vsub, 1,
536 			       &uv_plane);
537 	int step_y = get_block_step_bytes(plane->frame_info->fb, direction, 0);
538 	int step_uv = get_block_step_bytes(plane->frame_info->fb, direction, 1);
539 	int subsampling = get_subsampling(plane->frame_info->fb->format, direction);
540 	int subsampling_offset = get_subsampling_offset(direction, x_start, y_start);
541 	const struct conversion_matrix *conversion_matrix = &plane->conversion_matrix;
542 
543 	for (int i = 0; i < count; i++) {
544 		*out_pixel = argb_u16_from_yuv888(y_plane[0], uv_plane[0], uv_plane[1],
545 						  conversion_matrix);
546 		out_pixel += 1;
547 		y_plane += step_y;
548 		if ((i + subsampling_offset + 1) % subsampling == 0)
549 			uv_plane += step_uv;
550 	}
551 }
552 
553 /*
554  * This callback can be used for YUV format where each color component is
555  * stored in a different plane (often called planar formats). It will
556  * correctly handle subsampling as described in the drm_format_info of the plane.
557  *
558  * The conversion matrix stored in the @plane is used to:
559  * - Apply the correct color range and encoding
560  * - Convert YUV and YVU with the same function (a column swap is needed when setting up
561  * plane->conversion_matrix)
562  */
563 static void planar_yuv_read_line(const struct vkms_plane_state *plane, int x_start,
564 				 int y_start, enum pixel_read_direction direction, int count,
565 				 struct pixel_argb_u16 out_pixel[])
566 {
567 	u8 *y_plane;
568 	u8 *channel_1_plane;
569 	u8 *channel_2_plane;
570 
571 	packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0,
572 			       &y_plane);
573 	packed_pixels_addr_1x1(plane->frame_info,
574 			       x_start / plane->frame_info->fb->format->hsub,
575 			       y_start / plane->frame_info->fb->format->vsub, 1,
576 			       &channel_1_plane);
577 	packed_pixels_addr_1x1(plane->frame_info,
578 			       x_start / plane->frame_info->fb->format->hsub,
579 			       y_start / plane->frame_info->fb->format->vsub, 2,
580 			       &channel_2_plane);
581 	int step_y = get_block_step_bytes(plane->frame_info->fb, direction, 0);
582 	int step_channel_1 = get_block_step_bytes(plane->frame_info->fb, direction, 1);
583 	int step_channel_2 = get_block_step_bytes(plane->frame_info->fb, direction, 2);
584 	int subsampling = get_subsampling(plane->frame_info->fb->format, direction);
585 	int subsampling_offset = get_subsampling_offset(direction, x_start, y_start);
586 	const struct conversion_matrix *conversion_matrix = &plane->conversion_matrix;
587 
588 	for (int i = 0; i < count; i++) {
589 		*out_pixel = argb_u16_from_yuv888(*y_plane, *channel_1_plane, *channel_2_plane,
590 						  conversion_matrix);
591 		out_pixel += 1;
592 		y_plane += step_y;
593 		if ((i + subsampling_offset + 1) % subsampling == 0) {
594 			channel_1_plane += step_channel_1;
595 			channel_2_plane += step_channel_2;
596 		}
597 	}
598 }
599 
600 /*
601  * The following functions take one &struct pixel_argb_u16 and convert it to a specific format.
602  * The result is stored in @out_pixel.
603  *
604  * They are used in vkms_writeback_row() to convert and store a pixel from the src_buffer to
605  * the writeback buffer.
606  */
607 static void argb_u16_to_ARGB8888(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel)
608 {
609 	/*
610 	 * This sequence below is important because the format's byte order is
611 	 * in little-endian. In the case of the ARGB8888 the memory is
612 	 * organized this way:
613 	 *
614 	 * | Addr     | = blue channel
615 	 * | Addr + 1 | = green channel
616 	 * | Addr + 2 | = Red channel
617 	 * | Addr + 3 | = Alpha channel
618 	 */
619 	out_pixel[3] = DIV_ROUND_CLOSEST(in_pixel->a, 257);
620 	out_pixel[2] = DIV_ROUND_CLOSEST(in_pixel->r, 257);
621 	out_pixel[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257);
622 	out_pixel[0] = DIV_ROUND_CLOSEST(in_pixel->b, 257);
623 }
624 
625 static void argb_u16_to_XRGB8888(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel)
626 {
627 	out_pixel[3] = 0xff;
628 	out_pixel[2] = DIV_ROUND_CLOSEST(in_pixel->r, 257);
629 	out_pixel[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257);
630 	out_pixel[0] = DIV_ROUND_CLOSEST(in_pixel->b, 257);
631 }
632 
633 static void argb_u16_to_ABGR8888(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel)
634 {
635 	out_pixel[3] = DIV_ROUND_CLOSEST(in_pixel->a, 257);
636 	out_pixel[2] = DIV_ROUND_CLOSEST(in_pixel->b, 257);
637 	out_pixel[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257);
638 	out_pixel[0] = DIV_ROUND_CLOSEST(in_pixel->r, 257);
639 }
640 
641 static void argb_u16_to_ARGB16161616(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel)
642 {
643 	__le16 *pixel = (__le16 *)out_pixel;
644 
645 	pixel[3] = cpu_to_le16(in_pixel->a);
646 	pixel[2] = cpu_to_le16(in_pixel->r);
647 	pixel[1] = cpu_to_le16(in_pixel->g);
648 	pixel[0] = cpu_to_le16(in_pixel->b);
649 }
650 
651 static void argb_u16_to_XRGB16161616(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel)
652 {
653 	__le16 *pixel = (__le16 *)out_pixel;
654 
655 	pixel[3] = cpu_to_le16(0xffff);
656 	pixel[2] = cpu_to_le16(in_pixel->r);
657 	pixel[1] = cpu_to_le16(in_pixel->g);
658 	pixel[0] = cpu_to_le16(in_pixel->b);
659 }
660 
661 static void argb_u16_to_RGB565(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel)
662 {
663 	__le16 *pixel = (__le16 *)out_pixel;
664 
665 	s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31));
666 	s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63));
667 
668 	s64 fp_r = drm_int2fixp(in_pixel->r);
669 	s64 fp_g = drm_int2fixp(in_pixel->g);
670 	s64 fp_b = drm_int2fixp(in_pixel->b);
671 
672 	u16 r = drm_fixp2int(drm_fixp_div(fp_r, fp_rb_ratio));
673 	u16 g = drm_fixp2int(drm_fixp_div(fp_g, fp_g_ratio));
674 	u16 b = drm_fixp2int(drm_fixp_div(fp_b, fp_rb_ratio));
675 
676 	*pixel = cpu_to_le16(r << 11 | g << 5 | b);
677 }
678 
679 /**
680  * vkms_writeback_row() - Generic loop for all supported writeback format. It is executed just
681  * after the blending to write a line in the writeback buffer.
682  *
683  * @wb: Job where to insert the final image
684  * @src_buffer: Line to write
685  * @y: Row to write in the writeback buffer
686  */
687 void vkms_writeback_row(struct vkms_writeback_job *wb,
688 			const struct line_buffer *src_buffer, int y)
689 {
690 	struct vkms_frame_info *frame_info = &wb->wb_frame_info;
691 	int x_dst = frame_info->dst.x1;
692 	u8 *dst_pixels;
693 	int rem_x, rem_y;
694 
695 	packed_pixels_addr(frame_info, x_dst, y, 0, &dst_pixels, &rem_x, &rem_y);
696 	struct pixel_argb_u16 *in_pixels = src_buffer->pixels;
697 	int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), src_buffer->n_pixels);
698 
699 	for (size_t x = 0; x < x_limit; x++, dst_pixels += frame_info->fb->format->cpp[0])
700 		wb->pixel_write(dst_pixels, &in_pixels[x]);
701 }
702 
703 /**
704  * get_pixel_read_line_function() - Retrieve the correct read_line function for a specific
705  * format. The returned pointer is NULL for unsupported pixel formats. The caller must ensure that
706  * the pointer is valid before using it in a vkms_plane_state.
707  *
708  * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h])
709  */
710 pixel_read_line_t get_pixel_read_line_function(u32 format)
711 {
712 	switch (format) {
713 	case DRM_FORMAT_ARGB8888:
714 		return &ARGB8888_read_line;
715 	case DRM_FORMAT_XRGB8888:
716 		return &XRGB8888_read_line;
717 	case DRM_FORMAT_ABGR8888:
718 		return &ABGR8888_read_line;
719 	case DRM_FORMAT_ARGB16161616:
720 		return &ARGB16161616_read_line;
721 	case DRM_FORMAT_XRGB16161616:
722 		return &XRGB16161616_read_line;
723 	case DRM_FORMAT_RGB565:
724 		return &RGB565_read_line;
725 	case DRM_FORMAT_NV12:
726 	case DRM_FORMAT_NV16:
727 	case DRM_FORMAT_NV24:
728 	case DRM_FORMAT_NV21:
729 	case DRM_FORMAT_NV61:
730 	case DRM_FORMAT_NV42:
731 		return &semi_planar_yuv_read_line;
732 	case DRM_FORMAT_YUV420:
733 	case DRM_FORMAT_YUV422:
734 	case DRM_FORMAT_YUV444:
735 	case DRM_FORMAT_YVU420:
736 	case DRM_FORMAT_YVU422:
737 	case DRM_FORMAT_YVU444:
738 		return &planar_yuv_read_line;
739 	case DRM_FORMAT_R1:
740 		return &R1_read_line;
741 	case DRM_FORMAT_R2:
742 		return &R2_read_line;
743 	case DRM_FORMAT_R4:
744 		return &R4_read_line;
745 	case DRM_FORMAT_R8:
746 		return &R8_read_line;
747 	default:
748 		/*
749 		 * This is a bug in vkms_plane_atomic_check(). All the supported
750 		 * format must:
751 		 * - Be listed in vkms_formats in vkms_plane.c
752 		 * - Have a pixel_read callback defined here
753 		 */
754 		pr_err("Pixel format %p4cc is not supported by VKMS planes. This is a kernel bug, atomic check must forbid this configuration.\n",
755 		       &format);
756 		BUG();
757 	}
758 }
759 
/*
 * Conversion matrices consumed by argb_u16_from_yuv888(). The coefficients are integers
 * scaled by 2**32, so 4294967296 stands for 1.0.
 *
 * The YUV matrices were generated using the colour python framework. The function call used
 * to generate each of them is given above its definition, go to
 * https://colour.readthedocs.io/en/develop/generated/colour.matrix_YCbCr.html
 * for more info.
 */

/* Identity matrix with no luma offset: the three input channels are passed through as is. */
static const struct conversion_matrix no_operation = {
	.matrix = {
		{ 4294967296, 0,          0, },
		{ 0,          4294967296, 0, },
		{ 0,          0,          4294967296, },
	},
	.y_offset = 0,
};

/*
 * numpy.around(colour.matrix_YCbCr(K=colour.WEIGHTS_YCBCR["ITU-R BT.601"],
 *                                  is_legal = False,
 *                                  bits = 8) * 2**32).astype(int)
 */
static const struct conversion_matrix yuv_bt601_full = {
	.matrix = {
		{ 4294967296, 0,           6021544149 },
		{ 4294967296, -1478054095, -3067191994 },
		{ 4294967296, 7610682049,  0 },
	},
	.y_offset = 0,
};

/*
 * numpy.around(colour.matrix_YCbCr(K=colour.WEIGHTS_YCBCR["ITU-R BT.601"],
 *                                  is_legal = True,
 *                                  bits = 8) * 2**32).astype(int)
 */
static const struct conversion_matrix yuv_bt601_limited = {
	.matrix = {
		{ 5020601039, 0,           6881764740 },
		{ 5020601039, -1689204679, -3505362278 },
		{ 5020601039, 8697922339,  0 },
	},
	.y_offset = 16,
};

/*
 * numpy.around(colour.matrix_YCbCr(K=colour.WEIGHTS_YCBCR["ITU-R BT.709"],
 *                                  is_legal = False,
 *                                  bits = 8) * 2**32).astype(int)
 */
static const struct conversion_matrix yuv_bt709_full = {
	.matrix = {
		{ 4294967296, 0,          6763714498 },
		{ 4294967296, -804551626, -2010578443 },
		{ 4294967296, 7969741314, 0 },
	},
	.y_offset = 0,
};

/*
 * numpy.around(colour.matrix_YCbCr(K=colour.WEIGHTS_YCBCR["ITU-R BT.709"],
 *                                  is_legal = True,
 *                                  bits = 8) * 2**32).astype(int)
 */
static const struct conversion_matrix yuv_bt709_limited = {
	.matrix = {
		{ 5020601039, 0,          7729959424 },
		{ 5020601039, -919487572, -2297803934 },
		{ 5020601039, 9108275786, 0 },
	},
	.y_offset = 16,
};

/*
 * numpy.around(colour.matrix_YCbCr(K=colour.WEIGHTS_YCBCR["ITU-R BT.2020"],
 *                                  is_legal = False,
 *                                  bits = 8) * 2**32).astype(int)
 */
static const struct conversion_matrix yuv_bt2020_full = {
	.matrix = {
		{ 4294967296, 0,          6333358775 },
		{ 4294967296, -706750298, -2453942994 },
		{ 4294967296, 8080551471, 0 },
	},
	.y_offset = 0,
};

/*
 * numpy.around(colour.matrix_YCbCr(K=colour.WEIGHTS_YCBCR["ITU-R BT.2020"],
 *                                  is_legal = True,
 *                                  bits = 8) * 2**32).astype(int)
 */
static const struct conversion_matrix yuv_bt2020_limited = {
	.matrix = {
		{ 5020601039, 0,          7238124312 },
		{ 5020601039, -807714626, -2804506279 },
		{ 5020601039, 9234915964, 0 },
	},
	.y_offset = 16,
};
858 
859 /**
860  * swap_uv_columns() - Swap u and v column of a given matrix
861  *
862  * @matrix: Matrix in which column are swapped
863  */
864 static void swap_uv_columns(struct conversion_matrix *matrix)
865 {
866 	swap(matrix->matrix[0][2], matrix->matrix[0][1]);
867 	swap(matrix->matrix[1][2], matrix->matrix[1][1]);
868 	swap(matrix->matrix[2][2], matrix->matrix[2][1]);
869 }
870 
871 /**
872  * get_conversion_matrix_to_argb_u16() - Retrieve the correct yuv to rgb conversion matrix for a
873  * given encoding and range.
874  *
875  * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h])
876  * @encoding: DRM_COLOR_* value for which to obtain a conversion matrix
877  * @range: DRM_COLOR_*_RANGE value for which to obtain a conversion matrix
878  * @matrix: Pointer to store the value into
879  */
880 void get_conversion_matrix_to_argb_u16(u32 format,
881 				       enum drm_color_encoding encoding,
882 				       enum drm_color_range range,
883 				       struct conversion_matrix *matrix)
884 {
885 	const struct conversion_matrix *matrix_to_copy;
886 	bool limited_range;
887 
888 	switch (range) {
889 	case DRM_COLOR_YCBCR_LIMITED_RANGE:
890 		limited_range = true;
891 		break;
892 	case DRM_COLOR_YCBCR_FULL_RANGE:
893 		limited_range = false;
894 		break;
895 	case DRM_COLOR_RANGE_MAX:
896 		limited_range = false;
897 		WARN_ONCE(true, "The requested range is not supported.");
898 		break;
899 	}
900 
901 	switch (encoding) {
902 	case DRM_COLOR_YCBCR_BT601:
903 		matrix_to_copy = limited_range ? &yuv_bt601_limited :
904 						 &yuv_bt601_full;
905 		break;
906 	case DRM_COLOR_YCBCR_BT709:
907 		matrix_to_copy = limited_range ? &yuv_bt709_limited :
908 						 &yuv_bt709_full;
909 		break;
910 	case DRM_COLOR_YCBCR_BT2020:
911 		matrix_to_copy = limited_range ? &yuv_bt2020_limited :
912 						 &yuv_bt2020_full;
913 		break;
914 	case DRM_COLOR_ENCODING_MAX:
915 		matrix_to_copy = &no_operation;
916 		WARN_ONCE(true, "The requested encoding is not supported.");
917 		break;
918 	}
919 
920 	memcpy(matrix, matrix_to_copy, sizeof(*matrix_to_copy));
921 
922 	switch (format) {
923 	case DRM_FORMAT_YVU420:
924 	case DRM_FORMAT_YVU422:
925 	case DRM_FORMAT_YVU444:
926 	case DRM_FORMAT_NV21:
927 	case DRM_FORMAT_NV61:
928 	case DRM_FORMAT_NV42:
929 		swap_uv_columns(matrix);
930 		break;
931 	default:
932 		break;
933 	}
934 }
935 EXPORT_SYMBOL(get_conversion_matrix_to_argb_u16);
936 
937 /**
938  * get_pixel_write_function() - Retrieve the correct write_pixel function for a specific format.
939  * The returned pointer is NULL for unsupported pixel formats. The caller must ensure that the
940  * pointer is valid before using it in a vkms_writeback_job.
941  *
942  * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h])
943  */
944 pixel_write_t get_pixel_write_function(u32 format)
945 {
946 	switch (format) {
947 	case DRM_FORMAT_ARGB8888:
948 		return &argb_u16_to_ARGB8888;
949 	case DRM_FORMAT_XRGB8888:
950 		return &argb_u16_to_XRGB8888;
951 	case DRM_FORMAT_ABGR8888:
952 		return &argb_u16_to_ABGR8888;
953 	case DRM_FORMAT_ARGB16161616:
954 		return &argb_u16_to_ARGB16161616;
955 	case DRM_FORMAT_XRGB16161616:
956 		return &argb_u16_to_XRGB16161616;
957 	case DRM_FORMAT_RGB565:
958 		return &argb_u16_to_RGB565;
959 	default:
960 		/*
961 		 * This is a bug in vkms_writeback_atomic_check. All the supported
962 		 * format must:
963 		 * - Be listed in vkms_wb_formats in vkms_writeback.c
964 		 * - Have a pixel_write callback defined here
965 		 */
966 		pr_err("Pixel format %p4cc is not supported by VKMS writeback. This is a kernel bug, atomic check must forbid this configuration.\n",
967 		       &format);
968 		BUG();
969 	}
970 }
971