// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Broadcom
 */

/**
 * DOC: VC4 HVS module.
 *
 * The Hardware Video Scaler (HVS) is the piece of hardware that does
 * translation, scaling, colorspace conversion, and compositing of
 * pixels stored in framebuffers into a FIFO of pixels going out to
 * the Pixel Valve (CRTC).  It operates at the system clock rate (the
 * system audio clock gate, specifically), which is much higher than
 * the pixel clock rate.
 *
 * There is a single global HVS, with multiple output FIFOs that can
 * be consumed by the PVs.  This file just manages the resources for
 * the HVS, while the vc4_crtc.c code actually drives HVS setup for
 * each CRTC.
 */
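
/*
 * Roughly (a sketch of the flow described above, not a hardware
 * diagram):
 *
 *   framebuffer planes -> HVS compositing -> output FIFO -> Pixel Valve -> encoder
 *
 * Each active CRTC gets assigned one of the HVS output FIFOs (channels).
 */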

#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/platform_device.h>

#include <kunit/test-bug.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_drv.h>
#include <drm/drm_print.h>
#include <drm/drm_vblank.h>

#include <soc/bcm2835/raspberrypi-firmware.h>

#include "vc4_drv.h"
#include "vc4_regs.h"

static const struct debugfs_reg32 vc4_hvs_regs[] = {
	VC4_REG32(SCALER_DISPCTRL),
	VC4_REG32(SCALER_DISPSTAT),
	VC4_REG32(SCALER_DISPID),
	VC4_REG32(SCALER_DISPECTRL),
	VC4_REG32(SCALER_DISPPROF),
	VC4_REG32(SCALER_DISPDITHER),
	VC4_REG32(SCALER_DISPEOLN),
	VC4_REG32(SCALER_DISPLIST0),
	VC4_REG32(SCALER_DISPLIST1),
	VC4_REG32(SCALER_DISPLIST2),
	VC4_REG32(SCALER_DISPLSTAT),
	VC4_REG32(SCALER_DISPLACT0),
	VC4_REG32(SCALER_DISPLACT1),
	VC4_REG32(SCALER_DISPLACT2),
	VC4_REG32(SCALER_DISPCTRL0),
	VC4_REG32(SCALER_DISPBKGND0),
	VC4_REG32(SCALER_DISPSTAT0),
	VC4_REG32(SCALER_DISPBASE0),
	VC4_REG32(SCALER_DISPCTRL1),
	VC4_REG32(SCALER_DISPBKGND1),
	VC4_REG32(SCALER_DISPSTAT1),
	VC4_REG32(SCALER_DISPBASE1),
	VC4_REG32(SCALER_DISPCTRL2),
	VC4_REG32(SCALER_DISPBKGND2),
	VC4_REG32(SCALER_DISPSTAT2),
	VC4_REG32(SCALER_DISPBASE2),
	VC4_REG32(SCALER_DISPALPHA2),
	VC4_REG32(SCALER_OLEDOFFS),
	VC4_REG32(SCALER_OLEDCOEF0),
	VC4_REG32(SCALER_OLEDCOEF1),
	VC4_REG32(SCALER_OLEDCOEF2),
};

static const struct debugfs_reg32 vc6_hvs_regs[] = {
	VC4_REG32(SCALER6_VERSION),
	VC4_REG32(SCALER6_CXM_SIZE),
	VC4_REG32(SCALER6_LBM_SIZE),
	VC4_REG32(SCALER6_UBM_SIZE),
	VC4_REG32(SCALER6_COBA_SIZE),
	VC4_REG32(SCALER6_COB_SIZE),
	VC4_REG32(SCALER6_CONTROL),
	VC4_REG32(SCALER6_FETCHER_STATUS),
	VC4_REG32(SCALER6_FETCH_STATUS),
	VC4_REG32(SCALER6_HANDLE_ERROR),
	VC4_REG32(SCALER6_DISP0_CTRL0),
	VC4_REG32(SCALER6_DISP0_CTRL1),
	VC4_REG32(SCALER6_DISP0_BGND),
	VC4_REG32(SCALER6_DISP0_LPTRS),
	VC4_REG32(SCALER6_DISP0_COB),
	VC4_REG32(SCALER6_DISP0_STATUS),
	VC4_REG32(SCALER6_DISP0_DL),
	VC4_REG32(SCALER6_DISP0_RUN),
	VC4_REG32(SCALER6_DISP1_CTRL0),
	VC4_REG32(SCALER6_DISP1_CTRL1),
	VC4_REG32(SCALER6_DISP1_BGND),
	VC4_REG32(SCALER6_DISP1_LPTRS),
	VC4_REG32(SCALER6_DISP1_COB),
	VC4_REG32(SCALER6_DISP1_STATUS),
	VC4_REG32(SCALER6_DISP1_DL),
	VC4_REG32(SCALER6_DISP1_RUN),
	VC4_REG32(SCALER6_DISP2_CTRL0),
	VC4_REG32(SCALER6_DISP2_CTRL1),
	VC4_REG32(SCALER6_DISP2_BGND),
	VC4_REG32(SCALER6_DISP2_LPTRS),
	VC4_REG32(SCALER6_DISP2_COB),
	VC4_REG32(SCALER6_DISP2_STATUS),
	VC4_REG32(SCALER6_DISP2_DL),
	VC4_REG32(SCALER6_DISP2_RUN),
	VC4_REG32(SCALER6_EOLN),
	VC4_REG32(SCALER6_DL_STATUS),
	VC4_REG32(SCALER6_BFG_MISC),
	VC4_REG32(SCALER6_QOS0),
	VC4_REG32(SCALER6_PROF0),
	VC4_REG32(SCALER6_QOS1),
	VC4_REG32(SCALER6_PROF1),
	VC4_REG32(SCALER6_QOS2),
	VC4_REG32(SCALER6_PROF2),
	VC4_REG32(SCALER6_PRI_MAP0),
	VC4_REG32(SCALER6_PRI_MAP1),
	VC4_REG32(SCALER6_HISTCTRL),
	VC4_REG32(SCALER6_HISTBIN0),
	VC4_REG32(SCALER6_HISTBIN1),
	VC4_REG32(SCALER6_HISTBIN2),
	VC4_REG32(SCALER6_HISTBIN3),
	VC4_REG32(SCALER6_HISTBIN4),
	VC4_REG32(SCALER6_HISTBIN5),
	VC4_REG32(SCALER6_HISTBIN6),
	VC4_REG32(SCALER6_HISTBIN7),
	VC4_REG32(SCALER6_HDR_CFG_REMAP),
	VC4_REG32(SCALER6_COL_SPACE),
	VC4_REG32(SCALER6_HVS_ID),
	VC4_REG32(SCALER6_CFC1),
	VC4_REG32(SCALER6_DISP_UPM_ISO0),
	VC4_REG32(SCALER6_DISP_UPM_ISO1),
	VC4_REG32(SCALER6_DISP_UPM_ISO2),
	VC4_REG32(SCALER6_DISP_LBM_ISO0),
	VC4_REG32(SCALER6_DISP_LBM_ISO1),
	VC4_REG32(SCALER6_DISP_LBM_ISO2),
	VC4_REG32(SCALER6_DISP_COB_ISO0),
	VC4_REG32(SCALER6_DISP_COB_ISO1),
	VC4_REG32(SCALER6_DISP_COB_ISO2),
	VC4_REG32(SCALER6_BAD_COB),
	VC4_REG32(SCALER6_BAD_LBM),
	VC4_REG32(SCALER6_BAD_UPM),
	VC4_REG32(SCALER6_BAD_AXI),
};

static const struct debugfs_reg32 vc6_d_hvs_regs[] = {
	VC4_REG32(SCALER6D_VERSION),
	VC4_REG32(SCALER6D_CXM_SIZE),
	VC4_REG32(SCALER6D_LBM_SIZE),
	VC4_REG32(SCALER6D_UBM_SIZE),
	VC4_REG32(SCALER6D_COBA_SIZE),
	VC4_REG32(SCALER6D_COB_SIZE),
	VC4_REG32(SCALER6D_CONTROL),
	VC4_REG32(SCALER6D_FETCHER_STATUS),
	VC4_REG32(SCALER6D_FETCH_STATUS),
	VC4_REG32(SCALER6D_HANDLE_ERROR),
	VC4_REG32(SCALER6D_DISP0_CTRL0),
	VC4_REG32(SCALER6D_DISP0_CTRL1),
	VC4_REG32(SCALER6D_DISP0_BGND0),
	VC4_REG32(SCALER6D_DISP0_BGND1),
	VC4_REG32(SCALER6D_DISP0_LPTRS),
	VC4_REG32(SCALER6D_DISP0_COB),
	VC4_REG32(SCALER6D_DISP0_STATUS),
	VC4_REG32(SCALER6D_DISP0_DL),
	VC4_REG32(SCALER6D_DISP0_RUN),
	VC4_REG32(SCALER6D_DISP1_CTRL0),
	VC4_REG32(SCALER6D_DISP1_CTRL1),
	VC4_REG32(SCALER6D_DISP1_BGND0),
	VC4_REG32(SCALER6D_DISP1_BGND1),
	VC4_REG32(SCALER6D_DISP1_LPTRS),
	VC4_REG32(SCALER6D_DISP1_COB),
	VC4_REG32(SCALER6D_DISP1_STATUS),
	VC4_REG32(SCALER6D_DISP1_DL),
	VC4_REG32(SCALER6D_DISP1_RUN),
	VC4_REG32(SCALER6D_DISP2_CTRL0),
	VC4_REG32(SCALER6D_DISP2_CTRL1),
	VC4_REG32(SCALER6D_DISP2_BGND0),
	VC4_REG32(SCALER6D_DISP2_BGND1),
	VC4_REG32(SCALER6D_DISP2_LPTRS),
	VC4_REG32(SCALER6D_DISP2_COB),
	VC4_REG32(SCALER6D_DISP2_STATUS),
	VC4_REG32(SCALER6D_DISP2_DL),
	VC4_REG32(SCALER6D_DISP2_RUN),
	VC4_REG32(SCALER6D_EOLN),
	VC4_REG32(SCALER6D_DL_STATUS),
	VC4_REG32(SCALER6D_QOS0),
	VC4_REG32(SCALER6D_PROF0),
	VC4_REG32(SCALER6D_QOS1),
	VC4_REG32(SCALER6D_PROF1),
	VC4_REG32(SCALER6D_QOS2),
	VC4_REG32(SCALER6D_PROF2),
	VC4_REG32(SCALER6D_PRI_MAP0),
	VC4_REG32(SCALER6D_PRI_MAP1),
	VC4_REG32(SCALER6D_HISTCTRL),
	VC4_REG32(SCALER6D_HISTBIN0),
	VC4_REG32(SCALER6D_HISTBIN1),
	VC4_REG32(SCALER6D_HISTBIN2),
	VC4_REG32(SCALER6D_HISTBIN3),
	VC4_REG32(SCALER6D_HISTBIN4),
	VC4_REG32(SCALER6D_HISTBIN5),
	VC4_REG32(SCALER6D_HISTBIN6),
	VC4_REG32(SCALER6D_HISTBIN7),
	VC4_REG32(SCALER6D_HVS_ID),
};

void vc4_hvs_dump_state(struct vc4_hvs *hvs)
{
	struct drm_device *drm = &hvs->vc4->base;
	struct drm_printer p = drm_info_printer(&hvs->pdev->dev);
	int idx, i;

	if (!drm_dev_enter(drm, &idx))
		return;

	drm_print_regset32(&p, &hvs->regset);

	DRM_INFO("HVS ctx:\n");
	for (i = 0; i < 64; i += 4) {
		DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n",
			 i * 4, i < HVS_BOOTLOADER_DLIST_END ? "B" : "D",
			 readl((u32 __iomem *)hvs->dlist + i + 0),
			 readl((u32 __iomem *)hvs->dlist + i + 1),
			 readl((u32 __iomem *)hvs->dlist + i + 2),
			 readl((u32 __iomem *)hvs->dlist + i + 3));
	}

	drm_dev_exit(idx);
}

static int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_printer p = drm_seq_file_printer(m);

	drm_printf(&p, "%d\n", atomic_read(&vc4->underrun));

	return 0;
}

static int vc4_hvs_debugfs_dlist(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_printer p = drm_seq_file_printer(m);
	unsigned int dlist_mem_size = hvs->dlist_mem_size;
	unsigned int next_entry_start;
	unsigned int i, j;
	u32 dlist_word, dispstat;

	for (i = 0; i < SCALER_CHANNELS_COUNT; i++) {
		dispstat = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(i)),
					 SCALER_DISPSTATX_MODE);
		if (dispstat == SCALER_DISPSTATX_MODE_DISABLED ||
		    dispstat == SCALER_DISPSTATX_MODE_EOF) {
			drm_printf(&p, "HVS chan %u disabled\n", i);
			continue;
		}

		drm_printf(&p, "HVS chan %u:\n", i);
		next_entry_start = 0;

		for (j = HVS_READ(SCALER_DISPLISTX(i)); j < dlist_mem_size; j++) {
			dlist_word = readl((u32 __iomem *)vc4->hvs->dlist + j);
			drm_printf(&p, "dlist: %02d: 0x%08x\n", j,
				   dlist_word);
			if (!next_entry_start ||
			    next_entry_start == j) {
				if (dlist_word & SCALER_CTL0_END)
					break;
				next_entry_start = j +
					VC4_GET_FIELD(dlist_word,
						      SCALER_CTL0_SIZE);
			}
		}
	}

	return 0;
}

static int vc6_hvs_debugfs_dlist(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_printer p = drm_seq_file_printer(m);
	unsigned int dlist_mem_size = hvs->dlist_mem_size;
	unsigned int next_entry_start;
	unsigned int i;

	for (i = 0; i < SCALER_CHANNELS_COUNT; i++) {
		unsigned int active_dlist, dispstat;
		unsigned int j;

		dispstat = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(i)),
					 SCALER6_DISPX_STATUS_MODE);
		if (dispstat == SCALER6_DISPX_STATUS_MODE_DISABLED ||
		    dispstat == SCALER6_DISPX_STATUS_MODE_EOF) {
			drm_printf(&p, "HVS chan %u disabled\n", i);
			continue;
		}

		drm_printf(&p, "HVS chan %u:\n", i);

		active_dlist = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_DL(i)),
					     SCALER6_DISPX_DL_LACT);
		next_entry_start = 0;

		for (j = active_dlist; j < dlist_mem_size; j++) {
			u32 dlist_word;

			dlist_word = readl((u32 __iomem *)vc4->hvs->dlist + j);
			drm_printf(&p, "dlist: %02d: 0x%08x\n", j,
				   dlist_word);
			if (!next_entry_start ||
			    next_entry_start == j) {
				if (dlist_word & SCALER_CTL0_END)
					break;
				next_entry_start = j +
					VC4_GET_FIELD(dlist_word,
						      SCALER_CTL0_SIZE);
			}
		}
	}

	return 0;
}

static int vc6_hvs_debugfs_upm_allocs(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_printer p = drm_seq_file_printer(m);
	struct vc4_upm_refcounts *refcount;
	unsigned int i;

	drm_printf(&p, "UPM Handles:\n");
	for (i = 1; i <= VC4_NUM_UPM_HANDLES; i++) {
		refcount = &hvs->upm_refcounts[i];
		drm_printf(&p, "handle %u: refcount %u, size %zu [%08llx + %08llx]\n",
			   i, refcount_read(&refcount->refcount), refcount->size,
			   refcount->upm.start, refcount->upm.size);
	}

	return 0;
}

/* The filter kernel is composed of dwords each containing 3 9-bit
 * signed integers packed next to each other.
 */
#define VC4_INT_TO_COEFF(coeff) ((coeff) & 0x1ff)
#define VC4_PPF_FILTER_WORD(c0, c1, c2)				\
	((((c0) & 0x1ff) << 0) |				\
	 (((c1) & 0x1ff) << 9) |				\
	 (((c2) & 0x1ff) << 18))
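
/*
 * Worked example (illustrative values, not taken from the hardware
 * docs): VC4_PPF_FILTER_WORD(-8, -10, -8) packs to
 *   (0x1f8 << 0) | (0x1f6 << 9) | (0x1f8 << 18) = 0x07e3edf8,
 * since -8 and -10 become 0x1f8 and 0x1f6 in 9-bit two's complement.
 */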

/* The whole filter kernel is arranged as the coefficients 0-16 going
 * up, then a pad, then 17-31 going down and reversed within the
 * dwords.  This means that a linear phase kernel (where it's
 * symmetrical at the boundary between 15 and 16) has the last 5
 * dwords matching the first 5, but reversed.
 */
#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8,	\
				c9, c10, c11, c12, c13, c14, c15)	\
	{VC4_PPF_FILTER_WORD(c0, c1, c2),				\
	 VC4_PPF_FILTER_WORD(c3, c4, c5),				\
	 VC4_PPF_FILTER_WORD(c6, c7, c8),				\
	 VC4_PPF_FILTER_WORD(c9, c10, c11),				\
	 VC4_PPF_FILTER_WORD(c12, c13, c14),				\
	 VC4_PPF_FILTER_WORD(c15, c15, 0)}

#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6
#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1)
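
/*
 * For example (a sketch of the layout described above): with
 * VC4_LINEAR_PHASE_KERNEL_DWORDS = 6, the 11 dwords uploaded are
 * kernel[0..5] followed by kernel[4], kernel[3], ..., kernel[0],
 * which is exactly what the mirroring loop in
 * vc4_hvs_upload_linear_kernel() below writes out.
 */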

/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
 * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
 */
static const u32 mitchell_netravali_1_3_1_3_kernel[] =
	VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
				50, 82, 119, 155, 187, 213, 227);
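
/*
 * For reference (our derivation from the paper above, not from the
 * hardware docs), the Mitchell/Netravali kernel with B = C = 1/3 is:
 *
 *   k(x) = (1/6) * (7*|x|^3 - 12*|x|^2 + 16/3)              for |x| < 1
 *   k(x) = (1/6) * (-7/3*|x|^3 + 12*|x|^2 - 20*|x| + 32/3)  for 1 <= |x| < 2
 *
 * The table above appears to be that curve sampled over the filter
 * window and scaled by 256: e.g. the center tap k(0) = 8/9 ~= 227/256.
 */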

static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
					struct drm_mm_node *space,
					const u32 *kernel)
{
	int ret, i;
	u32 __iomem *dst_kernel;

	/*
	 * NOTE: We don't need a call to drm_dev_enter()/drm_dev_exit()
	 * here since that function is only called from vc4_hvs_bind().
	 */

	ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS);
	if (ret) {
		drm_err(&hvs->vc4->base, "Failed to allocate space for filter kernel: %d\n",
			ret);
		return ret;
	}

	dst_kernel = hvs->dlist + space->start;

	for (i = 0; i < VC4_KERNEL_DWORDS; i++) {
		if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS) {
			writel(kernel[i], &dst_kernel[i]);
		} else {
			writel(kernel[VC4_KERNEL_DWORDS - i - 1],
			       &dst_kernel[i]);
		}
	}

	return 0;
}

static void vc4_hvs_lut_load(struct vc4_hvs *hvs,
			     struct vc4_crtc *vc4_crtc)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	struct drm_crtc *crtc = &vc4_crtc->base;
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	int idx;
	u32 i;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_5);

	if (!drm_dev_enter(drm, &idx))
		return;

	if (hvs->vc4->gen != VC4_GEN_4)
		goto exit;

	/* The LUT memory is laid out with each HVS channel in order,
	 * each of which takes 256 writes for R, 256 for G, then 256
	 * for B.
	 */
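	/* For instance, with the usual 256-entry gamma LUT on these
	 * parts, channel 1's R entries start at LUT word 768
	 * (1 * 3 * 256), followed by its G and then B entries.
	 */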
	HVS_WRITE(SCALER_GAMADDR,
		  SCALER_GAMADDR_AUTOINC |
		  (vc4_state->assigned_channel * 3 * crtc->gamma_size));

	for (i = 0; i < crtc->gamma_size; i++)
		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_r[i]);
	for (i = 0; i < crtc->gamma_size; i++)
		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_g[i]);
	for (i = 0; i < crtc->gamma_size; i++)
		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]);

exit:
	drm_dev_exit(idx);
}

static void vc4_hvs_update_gamma_lut(struct vc4_hvs *hvs,
				     struct vc4_crtc *vc4_crtc)
{
	struct drm_crtc_state *crtc_state = vc4_crtc->base.state;
	struct drm_color_lut *lut = crtc_state->gamma_lut->data;
	u32 length = drm_color_lut_size(crtc_state->gamma_lut);
	u32 i;

	for (i = 0; i < length; i++) {
		vc4_crtc->lut_r[i] = drm_color_lut_extract(lut[i].red, 8);
		vc4_crtc->lut_g[i] = drm_color_lut_extract(lut[i].green, 8);
		vc4_crtc->lut_b[i] = drm_color_lut_extract(lut[i].blue, 8);
	}

	vc4_hvs_lut_load(hvs, vc4_crtc);
}

u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	u8 field = 0;
	int idx;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	if (!drm_dev_enter(drm, &idx))
		return 0;

	switch (vc4->gen) {
	case VC4_GEN_6_C:
	case VC4_GEN_6_D:
		field = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(fifo)),
				      SCALER6_DISPX_STATUS_FRCNT);
		break;
	case VC4_GEN_5:
		switch (fifo) {
		case 0:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
					      SCALER5_DISPSTAT1_FRCNT0);
			break;
		case 1:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
					      SCALER5_DISPSTAT1_FRCNT1);
			break;
		case 2:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2),
					      SCALER5_DISPSTAT2_FRCNT2);
			break;
		}
		break;
	case VC4_GEN_4:
		switch (fifo) {
		case 0:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
					      SCALER_DISPSTAT1_FRCNT0);
			break;
		case 1:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
					      SCALER_DISPSTAT1_FRCNT1);
			break;
		case 2:
			field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2),
					      SCALER_DISPSTAT2_FRCNT2);
			break;
		}
		break;
	default:
		drm_err(drm, "Unknown VC4 generation: %d\n", vc4->gen);
		break;
	}

	drm_dev_exit(idx);
	return field;
}

int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output)
{
	struct vc4_dev *vc4 = hvs->vc4;
	u32 reg;
	int ret;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	switch (vc4->gen) {
	case VC4_GEN_4:
		return output;

	case VC4_GEN_5:
		/*
		 * NOTE: We should probably use
		 * drm_dev_enter()/drm_dev_exit() here, but this
		 * function is only used during the DRM device
		 * initialization, so we should be fine.
		 */

		switch (output) {
		case 0:
			return 0;

		case 1:
			return 1;

		case 2:
			reg = HVS_READ(SCALER_DISPECTRL);
			ret = FIELD_GET(SCALER_DISPECTRL_DSP2_MUX_MASK, reg);
			if (ret == 0)
				return 2;

			return 0;

		case 3:
			reg = HVS_READ(SCALER_DISPCTRL);
			ret = FIELD_GET(SCALER_DISPCTRL_DSP3_MUX_MASK, reg);
			if (ret == 3)
				return -EPIPE;

			return ret;

		case 4:
			reg = HVS_READ(SCALER_DISPEOLN);
			ret = FIELD_GET(SCALER_DISPEOLN_DSP4_MUX_MASK, reg);
			if (ret == 3)
				return -EPIPE;

			return ret;

		case 5:
			reg = HVS_READ(SCALER_DISPDITHER);
			ret = FIELD_GET(SCALER_DISPDITHER_DSP5_MUX_MASK, reg);
			if (ret == 3)
				return -EPIPE;

			return ret;

		default:
			return -EPIPE;
		}

	case VC4_GEN_6_C:
	case VC4_GEN_6_D:
		switch (output) {
		case 0:
			return 0;

		case 2:
			return 2;

		case 1:
		case 3:
		case 4:
			return 1;

		default:
			return -EPIPE;
		}

	default:
		return -EPIPE;
	}
}

static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc,
				struct drm_display_mode *mode, bool oneshot)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state);
	unsigned int chan = vc4_crtc_state->assigned_channel;
	bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
	u32 dispbkgndx;
	u32 dispctrl;
	int idx;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_5);

	if (!drm_dev_enter(drm, &idx))
		return -ENODEV;

	HVS_WRITE(SCALER_DISPCTRLX(chan), 0);
	HVS_WRITE(SCALER_DISPCTRLX(chan), SCALER_DISPCTRLX_RESET);
	HVS_WRITE(SCALER_DISPCTRLX(chan), 0);

	/* Turn on the scaler, which will wait for vstart to start
	 * compositing.
	 * When feeding the transposer, we should operate in oneshot
	 * mode.
	 */
	dispctrl = SCALER_DISPCTRLX_ENABLE;
	dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(chan));

	if (vc4->gen == VC4_GEN_4) {
		dispctrl |= VC4_SET_FIELD(mode->hdisplay,
					  SCALER_DISPCTRLX_WIDTH) |
			    VC4_SET_FIELD(mode->vdisplay,
					  SCALER_DISPCTRLX_HEIGHT) |
			    (oneshot ? SCALER_DISPCTRLX_ONESHOT : 0);
		dispbkgndx |= SCALER_DISPBKGND_AUTOHS;
	} else {
		dispctrl |= VC4_SET_FIELD(mode->hdisplay,
					  SCALER5_DISPCTRLX_WIDTH) |
			    VC4_SET_FIELD(mode->vdisplay,
					  SCALER5_DISPCTRLX_HEIGHT) |
			    (oneshot ? SCALER5_DISPCTRLX_ONESHOT : 0);
		dispbkgndx &= ~SCALER5_DISPBKGND_BCK2BCK;
	}

	HVS_WRITE(SCALER_DISPCTRLX(chan), dispctrl);

	dispbkgndx &= ~SCALER_DISPBKGND_GAMMA;
	dispbkgndx &= ~SCALER_DISPBKGND_INTERLACE;

	HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx |
		  ((vc4->gen == VC4_GEN_4) ? SCALER_DISPBKGND_GAMMA : 0) |
		  (interlace ? SCALER_DISPBKGND_INTERLACE : 0));

	/* Reload the LUT, since the SRAMs would have been disabled if
	 * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
	 */
	vc4_hvs_lut_load(hvs, vc4_crtc);

	drm_dev_exit(idx);

	return 0;
}

static int vc6_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc,
				struct drm_display_mode *mode, bool oneshot)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state);
	unsigned int chan = vc4_crtc_state->assigned_channel;
	bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
	u32 disp_ctrl1;
	int idx;

	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);

	if (!drm_dev_enter(drm, &idx))
		return -ENODEV;

	HVS_WRITE(SCALER6_DISPX_CTRL0(chan), SCALER6_DISPX_CTRL0_RESET);

	disp_ctrl1 = HVS_READ(SCALER6_DISPX_CTRL1(chan));
	disp_ctrl1 &= ~SCALER6_DISPX_CTRL1_INTLACE;
	HVS_WRITE(SCALER6_DISPX_CTRL1(chan),
		  disp_ctrl1 | (interlace ? SCALER6_DISPX_CTRL1_INTLACE : 0));

	HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
		  SCALER6_DISPX_CTRL0_ENB |
		  VC4_SET_FIELD(mode->hdisplay - 1,
				SCALER6_DISPX_CTRL0_FWIDTH) |
		  (oneshot ? SCALER6_DISPX_CTRL0_ONESHOT : 0) |
		  VC4_SET_FIELD(mode->vdisplay - 1,
				SCALER6_DISPX_CTRL0_LINES));

	drm_dev_exit(idx);

	return 0;
}

static void __vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	int idx;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_5);

	if (!drm_dev_enter(drm, &idx))
		return;

	if (!(HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_ENABLE))
		goto out;

	HVS_WRITE(SCALER_DISPCTRLX(chan), SCALER_DISPCTRLX_RESET);
	HVS_WRITE(SCALER_DISPCTRLX(chan), 0);

	/* Once we leave, the scaler should be disabled and its fifo empty. */
	WARN_ON_ONCE(HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_RESET);

	WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(chan)),
				   SCALER_DISPSTATX_MODE) !=
		     SCALER_DISPSTATX_MODE_DISABLED);

	WARN_ON_ONCE((HVS_READ(SCALER_DISPSTATX(chan)) &
		      (SCALER_DISPSTATX_FULL | SCALER_DISPSTATX_EMPTY)) !=
		     SCALER_DISPSTATX_EMPTY);

out:
	drm_dev_exit(idx);
}

static void __vc6_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	int idx;

	WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);

	if (!drm_dev_enter(drm, &idx))
		return;

	if (!(HVS_READ(SCALER6_DISPX_CTRL0(chan)) & SCALER6_DISPX_CTRL0_ENB))
		goto out;

	HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
		  HVS_READ(SCALER6_DISPX_CTRL0(chan)) | SCALER6_DISPX_CTRL0_RESET);

	HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
		  HVS_READ(SCALER6_DISPX_CTRL0(chan)) & ~SCALER6_DISPX_CTRL0_ENB);

	WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(chan)),
				   SCALER6_DISPX_STATUS_MODE) !=
		     SCALER6_DISPX_STATUS_MODE_DISABLED);

out:
	drm_dev_exit(idx);
}

void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
{
	struct vc4_dev *vc4 = hvs->vc4;

	if (vc4->gen >= VC4_GEN_6_C)
		__vc6_hvs_stop_channel(hvs, chan);
	else
		__vc4_hvs_stop_channel(hvs, chan);
}

int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
{
	struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_plane *plane;
	unsigned long flags;
	const struct drm_plane_state *plane_state;
	u32 dlist_count = 0;
	int ret;

	/* The pixelvalve can only feed one encoder (and encoders are
	 * 1:1 with connectors.)
	 */
	if (hweight32(crtc_state->connector_mask) > 1)
		return -EINVAL;

	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
		u32 plane_dlist_count = vc4_plane_dlist_size(plane_state);

		drm_dbg_driver(dev, "[CRTC:%d:%s] Found [PLANE:%d:%s] with DLIST size: %u\n",
			       crtc->base.id, crtc->name,
			       plane->base.id, plane->name,
			       plane_dlist_count);

		dlist_count += plane_dlist_count;
	}

	dlist_count++; /* Account for SCALER_CTL0_END. */

	drm_dbg_driver(dev, "[CRTC:%d:%s] Allocating DLIST block with size: %u\n",
		       crtc->base.id, crtc->name, dlist_count);
	spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
	ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm,
				 dlist_count);
	spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
	if (ret) {
		drm_err(dev, "Failed to allocate DLIST entry: %d\n", ret);
		return ret;
	}

	return 0;
}

static void vc4_hvs_install_dlist(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	int idx;

	if (!drm_dev_enter(dev, &idx))
		return;

	if (vc4->gen >= VC4_GEN_6_C)
		HVS_WRITE(SCALER6_DISPX_LPTRS(vc4_state->assigned_channel),
			  VC4_SET_FIELD(vc4_state->mm.start,
					SCALER6_DISPX_LPTRS_HEADE));
	else
		HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
			  vc4_state->mm.start);

	drm_dev_exit(idx);
}

static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	unsigned long flags;

	if (crtc->state->event) {
		crtc->state->event->pipe = drm_crtc_index(crtc);

		WARN_ON(drm_crtc_vblank_get(crtc) != 0);

		spin_lock_irqsave(&dev->event_lock, flags);

		if (!vc4_crtc->feeds_txp || vc4_state->txp_armed) {
			vc4_crtc->event = crtc->state->event;
			crtc->state->event = NULL;
		}

		spin_unlock_irqrestore(&dev->event_lock, flags);
	}

	spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
	vc4_crtc->current_dlist = vc4_state->mm.start;
	spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
}

void vc4_hvs_atomic_begin(struct drm_crtc *crtc,
			  struct drm_atomic_state *state)
{
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	unsigned long flags;

	spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
	vc4_crtc->current_hvs_channel = vc4_state->assigned_channel;
	spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
}

void vc4_hvs_atomic_enable(struct drm_crtc *crtc,
			   struct drm_atomic_state *state)
{
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_display_mode *mode = &crtc->state->adjusted_mode;
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	bool oneshot = vc4_crtc->feeds_txp;

	vc4_hvs_install_dlist(crtc);
	vc4_hvs_update_dlist(crtc);

	if (vc4->gen >= VC4_GEN_6_C)
		vc6_hvs_init_channel(vc4->hvs, crtc, mode, oneshot);
	else
		vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot);
}

void vc4_hvs_atomic_disable(struct drm_crtc *crtc,
			    struct drm_atomic_state *state)
{
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state, crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(old_state);
	unsigned int chan = vc4_state->assigned_channel;

	vc4_hvs_stop_channel(vc4->hvs, chan);
}

void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
			  struct drm_atomic_state *state)
{
	struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state,
									 crtc);
	struct drm_device *dev = crtc->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
	unsigned int channel = vc4_state->assigned_channel;
	struct drm_plane *plane;
	struct vc4_plane_state *vc4_plane_state;
	bool debug_dump_regs = false;
	bool enable_bg_fill = true;
	u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start;
	u32 __iomem *dlist_next = dlist_start;
	unsigned int zpos = 0;
	bool found = false;
	int idx;

	WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D);

	if (!drm_dev_enter(dev, &idx)) {
		vc4_crtc_send_vblank(crtc);
		return;
	}

	if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED)
		goto exit;

	if (debug_dump_regs) {
		DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc));
		vc4_hvs_dump_state(hvs);
	}

	/* Copy all the active planes' dlist contents to the hardware dlist. */
	do {
		found = false;

		drm_atomic_crtc_for_each_plane(plane, crtc) {
			if (plane->state->normalized_zpos != zpos)
				continue;

			/* Is this the first active plane? */
			if (dlist_next == dlist_start) {
				/* We need to enable background fill when a plane
				 * could be alpha blending from the background, i.e.
				 * where no other plane is underneath. It suffices to
				 * consider the first active plane here since we set
				 * needs_bg_fill such that either the first plane
				 * already needs it or all planes on top blend from
				 * the first or a lower plane.
				 */
				vc4_plane_state = to_vc4_plane_state(plane->state);
				enable_bg_fill = vc4_plane_state->needs_bg_fill;
			}

			dlist_next += vc4_plane_write_dlist(plane, dlist_next);

			found = true;
		}

		zpos++;
	} while (found);

	writel(SCALER_CTL0_END, dlist_next);
	dlist_next++;

	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);

	if (vc4->gen >= VC4_GEN_6_C) {
		/* This sets a black background color fill, as is the case
		 * with other DRM drivers.
		 */
		if (enable_bg_fill)
			HVS_WRITE(SCALER6_DISPX_CTRL1(channel),
				  HVS_READ(SCALER6_DISPX_CTRL1(channel)) |
				  SCALER6_DISPX_CTRL1_BGENB);
		else
			HVS_WRITE(SCALER6_DISPX_CTRL1(channel),
				  HVS_READ(SCALER6_DISPX_CTRL1(channel)) &
				  ~SCALER6_DISPX_CTRL1_BGENB);
	} else {
		/* We can actually run with a lower core clock when background
		 * fill is enabled on VC4_GEN_5, so leave it enabled always.
		 */
		HVS_WRITE(SCALER_DISPBKGNDX(channel),
			  HVS_READ(SCALER_DISPBKGNDX(channel)) |
			  SCALER_DISPBKGND_FILL);
	}

	/* Only update DISPLIST if the CRTC was already running and is not
	 * being disabled.
	 * vc4_crtc_enable() takes care of updating the dlist just after
	 * re-enabling VBLANK interrupts and before enabling the engine.
	 * If the CRTC is being disabled, there's no point in updating this
	 * information.
	 */
	if (crtc->state->active && old_state->active) {
		vc4_hvs_install_dlist(crtc);
		vc4_hvs_update_dlist(crtc);
	}

	if (crtc->state->color_mgmt_changed) {
		u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(channel));

		WARN_ON_ONCE(vc4->gen > VC4_GEN_5);

		if (crtc->state->gamma_lut) {
			vc4_hvs_update_gamma_lut(hvs, vc4_crtc);
			dispbkgndx |= SCALER_DISPBKGND_GAMMA;
		} else {
			/* Unsetting DISPBKGND_GAMMA skips the gamma lut step
			 * in hardware, which is the same as a linear lut that
			 * DRM expects us to use in absence of a user lut.
			 */
			dispbkgndx &= ~SCALER_DISPBKGND_GAMMA;
		}
		HVS_WRITE(SCALER_DISPBKGNDX(channel), dispbkgndx);
	}

	if (debug_dump_regs) {
		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
		vc4_hvs_dump_state(hvs);
	}

exit:
	drm_dev_exit(idx);
}

void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	u32 dispctrl;
	int idx;

	WARN_ON(vc4->gen > VC4_GEN_5);

	if (!drm_dev_enter(drm, &idx))
		return;

	dispctrl = HVS_READ(SCALER_DISPCTRL);
	dispctrl &= ~((vc4->gen == VC4_GEN_5) ?
		      SCALER5_DISPCTRL_DSPEISLUR(channel) :
		      SCALER_DISPCTRL_DSPEISLUR(channel));

	HVS_WRITE(SCALER_DISPCTRL, dispctrl);

	drm_dev_exit(idx);
}

void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel)
{
	struct vc4_dev *vc4 = hvs->vc4;
	struct drm_device *drm = &vc4->base;
	u32 dispctrl;
	int idx;

	WARN_ON(vc4->gen > VC4_GEN_5);

	if (!drm_dev_enter(drm, &idx))
		return;

	dispctrl = HVS_READ(SCALER_DISPCTRL);
	dispctrl |= ((vc4->gen == VC4_GEN_5) ?
		     SCALER5_DISPCTRL_DSPEISLUR(channel) :
		     SCALER_DISPCTRL_DSPEISLUR(channel));

	HVS_WRITE(SCALER_DISPSTAT,
		  SCALER_DISPSTAT_EUFLOW(channel));
	HVS_WRITE(SCALER_DISPCTRL, dispctrl);

	drm_dev_exit(idx);
}

static void vc4_hvs_report_underrun(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	atomic_inc(&vc4->underrun);
	DRM_DEV_ERROR(dev->dev, "HVS underrun\n");
}

static irqreturn_t vc4_hvs_irq_handler(int irq, void *data)
{
	struct drm_device *dev = data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	irqreturn_t irqret = IRQ_NONE;
	int channel;
	u32 control;
	u32 status;
	u32 dspeislur;

	WARN_ON(vc4->gen > VC4_GEN_5);

	/*
	 * NOTE: We don't need to protect the register access using
	 * drm_dev_enter() here because the interrupt handler lifetime
	 * is tied to the device itself, and not to the DRM device.
	 *
	 * So by the time the device goes away, the interrupt handler
	 * will already have been unregistered, and only then is the
	 * DRM device unregistered. drm_dev_enter() would thus always
	 * succeed if we are here.
	 */

	status = HVS_READ(SCALER_DISPSTAT);
	control = HVS_READ(SCALER_DISPCTRL);

	for (channel = 0; channel < SCALER_CHANNELS_COUNT; channel++) {
		dspeislur = (vc4->gen == VC4_GEN_5) ?
			SCALER5_DISPCTRL_DSPEISLUR(channel) :
			SCALER_DISPCTRL_DSPEISLUR(channel);

		/* Interrupt masking is not always honored, so check it here. */
		if (status & SCALER_DISPSTAT_EUFLOW(channel) &&
		    control & dspeislur) {
			vc4_hvs_mask_underrun(hvs, channel);
			vc4_hvs_report_underrun(dev);

			irqret = IRQ_HANDLED;
		}
	}

	/* Clear every per-channel interrupt flag. */
	HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_IRQMASK(0) |
				   SCALER_DISPSTAT_IRQMASK(1) |
				   SCALER_DISPSTAT_IRQMASK(2));

	return irqret;
}

int vc4_hvs_debugfs_init(struct drm_minor *minor)
{
	struct drm_device *drm = minor->dev;
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = vc4->hvs;

	if (!vc4->hvs)
		return -ENODEV;

	if (vc4->gen == VC4_GEN_4)
		debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR,
				    minor->debugfs_root,
				    &vc4->load_tracker_enabled);

	if (vc4->gen >= VC4_GEN_6_C) {
		drm_debugfs_add_file(drm, "hvs_dlists", vc6_hvs_debugfs_dlist, NULL);
		drm_debugfs_add_file(drm, "hvs_upm", vc6_hvs_debugfs_upm_allocs, NULL);
	} else {
		drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL);
	}

	drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL);

	vc4_debugfs_add_regset32(drm, "hvs_regs", &hvs->regset);

	return 0;
}

struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
				void __iomem *regs,
				struct platform_device *pdev)
{
	struct drm_device *drm = &vc4->base;
	struct vc4_hvs *hvs;
	unsigned int dlist_start;
	size_t dlist_size;
	size_t lbm_size;
	unsigned int i;

	hvs = drmm_kzalloc(drm, sizeof(*hvs), GFP_KERNEL);
	if (!hvs)
		return ERR_PTR(-ENOMEM);

	hvs->vc4 = vc4;
	hvs->regs = regs;
	hvs->pdev = pdev;

	spin_lock_init(&hvs->mm_lock);

	switch (vc4->gen) {
	case VC4_GEN_4:
	case VC4_GEN_5:
		/* Set up the HVS display list memory manager. We never
		 * overwrite the setup from the bootloader (just 128b
		 * out of our 16K), since we don't want to scramble the
		 * screen when transitioning from the firmware's boot
		 * setup to runtime.
		 */
		dlist_start = HVS_BOOTLOADER_DLIST_END;
		dlist_size = (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END;
		break;

	case VC4_GEN_6_C:
	case VC4_GEN_6_D:
		dlist_start = HVS_BOOTLOADER_DLIST_END;

		/*
		 * If we are running a test, it means that we can't
		 * access a register. Use a plausible size instead.
		 */
		if (!kunit_get_current_test())
			dlist_size = HVS_READ(SCALER6_CXM_SIZE);
		else
			dlist_size = 4096;

		for (i = 0; i < VC4_NUM_UPM_HANDLES; i++) {
			refcount_set(&hvs->upm_refcounts[i].refcount, 0);
			hvs->upm_refcounts[i].hvs = hvs;
		}

		break;

	default:
		drm_err(drm, "Unknown VC4 generation: %d\n", vc4->gen);
		return ERR_PTR(-ENODEV);
	}

	drm_mm_init(&hvs->dlist_mm, dlist_start, dlist_size);

	hvs->dlist_mem_size = dlist_size;

	/* Set up the HVS LBM memory manager.  We could have some more
	 * complicated data structure that allowed reuse of LBM areas
	 * between planes when they don't overlap on the screen, but
	 * for now we just allocate globally.
	 */

	switch (vc4->gen) {
	case VC4_GEN_4:
		/* 48k words of 2x12-bit pixels */
		lbm_size = 48 * SZ_1K;
		break;

	case VC4_GEN_5:
		/* 60k words of 4x12-bit pixels */
		lbm_size = 60 * SZ_1K;
		break;

	case VC4_GEN_6_C:
	case VC4_GEN_6_D:
		/*
		 * If we are running a test, it means that we can't
		 * access a register. Use a plausible size instead.
		 */
		lbm_size = 1024;
		break;

	default:
		drm_err(drm, "Unknown VC4 generation: %d\n", vc4->gen);
		return ERR_PTR(-ENODEV);
	}

	drm_mm_init(&hvs->lbm_mm, 0, lbm_size);

	if (vc4->gen >= VC4_GEN_6_C) {
		ida_init(&hvs->upm_handles);

		/*
		 * NOTE: On BCM2712, the size can also be read through
		 * the SCALER_UBM_SIZE register. We would need to do a
		 * register access though, which we can't do with kunit
		 * that also uses this function to create its mock
		 * device.
		 */
		drm_mm_init(&hvs->upm_mm, 0, 1024 * HVS_UBM_WORD_SIZE);
	}

	vc4->hvs = hvs;

	return hvs;
}

static int vc4_hvs_hw_init(struct vc4_hvs *hvs)
{
	struct vc4_dev *vc4 = hvs->vc4;
	u32 dispctrl, reg;

	dispctrl = HVS_READ(SCALER_DISPCTRL);
	dispctrl |= SCALER_DISPCTRL_ENABLE;
	HVS_WRITE(SCALER_DISPCTRL, dispctrl);

	reg = HVS_READ(SCALER_DISPECTRL);
	reg &= ~SCALER_DISPECTRL_DSP2_MUX_MASK;
	HVS_WRITE(SCALER_DISPECTRL,
		  reg | VC4_SET_FIELD(0, SCALER_DISPECTRL_DSP2_MUX));

	reg = HVS_READ(SCALER_DISPCTRL);
	reg &= ~SCALER_DISPCTRL_DSP3_MUX_MASK;
	HVS_WRITE(SCALER_DISPCTRL,
		  reg | VC4_SET_FIELD(3, SCALER_DISPCTRL_DSP3_MUX));

	reg = HVS_READ(SCALER_DISPEOLN);
	reg &= ~SCALER_DISPEOLN_DSP4_MUX_MASK;
	HVS_WRITE(SCALER_DISPEOLN,
		  reg | VC4_SET_FIELD(3, SCALER_DISPEOLN_DSP4_MUX));

	reg = HVS_READ(SCALER_DISPDITHER);
	reg &= ~SCALER_DISPDITHER_DSP5_MUX_MASK;
	HVS_WRITE(SCALER_DISPDITHER,
		  reg | VC4_SET_FIELD(3, SCALER_DISPDITHER_DSP5_MUX));

	dispctrl = HVS_READ(SCALER_DISPCTRL);
	dispctrl |= SCALER_DISPCTRL_DISPEIRQ(0) |
		    SCALER_DISPCTRL_DISPEIRQ(1) |
		    SCALER_DISPCTRL_DISPEIRQ(2);

	if (vc4->gen == VC4_GEN_4)
		dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ |
			      SCALER_DISPCTRL_SLVWREIRQ |
			      SCALER_DISPCTRL_SLVRDEIRQ |
			      SCALER_DISPCTRL_DSPEIEOF(0) |
			      SCALER_DISPCTRL_DSPEIEOF(1) |
			      SCALER_DISPCTRL_DSPEIEOF(2) |
			      SCALER_DISPCTRL_DSPEIEOLN(0) |
			      SCALER_DISPCTRL_DSPEIEOLN(1) |
			      SCALER_DISPCTRL_DSPEIEOLN(2) |
			      SCALER_DISPCTRL_DSPEISLUR(0) |
			      SCALER_DISPCTRL_DSPEISLUR(1) |
			      SCALER_DISPCTRL_DSPEISLUR(2) |
			      SCALER_DISPCTRL_SCLEIRQ);
	else
		dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ |
			      SCALER5_DISPCTRL_SLVEIRQ |
			      SCALER5_DISPCTRL_DSPEIEOF(0) |
			      SCALER5_DISPCTRL_DSPEIEOF(1) |
			      SCALER5_DISPCTRL_DSPEIEOF(2) |
			      SCALER5_DISPCTRL_DSPEIEOLN(0) |
			      SCALER5_DISPCTRL_DSPEIEOLN(1) |
			      SCALER5_DISPCTRL_DSPEIEOLN(2) |
			      SCALER5_DISPCTRL_DSPEISLUR(0) |
			      SCALER5_DISPCTRL_DSPEISLUR(1) |
			      SCALER5_DISPCTRL_DSPEISLUR(2) |
			      SCALER_DISPCTRL_SCLEIRQ);

	/* Set AXI panic mode.
	 * VC4 panics when < 2 lines in FIFO.
	 * VC5 panics when less than 1 line in the FIFO.
	 */
	dispctrl &= ~(SCALER_DISPCTRL_PANIC0_MASK |
		      SCALER_DISPCTRL_PANIC1_MASK |
		      SCALER_DISPCTRL_PANIC2_MASK);
	dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC0);
	dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC1);
	dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC2);

	HVS_WRITE(SCALER_DISPCTRL, dispctrl);

	return 0;
}

#define CFC1_N_NL_CSC_CTRL(x)		(0xa000 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C00(x)	(0xa008 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C01(x)	(0xa00c + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C02(x)	(0xa010 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C03(x)	(0xa014 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C04(x)	(0xa018 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C10(x)	(0xa01c + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C11(x)	(0xa020 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C12(x)	(0xa024 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C13(x)	(0xa028 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C14(x)	(0xa02c + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C20(x)	(0xa030 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C21(x)	(0xa034 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C22(x)	(0xa038 + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C23(x)	(0xa03c + ((x) * 0x3000))
#define CFC1_N_MA_CSC_COEFF_C24(x)	(0xa040 + ((x) * 0x3000))

#define SCALER_PI_CMP_CSC_RED0(x)		(0x200 + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_RED1(x)		(0x204 + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_RED_CLAMP(x)		(0x208 + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_CFG(x)		(0x20c + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_GREEN0(x)		(0x210 + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_GREEN1(x)		(0x214 + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_GREEN_CLAMP(x)	(0x218 + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_BLUE0(x)		(0x220 + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_BLUE1(x)		(0x224 + ((x) * 0x40))
#define SCALER_PI_CMP_CSC_BLUE_CLAMP(x)		(0x228 + ((x) * 0x40))

/* 4 S2.22 multiplication factors, and 1 S9.15 additive element for each of 3
 * output components
 */
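/* A quick sanity check on the encoding (our reading of it, not from
 * the datasheet): in S2.22 fixed point, 0x00400000 is exactly 1.0,
 * and 0x004A8542 is ~1.164 (255/219), the usual limited-range luma
 * expansion factor, which matches the tables below.
 */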
struct vc6_csc_coeff_entry {
	u32 csc[3][5];
};

static const struct vc6_csc_coeff_entry csc_coeffs[2][3] = {
	[DRM_COLOR_YCBCR_LIMITED_RANGE] = {
		[DRM_COLOR_YCBCR_BT601] = {
			.csc = {
				{ 0x004A8542, 0x0, 0x0066254A, 0x0, 0xFF908A0D },
				{ 0x004A8542, 0xFFE6ED5D, 0xFFCBF856, 0x0, 0x0043C9A3 },
				{ 0x004A8542, 0x00811A54, 0x0, 0x0, 0xFF759502 }
			}
		},
		[DRM_COLOR_YCBCR_BT709] = {
			.csc = {
				{ 0x004A8542, 0x0, 0x0072BC44, 0x0, 0xFF83F312 },
				{ 0x004A8542, 0xFFF25A22, 0xFFDDE4D0, 0x0, 0x00267064 },
				{ 0x004A8542, 0x00873197, 0x0, 0x0, 0xFF6F7DC0 }
			}
		},
		[DRM_COLOR_YCBCR_BT2020] = {
			.csc = {
				{ 0x004A8542, 0x0, 0x006B4A17, 0x0, 0xFF8B653F },
				{ 0x004A8542, 0xFFF402D9, 0xFFDDE4D0, 0x0, 0x0024C7AE },
				{ 0x004A8542, 0x008912CC, 0x0, 0x0, 0xFF6D9C8B }
			}
		}
	},
	[DRM_COLOR_YCBCR_FULL_RANGE] = {
		[DRM_COLOR_YCBCR_BT601] = {
			.csc = {
				{ 0x00400000, 0x0, 0x0059BA5E, 0x0, 0xFFA645A1 },
				{ 0x00400000, 0xFFE9F9AC, 0xFFD24B97, 0x0, 0x0043BABB },
				{ 0x00400000, 0x00716872, 0x0, 0x0, 0xFF8E978D }
			}
		},
		[DRM_COLOR_YCBCR_BT709] = {
			.csc = {
				{ 0x00400000, 0x0, 0x0064C985, 0x0, 0xFF9B367A },
				{ 0x00400000, 0xFFF402E1, 0xFFE20A40, 0x0, 0x0029F2DE },
				{ 0x00400000, 0x0076C226, 0x0, 0x0, 0xFF893DD9 }
			}
		},
		[DRM_COLOR_YCBCR_BT2020] = {
			.csc = {
				{ 0x00400000, 0x0, 0x005E3F14, 0x0, 0xFFA1C0EB },
				{ 0x00400000, 0xFFF577F6, 0xFFDB580F, 0x0, 0x002F2FFA },
				{ 0x00400000, 0x007868DB, 0x0, 0x0, 0xFF879724 }
			}
		}
	}
};

static int vc6_hvs_hw_init(struct vc4_hvs *hvs)
{
	const struct vc6_csc_coeff_entry *coeffs;
	unsigned int i;

	HVS_WRITE(SCALER6_CONTROL,
		  SCALER6_CONTROL_HVS_EN |
		  VC4_SET_FIELD(8, SCALER6_CONTROL_PF_LINES) |
		  VC4_SET_FIELD(15, SCALER6_CONTROL_MAX_REQS));

	/* Set HVS arbiter priority to max */
	HVS_WRITE(SCALER6(PRI_MAP0), 0xffffffff);
	HVS_WRITE(SCALER6(PRI_MAP1), 0xffffffff);

	if (hvs->vc4->gen == VC4_GEN_6_C) {
		for (i = 0; i < 6; i++) {
			coeffs = &csc_coeffs[i / 3][i % 3];

			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C00(i), coeffs->csc[0][0]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C01(i), coeffs->csc[0][1]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C02(i), coeffs->csc[0][2]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C03(i), coeffs->csc[0][3]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C04(i), coeffs->csc[0][4]);

			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C10(i), coeffs->csc[1][0]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C11(i), coeffs->csc[1][1]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C12(i), coeffs->csc[1][2]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C13(i), coeffs->csc[1][3]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C14(i), coeffs->csc[1][4]);

			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C20(i), coeffs->csc[2][0]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C21(i), coeffs->csc[2][1]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C22(i), coeffs->csc[2][2]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C23(i), coeffs->csc[2][3]);
			HVS_WRITE(CFC1_N_MA_CSC_COEFF_C24(i), coeffs->csc[2][4]);

			HVS_WRITE(CFC1_N_NL_CSC_CTRL(i), BIT(15));
		}
	} else {
		for (i = 0; i < 8; i++) {
			HVS_WRITE(SCALER_PI_CMP_CSC_RED0(i), 0x1f002566);
			HVS_WRITE(SCALER_PI_CMP_CSC_RED1(i), 0x3994);
			HVS_WRITE(SCALER_PI_CMP_CSC_RED_CLAMP(i), 0xfff00000);
			HVS_WRITE(SCALER_PI_CMP_CSC_CFG(i), 0x1);
			HVS_WRITE(SCALER_PI_CMP_CSC_GREEN0(i), 0x18002566);
			HVS_WRITE(SCALER_PI_CMP_CSC_GREEN1(i), 0xf927eee2);
			HVS_WRITE(SCALER_PI_CMP_CSC_GREEN_CLAMP(i), 0xfff00000);
			HVS_WRITE(SCALER_PI_CMP_CSC_BLUE0(i), 0x18002566);
			HVS_WRITE(SCALER_PI_CMP_CSC_BLUE1(i), 0x43d80000);
			HVS_WRITE(SCALER_PI_CMP_CSC_BLUE_CLAMP(i), 0xfff00000);
		}
	}

	return 0;
}

static int vc4_hvs_cob_init(struct vc4_hvs *hvs)
{
	struct vc4_dev *vc4 = hvs->vc4;
	u32 reg, top, base;

	/*
	 * Recompute Composite Output Buffer (COB) allocations for the
	 * displays
	 */
	switch (vc4->gen) {
	case VC4_GEN_4:
		/* The COB is 20736 pixels, or just over 10 lines at 2048 wide.
		 * The bottom 2048 pixels are full 32bpp RGBA (intended for the
		 * TXP composing RGBA to memory), whilst the remainder are only
		 * 24bpp RGB.
		 *
		 * Assign 3 lines to channels 1 & 2, and just over 4 lines to
		 * channel 0.
		 */
		#define VC4_COB_SIZE		20736
		#define VC4_COB_LINE_WIDTH	2048
		#define VC4_COB_NUM_LINES	3
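		/* With the values above, the writes below carve the COB
		 * up as (in pixels, our arithmetic): channel 2 gets
		 * [0, 6143], channel 1 gets [6144, 12287], and channel 0
		 * gets the remaining [12288, 20735], i.e. just over 4
		 * lines.
		 */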
		reg = 0;
		top = VC4_COB_LINE_WIDTH * VC4_COB_NUM_LINES;
		reg |= (top - 1) << 16;
		HVS_WRITE(SCALER_DISPBASE2, reg);
		reg = top;
		top += VC4_COB_LINE_WIDTH * VC4_COB_NUM_LINES;
		reg |= (top - 1) << 16;
		HVS_WRITE(SCALER_DISPBASE1, reg);
		reg = top;
		top = VC4_COB_SIZE;
		reg |= (top - 1) << 16;
		HVS_WRITE(SCALER_DISPBASE0, reg);
		break;

	case VC4_GEN_5:
		/* The COB is 44416 pixels, or 10.8 lines at 4096 wide.
		 * The bottom 4096 pixels are full RGBA (intended for the TXP
		 * composing RGBA to memory), whilst the remainder are only
		 * RGB. Addressing is always pixel wide.
		 *
		 * Assign 3 lines of 4096 to channels 1 & 2, and just over 4
		 * lines to channel 0.
		 */
		#define VC5_COB_SIZE		44416
		#define VC5_COB_LINE_WIDTH	4096
		#define VC5_COB_NUM_LINES	3
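		/* Same carve-up as the VC4 case above, but with 16-pixel
		 * guard gaps between the regions (our arithmetic):
		 * channel 2 gets [0, 12288], channel 1 [12304, 24592],
		 * and channel 0 [24608, 44416].
		 */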
		reg = 0;
		top = VC5_COB_LINE_WIDTH * VC5_COB_NUM_LINES;
		reg |= top << 16;
		HVS_WRITE(SCALER_DISPBASE2, reg);
		top += 16;
		reg = top;
		top += VC5_COB_LINE_WIDTH * VC5_COB_NUM_LINES;
		reg |= top << 16;
		HVS_WRITE(SCALER_DISPBASE1, reg);
		top += 16;
		reg = top;
		top = VC5_COB_SIZE;
		reg |= top << 16;
		HVS_WRITE(SCALER_DISPBASE0, reg);
		break;

	case VC4_GEN_6_C:
	case VC4_GEN_6_D:
		#define VC6_COB_LINE_WIDTH	3840
		#define VC6_COB_NUM_LINES	4
		base = 0;
		top = 3840;

		HVS_WRITE(SCALER6_DISPX_COB(2),
			  VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
			  VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));

		base = top + 16;
		top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES;

		HVS_WRITE(SCALER6_DISPX_COB(1),
			  VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
			  VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));

		base = top + 16;
		top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES;

		HVS_WRITE(SCALER6_DISPX_COB(0),
			  VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
			  VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct drm_device *drm = dev_get_drvdata(master);
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = NULL;
	void __iomem *regs;
	int ret;

	regs = vc4_ioremap_regs(pdev, 0);
	if (IS_ERR(regs))
		return PTR_ERR(regs);

	hvs = __vc4_hvs_alloc(vc4, regs, pdev);
	if (IS_ERR(hvs))
		return PTR_ERR(hvs);

	hvs->regset.base = hvs->regs;

	if (vc4->gen == VC4_GEN_6_C) {
		hvs->regset.regs = vc6_hvs_regs;
		hvs->regset.nregs = ARRAY_SIZE(vc6_hvs_regs);

		if (VC4_GET_FIELD(HVS_READ(SCALER6_VERSION), SCALER6_VERSION) ==
						SCALER6_VERSION_D0) {
			vc4->gen = VC4_GEN_6_D;
			hvs->regset.regs = vc6_d_hvs_regs;
			hvs->regset.nregs = ARRAY_SIZE(vc6_d_hvs_regs);
		}
	} else {
		hvs->regset.regs = vc4_hvs_regs;
		hvs->regset.nregs = ARRAY_SIZE(vc4_hvs_regs);
	}

	if (vc4->gen >= VC4_GEN_5) {
		struct rpi_firmware *firmware;
		struct device_node *node;
		unsigned int max_rate;

		node = rpi_firmware_find_node();
		if (!node)
			return -EINVAL;

		firmware = rpi_firmware_get(node);
		of_node_put(node);
		if (!firmware)
			return -EPROBE_DEFER;

		hvs->core_clk = devm_clk_get(&pdev->dev,
					     (vc4->gen >= VC4_GEN_6_C) ? "core" : NULL);
		if (IS_ERR(hvs->core_clk)) {
			dev_err(&pdev->dev, "Couldn't get core clock\n");
			return PTR_ERR(hvs->core_clk);
		}

		hvs->disp_clk = devm_clk_get(&pdev->dev,
					     (vc4->gen >= VC4_GEN_6_C) ? "disp" : NULL);
		if (IS_ERR(hvs->disp_clk)) {
			dev_err(&pdev->dev, "Couldn't get disp clock\n");
			return PTR_ERR(hvs->disp_clk);
		}

		max_rate = rpi_firmware_clk_get_max_rate(firmware,
							 RPI_FIRMWARE_CORE_CLK_ID);
		rpi_firmware_put(firmware);
		if (max_rate >= 550000000)
			hvs->vc5_hdmi_enable_hdmi_20 = true;

		if (max_rate >= 600000000)
			hvs->vc5_hdmi_enable_4096by2160 = true;

		hvs->max_core_rate = max_rate;

		ret = clk_prepare_enable(hvs->core_clk);
		if (ret) {
			dev_err(&pdev->dev, "Couldn't enable the core clock\n");
			return ret;
		}

		ret = clk_prepare_enable(hvs->disp_clk);
		if (ret) {
			dev_err(&pdev->dev, "Couldn't enable the disp clock\n");
			return ret;
		}
	}

	if (vc4->gen >= VC4_GEN_5)
		hvs->dlist = hvs->regs + SCALER5_DLIST_START;
	else
		hvs->dlist = hvs->regs + SCALER_DLIST_START;

	if (vc4->gen >= VC4_GEN_6_C)
		ret = vc6_hvs_hw_init(hvs);
	else
		ret = vc4_hvs_hw_init(hvs);
	if (ret)
		return ret;

	/* Upload filter kernels.  We only have the one for now, so we
	 * keep it around for the lifetime of the driver.
	 */
	ret = vc4_hvs_upload_linear_kernel(hvs,
					   &hvs->mitchell_netravali_filter,
					   mitchell_netravali_1_3_1_3_kernel);
	if (ret)
		return ret;

	ret = vc4_hvs_cob_init(hvs);
	if (ret)
		return ret;

	if (vc4->gen < VC4_GEN_6_C) {
		ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
				       vc4_hvs_irq_handler, 0, "vc4 hvs", drm);
		if (ret)
			return ret;
	}

	return 0;
}

static void vc4_hvs_unbind(struct device *dev, struct device *master,
			   void *data)
{
	struct drm_device *drm = dev_get_drvdata(master);
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_mm_node *node, *next;

	if (drm_mm_node_allocated(&hvs->mitchell_netravali_filter))
		drm_mm_remove_node(&hvs->mitchell_netravali_filter);

	drm_mm_for_each_node_safe(node, next, &hvs->dlist_mm)
		drm_mm_remove_node(node);

	drm_mm_takedown(&hvs->dlist_mm);

	drm_mm_for_each_node_safe(node, next, &hvs->lbm_mm)
		drm_mm_remove_node(node);
	drm_mm_takedown(&hvs->lbm_mm);

	clk_disable_unprepare(hvs->disp_clk);
	clk_disable_unprepare(hvs->core_clk);

	vc4->hvs = NULL;
}

static const struct component_ops vc4_hvs_ops = {
	.bind   = vc4_hvs_bind,
	.unbind = vc4_hvs_unbind,
};

static int vc4_hvs_dev_probe(struct platform_device *pdev)
{
	return component_add(&pdev->dev, &vc4_hvs_ops);
}

static void vc4_hvs_dev_remove(struct platform_device *pdev)
{
	component_del(&pdev->dev, &vc4_hvs_ops);
}

static const struct of_device_id vc4_hvs_dt_match[] = {
	{ .compatible = "brcm,bcm2711-hvs" },
	{ .compatible = "brcm,bcm2712-hvs" },
	{ .compatible = "brcm,bcm2835-hvs" },
	{}
};

struct platform_driver vc4_hvs_driver = {
	.probe = vc4_hvs_dev_probe,
	.remove = vc4_hvs_dev_remove,
	.driver = {
		.name = "vc4_hvs",
		.of_match_table = vc4_hvs_dt_match,
	},
};