xref: /linux/drivers/accel/ethosu/ethosu_gem.c (revision 24f171c7e145f43b9f187578e89b0982ce87e54c)
1*5a5e9c02SRob Herring (Arm) // SPDX-License-Identifier: GPL-2.0-only or MIT
2*5a5e9c02SRob Herring (Arm) /* Copyright 2025 Arm, Ltd. */
3*5a5e9c02SRob Herring (Arm) 
4*5a5e9c02SRob Herring (Arm) #include <linux/err.h>
5*5a5e9c02SRob Herring (Arm) #include <linux/slab.h>
6*5a5e9c02SRob Herring (Arm) 
7*5a5e9c02SRob Herring (Arm) #include <drm/ethosu_accel.h>
8*5a5e9c02SRob Herring (Arm) 
9*5a5e9c02SRob Herring (Arm) #include "ethosu_device.h"
10*5a5e9c02SRob Herring (Arm) #include "ethosu_gem.h"
11*5a5e9c02SRob Herring (Arm) 
12*5a5e9c02SRob Herring (Arm) static void ethosu_gem_free_object(struct drm_gem_object *obj)
13*5a5e9c02SRob Herring (Arm) {
14*5a5e9c02SRob Herring (Arm) 	struct ethosu_gem_object *bo = to_ethosu_bo(obj);
15*5a5e9c02SRob Herring (Arm) 
16*5a5e9c02SRob Herring (Arm) 	kfree(bo->info);
17*5a5e9c02SRob Herring (Arm) 	drm_gem_free_mmap_offset(&bo->base.base);
18*5a5e9c02SRob Herring (Arm) 	drm_gem_dma_free(&bo->base);
19*5a5e9c02SRob Herring (Arm) }
20*5a5e9c02SRob Herring (Arm) 
21*5a5e9c02SRob Herring (Arm) static int ethosu_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
22*5a5e9c02SRob Herring (Arm) {
23*5a5e9c02SRob Herring (Arm) 	struct ethosu_gem_object *bo = to_ethosu_bo(obj);
24*5a5e9c02SRob Herring (Arm) 
25*5a5e9c02SRob Herring (Arm) 	/* Don't allow mmap on objects that have the NO_MMAP flag set. */
26*5a5e9c02SRob Herring (Arm) 	if (bo->flags & DRM_ETHOSU_BO_NO_MMAP)
27*5a5e9c02SRob Herring (Arm) 		return -EINVAL;
28*5a5e9c02SRob Herring (Arm) 
29*5a5e9c02SRob Herring (Arm) 	return drm_gem_dma_object_mmap(obj, vma);
30*5a5e9c02SRob Herring (Arm) }
31*5a5e9c02SRob Herring (Arm) 
32*5a5e9c02SRob Herring (Arm) static const struct drm_gem_object_funcs ethosu_gem_funcs = {
33*5a5e9c02SRob Herring (Arm) 	.free = ethosu_gem_free_object,
34*5a5e9c02SRob Herring (Arm) 	.print_info = drm_gem_dma_object_print_info,
35*5a5e9c02SRob Herring (Arm) 	.get_sg_table = drm_gem_dma_object_get_sg_table,
36*5a5e9c02SRob Herring (Arm) 	.vmap = drm_gem_dma_object_vmap,
37*5a5e9c02SRob Herring (Arm) 	.mmap = ethosu_gem_mmap,
38*5a5e9c02SRob Herring (Arm) 	.vm_ops = &drm_gem_dma_vm_ops,
39*5a5e9c02SRob Herring (Arm) };
40*5a5e9c02SRob Herring (Arm) 
41*5a5e9c02SRob Herring (Arm) /**
42*5a5e9c02SRob Herring (Arm)  * ethosu_gem_create_object - Implementation of driver->gem_create_object.
43*5a5e9c02SRob Herring (Arm)  * @ddev: DRM device
44*5a5e9c02SRob Herring (Arm)  * @size: Size in bytes of the memory the object will reference
45*5a5e9c02SRob Herring (Arm)  *
46*5a5e9c02SRob Herring (Arm)  * This lets the GEM helpers allocate object structs for us, and keep
47*5a5e9c02SRob Herring (Arm)  * our BO stats correct.
48*5a5e9c02SRob Herring (Arm)  */
49*5a5e9c02SRob Herring (Arm) struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, size_t size)
50*5a5e9c02SRob Herring (Arm) {
51*5a5e9c02SRob Herring (Arm) 	struct ethosu_gem_object *obj;
52*5a5e9c02SRob Herring (Arm) 
53*5a5e9c02SRob Herring (Arm) 	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
54*5a5e9c02SRob Herring (Arm) 	if (!obj)
55*5a5e9c02SRob Herring (Arm) 		return ERR_PTR(-ENOMEM);
56*5a5e9c02SRob Herring (Arm) 
57*5a5e9c02SRob Herring (Arm) 	obj->base.base.funcs = &ethosu_gem_funcs;
58*5a5e9c02SRob Herring (Arm) 	return &obj->base.base;
59*5a5e9c02SRob Herring (Arm) }
60*5a5e9c02SRob Herring (Arm) 
61*5a5e9c02SRob Herring (Arm) /**
62*5a5e9c02SRob Herring (Arm)  * ethosu_gem_create_with_handle() - Create a GEM object and attach it to a handle.
63*5a5e9c02SRob Herring (Arm)  * @file: DRM file.
64*5a5e9c02SRob Herring (Arm)  * @ddev: DRM device.
65*5a5e9c02SRob Herring (Arm)  * @size: Size of the GEM object to allocate.
66*5a5e9c02SRob Herring (Arm)  * @flags: Combination of drm_ethosu_bo_flags flags.
67*5a5e9c02SRob Herring (Arm)  * @handle: Pointer holding the handle pointing to the new GEM object.
68*5a5e9c02SRob Herring (Arm)  *
69*5a5e9c02SRob Herring (Arm)  * Return: Zero on success
70*5a5e9c02SRob Herring (Arm)  */
71*5a5e9c02SRob Herring (Arm) int ethosu_gem_create_with_handle(struct drm_file *file,
72*5a5e9c02SRob Herring (Arm) 				  struct drm_device *ddev,
73*5a5e9c02SRob Herring (Arm) 				  u64 *size, u32 flags, u32 *handle)
74*5a5e9c02SRob Herring (Arm) {
75*5a5e9c02SRob Herring (Arm) 	struct drm_gem_dma_object *mem;
76*5a5e9c02SRob Herring (Arm) 	struct ethosu_gem_object *bo;
77*5a5e9c02SRob Herring (Arm) 	int ret;
78*5a5e9c02SRob Herring (Arm) 
79*5a5e9c02SRob Herring (Arm) 	mem = drm_gem_dma_create(ddev, *size);
80*5a5e9c02SRob Herring (Arm) 	if (IS_ERR(mem))
81*5a5e9c02SRob Herring (Arm) 		return PTR_ERR(mem);
82*5a5e9c02SRob Herring (Arm) 
83*5a5e9c02SRob Herring (Arm) 	bo = to_ethosu_bo(&mem->base);
84*5a5e9c02SRob Herring (Arm) 	bo->flags = flags;
85*5a5e9c02SRob Herring (Arm) 
86*5a5e9c02SRob Herring (Arm) 	/*
87*5a5e9c02SRob Herring (Arm) 	 * Allocate an id of idr table where the obj is registered
88*5a5e9c02SRob Herring (Arm) 	 * and handle has the id what user can see.
89*5a5e9c02SRob Herring (Arm) 	 */
90*5a5e9c02SRob Herring (Arm) 	ret = drm_gem_handle_create(file, &mem->base, handle);
91*5a5e9c02SRob Herring (Arm) 	if (!ret)
92*5a5e9c02SRob Herring (Arm) 		*size = bo->base.base.size;
93*5a5e9c02SRob Herring (Arm) 
94*5a5e9c02SRob Herring (Arm) 	/* drop reference from allocate - handle holds it now. */
95*5a5e9c02SRob Herring (Arm) 	drm_gem_object_put(&mem->base);
96*5a5e9c02SRob Herring (Arm) 
97*5a5e9c02SRob Herring (Arm) 	return ret;
98*5a5e9c02SRob Herring (Arm) }
99*5a5e9c02SRob Herring (Arm) 
100*5a5e9c02SRob Herring (Arm) struct dma {
101*5a5e9c02SRob Herring (Arm) 	s8 region;
102*5a5e9c02SRob Herring (Arm) 	u64 len;
103*5a5e9c02SRob Herring (Arm) 	u64 offset;
104*5a5e9c02SRob Herring (Arm) 	s64 stride[2];
105*5a5e9c02SRob Herring (Arm) };
106*5a5e9c02SRob Herring (Arm) 
107*5a5e9c02SRob Herring (Arm) struct dma_state {
108*5a5e9c02SRob Herring (Arm) 	u16 size0;
109*5a5e9c02SRob Herring (Arm) 	u16 size1;
110*5a5e9c02SRob Herring (Arm) 	s8 mode;
111*5a5e9c02SRob Herring (Arm) 	struct dma src;
112*5a5e9c02SRob Herring (Arm) 	struct dma dst;
113*5a5e9c02SRob Herring (Arm) };
114*5a5e9c02SRob Herring (Arm) 
115*5a5e9c02SRob Herring (Arm) struct buffer {
116*5a5e9c02SRob Herring (Arm) 	u64 base;
117*5a5e9c02SRob Herring (Arm) 	u32 length;
118*5a5e9c02SRob Herring (Arm) 	s8 region;
119*5a5e9c02SRob Herring (Arm) };
120*5a5e9c02SRob Herring (Arm) 
121*5a5e9c02SRob Herring (Arm) struct feat_matrix {
122*5a5e9c02SRob Herring (Arm) 	u64 base[4];
123*5a5e9c02SRob Herring (Arm) 	s64 stride_x;
124*5a5e9c02SRob Herring (Arm) 	s64 stride_y;
125*5a5e9c02SRob Herring (Arm) 	s64 stride_c;
126*5a5e9c02SRob Herring (Arm) 	s8 region;
127*5a5e9c02SRob Herring (Arm) 	u8 broadcast;
128*5a5e9c02SRob Herring (Arm) 	u16 stride_kernel;
129*5a5e9c02SRob Herring (Arm) 	u16 precision;
130*5a5e9c02SRob Herring (Arm) 	u16 depth;
131*5a5e9c02SRob Herring (Arm) 	u16 width;
132*5a5e9c02SRob Herring (Arm) 	u16 width0;
133*5a5e9c02SRob Herring (Arm) 	u16 height[3];
134*5a5e9c02SRob Herring (Arm) 	u8 pad_top;
135*5a5e9c02SRob Herring (Arm) 	u8 pad_left;
136*5a5e9c02SRob Herring (Arm) 	u8 pad_bottom;
137*5a5e9c02SRob Herring (Arm) 	u8 pad_right;
138*5a5e9c02SRob Herring (Arm) };
139*5a5e9c02SRob Herring (Arm) 
140*5a5e9c02SRob Herring (Arm) struct cmd_state {
141*5a5e9c02SRob Herring (Arm) 	struct dma_state dma;
142*5a5e9c02SRob Herring (Arm) 	struct buffer scale[2];
143*5a5e9c02SRob Herring (Arm) 	struct buffer weight[4];
144*5a5e9c02SRob Herring (Arm) 	struct feat_matrix ofm;
145*5a5e9c02SRob Herring (Arm) 	struct feat_matrix ifm;
146*5a5e9c02SRob Herring (Arm) 	struct feat_matrix ifm2;
147*5a5e9c02SRob Herring (Arm) };
148*5a5e9c02SRob Herring (Arm) 
149*5a5e9c02SRob Herring (Arm) static void cmd_state_init(struct cmd_state *st)
150*5a5e9c02SRob Herring (Arm) {
151*5a5e9c02SRob Herring (Arm) 	/* Initialize to all 1s to detect missing setup */
152*5a5e9c02SRob Herring (Arm) 	memset(st, 0xff, sizeof(*st));
153*5a5e9c02SRob Herring (Arm) }
154*5a5e9c02SRob Herring (Arm) 
/*
 * Assemble the 40-bit address carried by a two-word command.
 *
 * @cmd: the two command words; bits 23:16 of cmd[0] hold address bits 39:32
 *       and cmd[1] holds address bits 31:0.
 *
 * The upper byte must be widened to 64 bits *before* it is shifted: shifting
 * the 32-bit value left by 16 would push bits 23:16 out of the top of the
 * word and silently drop address bits 39:32.
 *
 * Return: the combined 64-bit address.
 */
static u64 cmd_to_addr(const u32 *cmd)
{
	u64 high = cmd[0] & 0xff0000;

	return (high << 16) | cmd[1];
}
159*5a5e9c02SRob Herring (Arm) 
160*5a5e9c02SRob Herring (Arm) static u64 dma_length(struct ethosu_validated_cmdstream_info *info,
161*5a5e9c02SRob Herring (Arm) 		      struct dma_state *dma_st, struct dma *dma)
162*5a5e9c02SRob Herring (Arm) {
163*5a5e9c02SRob Herring (Arm) 	s8 mode = dma_st->mode;
164*5a5e9c02SRob Herring (Arm) 	u64 len = dma->len;
165*5a5e9c02SRob Herring (Arm) 
166*5a5e9c02SRob Herring (Arm) 	if (mode >= 1) {
167*5a5e9c02SRob Herring (Arm) 		len += dma->stride[0];
168*5a5e9c02SRob Herring (Arm) 		len *= dma_st->size0;
169*5a5e9c02SRob Herring (Arm) 	}
170*5a5e9c02SRob Herring (Arm) 	if (mode == 2) {
171*5a5e9c02SRob Herring (Arm) 		len += dma->stride[1];
172*5a5e9c02SRob Herring (Arm) 		len *= dma_st->size1;
173*5a5e9c02SRob Herring (Arm) 	}
174*5a5e9c02SRob Herring (Arm) 	if (dma->region >= 0)
175*5a5e9c02SRob Herring (Arm) 		info->region_size[dma->region] = max(info->region_size[dma->region],
176*5a5e9c02SRob Herring (Arm) 						     len + dma->offset);
177*5a5e9c02SRob Herring (Arm) 
178*5a5e9c02SRob Herring (Arm) 	return len;
179*5a5e9c02SRob Herring (Arm) }
180*5a5e9c02SRob Herring (Arm) 
181*5a5e9c02SRob Herring (Arm) static u64 feat_matrix_length(struct ethosu_validated_cmdstream_info *info,
182*5a5e9c02SRob Herring (Arm) 			      struct feat_matrix *fm,
183*5a5e9c02SRob Herring (Arm) 			      u32 x, u32 y, u32 c)
184*5a5e9c02SRob Herring (Arm) {
185*5a5e9c02SRob Herring (Arm) 	u32 element_size, storage = fm->precision >> 14;
186*5a5e9c02SRob Herring (Arm) 	int tile = 0;
187*5a5e9c02SRob Herring (Arm) 	u64 addr;
188*5a5e9c02SRob Herring (Arm) 
189*5a5e9c02SRob Herring (Arm) 	if (fm->region < 0)
190*5a5e9c02SRob Herring (Arm) 		return U64_MAX;
191*5a5e9c02SRob Herring (Arm) 
192*5a5e9c02SRob Herring (Arm) 	switch (storage) {
193*5a5e9c02SRob Herring (Arm) 	case 0:
194*5a5e9c02SRob Herring (Arm) 		if (x >= fm->width0 + 1) {
195*5a5e9c02SRob Herring (Arm) 			x -= fm->width0 + 1;
196*5a5e9c02SRob Herring (Arm) 			tile += 1;
197*5a5e9c02SRob Herring (Arm) 		}
198*5a5e9c02SRob Herring (Arm) 		if (y >= fm->height[tile] + 1) {
199*5a5e9c02SRob Herring (Arm) 			y -= fm->height[tile] + 1;
200*5a5e9c02SRob Herring (Arm) 			tile += 2;
201*5a5e9c02SRob Herring (Arm) 		}
202*5a5e9c02SRob Herring (Arm) 		break;
203*5a5e9c02SRob Herring (Arm) 	case 1:
204*5a5e9c02SRob Herring (Arm) 		if (y >= fm->height[1] + 1) {
205*5a5e9c02SRob Herring (Arm) 			y -= fm->height[1] + 1;
206*5a5e9c02SRob Herring (Arm) 			tile = 2;
207*5a5e9c02SRob Herring (Arm) 		} else if (y >= fm->height[0] + 1) {
208*5a5e9c02SRob Herring (Arm) 			y -= fm->height[0] + 1;
209*5a5e9c02SRob Herring (Arm) 			tile = 1;
210*5a5e9c02SRob Herring (Arm) 		}
211*5a5e9c02SRob Herring (Arm) 		break;
212*5a5e9c02SRob Herring (Arm) 	}
213*5a5e9c02SRob Herring (Arm) 	if (fm->base[tile] == U64_MAX)
214*5a5e9c02SRob Herring (Arm) 		return U64_MAX;
215*5a5e9c02SRob Herring (Arm) 
216*5a5e9c02SRob Herring (Arm) 	addr = fm->base[tile] + y * fm->stride_y;
217*5a5e9c02SRob Herring (Arm) 
218*5a5e9c02SRob Herring (Arm) 	switch ((fm->precision >> 6) & 0x3) { // format
219*5a5e9c02SRob Herring (Arm) 	case 0: //nhwc:
220*5a5e9c02SRob Herring (Arm) 		addr += x * fm->stride_x + c;
221*5a5e9c02SRob Herring (Arm) 		break;
222*5a5e9c02SRob Herring (Arm) 	case 1: //nhcwb16:
223*5a5e9c02SRob Herring (Arm) 		element_size = BIT((fm->precision >> 1) & 0x3);
224*5a5e9c02SRob Herring (Arm) 
225*5a5e9c02SRob Herring (Arm) 		addr += (c / 16) * fm->stride_c + (16 * x + (c & 0xf)) * element_size;
226*5a5e9c02SRob Herring (Arm) 		break;
227*5a5e9c02SRob Herring (Arm) 	}
228*5a5e9c02SRob Herring (Arm) 
229*5a5e9c02SRob Herring (Arm) 	info->region_size[fm->region] = max(info->region_size[fm->region], addr + 1);
230*5a5e9c02SRob Herring (Arm) 
231*5a5e9c02SRob Herring (Arm) 	return addr;
232*5a5e9c02SRob Herring (Arm) }
233*5a5e9c02SRob Herring (Arm) 
234*5a5e9c02SRob Herring (Arm) static int calc_sizes(struct drm_device *ddev,
235*5a5e9c02SRob Herring (Arm) 		      struct ethosu_validated_cmdstream_info *info,
236*5a5e9c02SRob Herring (Arm) 		      u16 op, struct cmd_state *st,
237*5a5e9c02SRob Herring (Arm) 		      bool ifm, bool ifm2, bool weight, bool scale)
238*5a5e9c02SRob Herring (Arm) {
239*5a5e9c02SRob Herring (Arm) 	u64 len;
240*5a5e9c02SRob Herring (Arm) 
241*5a5e9c02SRob Herring (Arm) 	if (ifm) {
242*5a5e9c02SRob Herring (Arm) 		if (st->ifm.stride_kernel == U16_MAX)
243*5a5e9c02SRob Herring (Arm) 			return -EINVAL;
244*5a5e9c02SRob Herring (Arm) 		u32 stride_y = ((st->ifm.stride_kernel >> 8) & 0x2) +
245*5a5e9c02SRob Herring (Arm) 			((st->ifm.stride_kernel >> 1) & 0x1) + 1;
246*5a5e9c02SRob Herring (Arm) 		u32 stride_x = ((st->ifm.stride_kernel >> 5) & 0x2) +
247*5a5e9c02SRob Herring (Arm) 			(st->ifm.stride_kernel & 0x1) + 1;
248*5a5e9c02SRob Herring (Arm) 		u32 ifm_height = st->ofm.height[2] * stride_y +
249*5a5e9c02SRob Herring (Arm) 			st->ifm.height[2] - (st->ifm.pad_top + st->ifm.pad_bottom);
250*5a5e9c02SRob Herring (Arm) 		u32 ifm_width  = st->ofm.width * stride_x +
251*5a5e9c02SRob Herring (Arm) 			st->ifm.width - (st->ifm.pad_left + st->ifm.pad_right);
252*5a5e9c02SRob Herring (Arm) 
253*5a5e9c02SRob Herring (Arm) 		len = feat_matrix_length(info, &st->ifm, ifm_width,
254*5a5e9c02SRob Herring (Arm) 					 ifm_height, st->ifm.depth);
255*5a5e9c02SRob Herring (Arm) 		dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",
256*5a5e9c02SRob Herring (Arm) 			op, st->ifm.region, st->ifm.base[0], len);
257*5a5e9c02SRob Herring (Arm) 		if (len == U64_MAX)
258*5a5e9c02SRob Herring (Arm) 			return -EINVAL;
259*5a5e9c02SRob Herring (Arm) 	}
260*5a5e9c02SRob Herring (Arm) 
261*5a5e9c02SRob Herring (Arm) 	if (ifm2) {
262*5a5e9c02SRob Herring (Arm) 		len = feat_matrix_length(info, &st->ifm2, st->ifm.depth,
263*5a5e9c02SRob Herring (Arm) 					 0, st->ofm.depth);
264*5a5e9c02SRob Herring (Arm) 		dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n",
265*5a5e9c02SRob Herring (Arm) 			op, st->ifm2.region, st->ifm2.base[0], len);
266*5a5e9c02SRob Herring (Arm) 		if (len == U64_MAX)
267*5a5e9c02SRob Herring (Arm) 			return -EINVAL;
268*5a5e9c02SRob Herring (Arm) 	}
269*5a5e9c02SRob Herring (Arm) 
270*5a5e9c02SRob Herring (Arm) 	if (weight) {
271*5a5e9c02SRob Herring (Arm) 		dev_dbg(ddev->dev, "op %d: W:%d:0x%llx-0x%llx\n",
272*5a5e9c02SRob Herring (Arm) 			op, st->weight[0].region, st->weight[0].base,
273*5a5e9c02SRob Herring (Arm) 			st->weight[0].base + st->weight[0].length - 1);
274*5a5e9c02SRob Herring (Arm) 		if (st->weight[0].region < 0 || st->weight[0].base == U64_MAX ||
275*5a5e9c02SRob Herring (Arm) 		    st->weight[0].length == U32_MAX)
276*5a5e9c02SRob Herring (Arm) 			return -EINVAL;
277*5a5e9c02SRob Herring (Arm) 		info->region_size[st->weight[0].region] =
278*5a5e9c02SRob Herring (Arm) 			max(info->region_size[st->weight[0].region],
279*5a5e9c02SRob Herring (Arm) 			    st->weight[0].base + st->weight[0].length);
280*5a5e9c02SRob Herring (Arm) 	}
281*5a5e9c02SRob Herring (Arm) 
282*5a5e9c02SRob Herring (Arm) 	if (scale) {
283*5a5e9c02SRob Herring (Arm) 		dev_dbg(ddev->dev, "op %d: S:%d:0x%llx-0x%llx\n",
284*5a5e9c02SRob Herring (Arm) 			op, st->scale[0].region, st->scale[0].base,
285*5a5e9c02SRob Herring (Arm) 			st->scale[0].base + st->scale[0].length - 1);
286*5a5e9c02SRob Herring (Arm) 		if (st->scale[0].region < 0 || st->scale[0].base == U64_MAX ||
287*5a5e9c02SRob Herring (Arm) 		    st->scale[0].length == U32_MAX)
288*5a5e9c02SRob Herring (Arm) 			return -EINVAL;
289*5a5e9c02SRob Herring (Arm) 		info->region_size[st->scale[0].region] =
290*5a5e9c02SRob Herring (Arm) 			max(info->region_size[st->scale[0].region],
291*5a5e9c02SRob Herring (Arm) 			    st->scale[0].base + st->scale[0].length);
292*5a5e9c02SRob Herring (Arm) 	}
293*5a5e9c02SRob Herring (Arm) 
294*5a5e9c02SRob Herring (Arm) 	len = feat_matrix_length(info, &st->ofm, st->ofm.width,
295*5a5e9c02SRob Herring (Arm) 				 st->ofm.height[2], st->ofm.depth);
296*5a5e9c02SRob Herring (Arm) 	dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n",
297*5a5e9c02SRob Herring (Arm) 		op, st->ofm.region, st->ofm.base[0], len);
298*5a5e9c02SRob Herring (Arm) 	if (len == U64_MAX)
299*5a5e9c02SRob Herring (Arm) 		return -EINVAL;
300*5a5e9c02SRob Herring (Arm) 	info->output_region[st->ofm.region] = true;
301*5a5e9c02SRob Herring (Arm) 
302*5a5e9c02SRob Herring (Arm) 	return 0;
303*5a5e9c02SRob Herring (Arm) }
304*5a5e9c02SRob Herring (Arm) 
305*5a5e9c02SRob Herring (Arm) static int calc_sizes_elemwise(struct drm_device *ddev,
306*5a5e9c02SRob Herring (Arm) 			       struct ethosu_validated_cmdstream_info *info,
307*5a5e9c02SRob Herring (Arm) 			       u16 op, struct cmd_state *st,
308*5a5e9c02SRob Herring (Arm) 			       bool ifm, bool ifm2)
309*5a5e9c02SRob Herring (Arm) {
310*5a5e9c02SRob Herring (Arm) 	u32 height, width, depth;
311*5a5e9c02SRob Herring (Arm) 	u64 len;
312*5a5e9c02SRob Herring (Arm) 
313*5a5e9c02SRob Herring (Arm) 	if (ifm) {
314*5a5e9c02SRob Herring (Arm) 		height = st->ifm.broadcast & 0x1 ? 0 : st->ofm.height[2];
315*5a5e9c02SRob Herring (Arm) 		width = st->ifm.broadcast & 0x2 ? 0 : st->ofm.width;
316*5a5e9c02SRob Herring (Arm) 		depth = st->ifm.broadcast & 0x4 ? 0 : st->ofm.depth;
317*5a5e9c02SRob Herring (Arm) 
318*5a5e9c02SRob Herring (Arm) 		len = feat_matrix_length(info, &st->ifm, width,
319*5a5e9c02SRob Herring (Arm) 					 height, depth);
320*5a5e9c02SRob Herring (Arm) 		dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",
321*5a5e9c02SRob Herring (Arm) 			op, st->ifm.region, st->ifm.base[0], len);
322*5a5e9c02SRob Herring (Arm) 		if (len == U64_MAX)
323*5a5e9c02SRob Herring (Arm) 			return -EINVAL;
324*5a5e9c02SRob Herring (Arm) 	}
325*5a5e9c02SRob Herring (Arm) 
326*5a5e9c02SRob Herring (Arm) 	if (ifm2) {
327*5a5e9c02SRob Herring (Arm) 		height = st->ifm2.broadcast & 0x1 ? 0 : st->ofm.height[2];
328*5a5e9c02SRob Herring (Arm) 		width = st->ifm2.broadcast & 0x2 ? 0 : st->ofm.width;
329*5a5e9c02SRob Herring (Arm) 		depth = st->ifm2.broadcast & 0x4 ? 0 : st->ofm.depth;
330*5a5e9c02SRob Herring (Arm) 
331*5a5e9c02SRob Herring (Arm) 		len = feat_matrix_length(info, &st->ifm2, width,
332*5a5e9c02SRob Herring (Arm) 					 height, depth);
333*5a5e9c02SRob Herring (Arm) 		dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n",
334*5a5e9c02SRob Herring (Arm) 			op, st->ifm2.region, st->ifm2.base[0], len);
335*5a5e9c02SRob Herring (Arm) 		if (len == U64_MAX)
336*5a5e9c02SRob Herring (Arm) 			return -EINVAL;
337*5a5e9c02SRob Herring (Arm) 	}
338*5a5e9c02SRob Herring (Arm) 
339*5a5e9c02SRob Herring (Arm) 	len = feat_matrix_length(info, &st->ofm, st->ofm.width,
340*5a5e9c02SRob Herring (Arm) 				 st->ofm.height[2], st->ofm.depth);
341*5a5e9c02SRob Herring (Arm) 	dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n",
342*5a5e9c02SRob Herring (Arm) 		op, st->ofm.region, st->ofm.base[0], len);
343*5a5e9c02SRob Herring (Arm) 	if (len == U64_MAX)
344*5a5e9c02SRob Herring (Arm) 		return -EINVAL;
345*5a5e9c02SRob Herring (Arm) 	info->output_region[st->ofm.region] = true;
346*5a5e9c02SRob Herring (Arm) 
347*5a5e9c02SRob Herring (Arm) 	return 0;
348*5a5e9c02SRob Herring (Arm) }
349*5a5e9c02SRob Herring (Arm) 
350*5a5e9c02SRob Herring (Arm) static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev,
351*5a5e9c02SRob Herring (Arm) 						  u32 __user *ucmds,
352*5a5e9c02SRob Herring (Arm) 						  struct ethosu_gem_object *bo,
353*5a5e9c02SRob Herring (Arm) 						  u32 size)
354*5a5e9c02SRob Herring (Arm) {
355*5a5e9c02SRob Herring (Arm) 	struct ethosu_validated_cmdstream_info __free(kfree) *info = kzalloc(sizeof(*info), GFP_KERNEL);
356*5a5e9c02SRob Herring (Arm) 	struct ethosu_device *edev = to_ethosu_device(ddev);
357*5a5e9c02SRob Herring (Arm) 	u32 *bocmds = bo->base.vaddr;
358*5a5e9c02SRob Herring (Arm) 	struct cmd_state st;
359*5a5e9c02SRob Herring (Arm) 	int i, ret;
360*5a5e9c02SRob Herring (Arm) 
361*5a5e9c02SRob Herring (Arm) 	if (!info)
362*5a5e9c02SRob Herring (Arm) 		return -ENOMEM;
363*5a5e9c02SRob Herring (Arm) 	info->cmd_size = size;
364*5a5e9c02SRob Herring (Arm) 
365*5a5e9c02SRob Herring (Arm) 	cmd_state_init(&st);
366*5a5e9c02SRob Herring (Arm) 
367*5a5e9c02SRob Herring (Arm) 	for (i = 0; i < size / 4; i++) {
368*5a5e9c02SRob Herring (Arm) 		bool use_ifm, use_ifm2, use_scale;
369*5a5e9c02SRob Herring (Arm) 		u64 dstlen, srclen;
370*5a5e9c02SRob Herring (Arm) 		u16 cmd, param;
371*5a5e9c02SRob Herring (Arm) 		u32 cmds[2];
372*5a5e9c02SRob Herring (Arm) 		u64 addr;
373*5a5e9c02SRob Herring (Arm) 
374*5a5e9c02SRob Herring (Arm) 		if (get_user(cmds[0], ucmds++))
375*5a5e9c02SRob Herring (Arm) 			return -EFAULT;
376*5a5e9c02SRob Herring (Arm) 
377*5a5e9c02SRob Herring (Arm) 		bocmds[i] = cmds[0];
378*5a5e9c02SRob Herring (Arm) 
379*5a5e9c02SRob Herring (Arm) 		cmd = cmds[0];
380*5a5e9c02SRob Herring (Arm) 		param = cmds[0] >> 16;
381*5a5e9c02SRob Herring (Arm) 
382*5a5e9c02SRob Herring (Arm) 		if (cmd & 0x4000) {
383*5a5e9c02SRob Herring (Arm) 			if (get_user(cmds[1], ucmds++))
384*5a5e9c02SRob Herring (Arm) 				return -EFAULT;
385*5a5e9c02SRob Herring (Arm) 
386*5a5e9c02SRob Herring (Arm) 			i++;
387*5a5e9c02SRob Herring (Arm) 			bocmds[i] = cmds[1];
388*5a5e9c02SRob Herring (Arm) 			addr = cmd_to_addr(cmds);
389*5a5e9c02SRob Herring (Arm) 		}
390*5a5e9c02SRob Herring (Arm) 
391*5a5e9c02SRob Herring (Arm) 		switch (cmd) {
392*5a5e9c02SRob Herring (Arm) 		case NPU_OP_DMA_START:
393*5a5e9c02SRob Herring (Arm) 			srclen = dma_length(info, &st.dma, &st.dma.src);
394*5a5e9c02SRob Herring (Arm) 			dstlen = dma_length(info, &st.dma, &st.dma.dst);
395*5a5e9c02SRob Herring (Arm) 
396*5a5e9c02SRob Herring (Arm) 			if (st.dma.dst.region >= 0)
397*5a5e9c02SRob Herring (Arm) 				info->output_region[st.dma.dst.region] = true;
398*5a5e9c02SRob Herring (Arm) 			dev_dbg(ddev->dev, "cmd: DMA SRC:%d:0x%llx+0x%llx DST:%d:0x%llx+0x%llx\n",
399*5a5e9c02SRob Herring (Arm) 				st.dma.src.region, st.dma.src.offset, srclen,
400*5a5e9c02SRob Herring (Arm) 				st.dma.dst.region, st.dma.dst.offset, dstlen);
401*5a5e9c02SRob Herring (Arm) 			break;
402*5a5e9c02SRob Herring (Arm) 		case NPU_OP_CONV:
403*5a5e9c02SRob Herring (Arm) 		case NPU_OP_DEPTHWISE:
404*5a5e9c02SRob Herring (Arm) 			use_ifm2 = param & 0x1;  // weights_ifm2
405*5a5e9c02SRob Herring (Arm) 			use_scale = !(st.ofm.precision & 0x100);
406*5a5e9c02SRob Herring (Arm) 			ret = calc_sizes(ddev, info, cmd, &st, true, use_ifm2,
407*5a5e9c02SRob Herring (Arm) 					 !use_ifm2, use_scale);
408*5a5e9c02SRob Herring (Arm) 			if (ret)
409*5a5e9c02SRob Herring (Arm) 				return ret;
410*5a5e9c02SRob Herring (Arm) 			break;
411*5a5e9c02SRob Herring (Arm) 		case NPU_OP_POOL:
412*5a5e9c02SRob Herring (Arm) 			use_ifm = param != 0x4;  // pooling mode
413*5a5e9c02SRob Herring (Arm) 			use_scale = !(st.ofm.precision & 0x100);
414*5a5e9c02SRob Herring (Arm) 			ret = calc_sizes(ddev, info, cmd, &st, use_ifm, false,
415*5a5e9c02SRob Herring (Arm) 					 false, use_scale);
416*5a5e9c02SRob Herring (Arm) 			if (ret)
417*5a5e9c02SRob Herring (Arm) 				return ret;
418*5a5e9c02SRob Herring (Arm) 			break;
419*5a5e9c02SRob Herring (Arm) 		case NPU_OP_ELEMENTWISE:
420*5a5e9c02SRob Herring (Arm) 			use_ifm2 = !((st.ifm2.broadcast == 8) || (param == 5) ||
421*5a5e9c02SRob Herring (Arm) 				(param == 6) || (param == 7) || (param == 0x24));
422*5a5e9c02SRob Herring (Arm) 			use_ifm = st.ifm.broadcast != 8;
423*5a5e9c02SRob Herring (Arm) 			ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2);
424*5a5e9c02SRob Herring (Arm) 			if (ret)
425*5a5e9c02SRob Herring (Arm) 				return ret;
426*5a5e9c02SRob Herring (Arm) 			break;
427*5a5e9c02SRob Herring (Arm) 		case NPU_OP_RESIZE: // U85 only
428*5a5e9c02SRob Herring (Arm) 			WARN_ON(1); // TODO
429*5a5e9c02SRob Herring (Arm) 			break;
430*5a5e9c02SRob Herring (Arm) 		case NPU_SET_KERNEL_WIDTH_M1:
431*5a5e9c02SRob Herring (Arm) 			st.ifm.width = param;
432*5a5e9c02SRob Herring (Arm) 			break;
433*5a5e9c02SRob Herring (Arm) 		case NPU_SET_KERNEL_HEIGHT_M1:
434*5a5e9c02SRob Herring (Arm) 			st.ifm.height[2] = param;
435*5a5e9c02SRob Herring (Arm) 			break;
436*5a5e9c02SRob Herring (Arm) 		case NPU_SET_KERNEL_STRIDE:
437*5a5e9c02SRob Herring (Arm) 			st.ifm.stride_kernel = param;
438*5a5e9c02SRob Herring (Arm) 			break;
439*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_PAD_TOP:
440*5a5e9c02SRob Herring (Arm) 			st.ifm.pad_top = param & 0x7f;
441*5a5e9c02SRob Herring (Arm) 			break;
442*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_PAD_LEFT:
443*5a5e9c02SRob Herring (Arm) 			st.ifm.pad_left = param & 0x7f;
444*5a5e9c02SRob Herring (Arm) 			break;
445*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_PAD_RIGHT:
446*5a5e9c02SRob Herring (Arm) 			st.ifm.pad_right = param & 0xff;
447*5a5e9c02SRob Herring (Arm) 			break;
448*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_PAD_BOTTOM:
449*5a5e9c02SRob Herring (Arm) 			st.ifm.pad_bottom = param & 0xff;
450*5a5e9c02SRob Herring (Arm) 			break;
451*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_DEPTH_M1:
452*5a5e9c02SRob Herring (Arm) 			st.ifm.depth = param;
453*5a5e9c02SRob Herring (Arm) 			break;
454*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_PRECISION:
455*5a5e9c02SRob Herring (Arm) 			st.ifm.precision = param;
456*5a5e9c02SRob Herring (Arm) 			break;
457*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_BROADCAST:
458*5a5e9c02SRob Herring (Arm) 			st.ifm.broadcast = param;
459*5a5e9c02SRob Herring (Arm) 			break;
460*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_REGION:
461*5a5e9c02SRob Herring (Arm) 			st.ifm.region = param & 0x7f;
462*5a5e9c02SRob Herring (Arm) 			break;
463*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_WIDTH0_M1:
464*5a5e9c02SRob Herring (Arm) 			st.ifm.width0 = param;
465*5a5e9c02SRob Herring (Arm) 			break;
466*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_HEIGHT0_M1:
467*5a5e9c02SRob Herring (Arm) 			st.ifm.height[0] = param;
468*5a5e9c02SRob Herring (Arm) 			break;
469*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_HEIGHT1_M1:
470*5a5e9c02SRob Herring (Arm) 			st.ifm.height[1] = param;
471*5a5e9c02SRob Herring (Arm) 			break;
472*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_BASE0:
473*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_BASE1:
474*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_BASE2:
475*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_BASE3:
476*5a5e9c02SRob Herring (Arm) 			st.ifm.base[cmd & 0x3] = addr;
477*5a5e9c02SRob Herring (Arm) 			break;
478*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_STRIDE_X:
479*5a5e9c02SRob Herring (Arm) 			st.ifm.stride_x = addr;
480*5a5e9c02SRob Herring (Arm) 			break;
481*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_STRIDE_Y:
482*5a5e9c02SRob Herring (Arm) 			st.ifm.stride_y = addr;
483*5a5e9c02SRob Herring (Arm) 			break;
484*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM_STRIDE_C:
485*5a5e9c02SRob Herring (Arm) 			st.ifm.stride_c = addr;
486*5a5e9c02SRob Herring (Arm) 			break;
487*5a5e9c02SRob Herring (Arm) 
488*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_WIDTH_M1:
489*5a5e9c02SRob Herring (Arm) 			st.ofm.width = param;
490*5a5e9c02SRob Herring (Arm) 			break;
491*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_HEIGHT_M1:
492*5a5e9c02SRob Herring (Arm) 			st.ofm.height[2] = param;
493*5a5e9c02SRob Herring (Arm) 			break;
494*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_DEPTH_M1:
495*5a5e9c02SRob Herring (Arm) 			st.ofm.depth = param;
496*5a5e9c02SRob Herring (Arm) 			break;
497*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_PRECISION:
498*5a5e9c02SRob Herring (Arm) 			st.ofm.precision = param;
499*5a5e9c02SRob Herring (Arm) 			break;
500*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_REGION:
501*5a5e9c02SRob Herring (Arm) 			st.ofm.region = param & 0x7;
502*5a5e9c02SRob Herring (Arm) 			break;
503*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_WIDTH0_M1:
504*5a5e9c02SRob Herring (Arm) 			st.ofm.width0 = param;
505*5a5e9c02SRob Herring (Arm) 			break;
506*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_HEIGHT0_M1:
507*5a5e9c02SRob Herring (Arm) 			st.ofm.height[0] = param;
508*5a5e9c02SRob Herring (Arm) 			break;
509*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_HEIGHT1_M1:
510*5a5e9c02SRob Herring (Arm) 			st.ofm.height[1] = param;
511*5a5e9c02SRob Herring (Arm) 			break;
512*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_BASE0:
513*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_BASE1:
514*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_BASE2:
515*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_BASE3:
516*5a5e9c02SRob Herring (Arm) 			st.ofm.base[cmd & 0x3] = addr;
517*5a5e9c02SRob Herring (Arm) 			break;
518*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_STRIDE_X:
519*5a5e9c02SRob Herring (Arm) 			st.ofm.stride_x = addr;
520*5a5e9c02SRob Herring (Arm) 			break;
521*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_STRIDE_Y:
522*5a5e9c02SRob Herring (Arm) 			st.ofm.stride_y = addr;
523*5a5e9c02SRob Herring (Arm) 			break;
524*5a5e9c02SRob Herring (Arm) 		case NPU_SET_OFM_STRIDE_C:
525*5a5e9c02SRob Herring (Arm) 			st.ofm.stride_c = addr;
526*5a5e9c02SRob Herring (Arm) 			break;
527*5a5e9c02SRob Herring (Arm) 
528*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_BROADCAST:
529*5a5e9c02SRob Herring (Arm) 			st.ifm2.broadcast = param;
530*5a5e9c02SRob Herring (Arm) 			break;
531*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_PRECISION:
532*5a5e9c02SRob Herring (Arm) 			st.ifm2.precision = param;
533*5a5e9c02SRob Herring (Arm) 			break;
534*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_REGION:
535*5a5e9c02SRob Herring (Arm) 			st.ifm2.region = param & 0x7;
536*5a5e9c02SRob Herring (Arm) 			break;
537*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_WIDTH0_M1:
538*5a5e9c02SRob Herring (Arm) 			st.ifm2.width0 = param;
539*5a5e9c02SRob Herring (Arm) 			break;
540*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_HEIGHT0_M1:
541*5a5e9c02SRob Herring (Arm) 			st.ifm2.height[0] = param;
542*5a5e9c02SRob Herring (Arm) 			break;
543*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_HEIGHT1_M1:
544*5a5e9c02SRob Herring (Arm) 			st.ifm2.height[1] = param;
545*5a5e9c02SRob Herring (Arm) 			break;
546*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_BASE0:
547*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_BASE1:
548*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_BASE2:
549*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_BASE3:
550*5a5e9c02SRob Herring (Arm) 			st.ifm2.base[cmd & 0x3] = addr;
551*5a5e9c02SRob Herring (Arm) 			break;
552*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_STRIDE_X:
553*5a5e9c02SRob Herring (Arm) 			st.ifm2.stride_x = addr;
554*5a5e9c02SRob Herring (Arm) 			break;
555*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_STRIDE_Y:
556*5a5e9c02SRob Herring (Arm) 			st.ifm2.stride_y = addr;
557*5a5e9c02SRob Herring (Arm) 			break;
558*5a5e9c02SRob Herring (Arm) 		case NPU_SET_IFM2_STRIDE_C:
559*5a5e9c02SRob Herring (Arm) 			st.ifm2.stride_c = addr;
560*5a5e9c02SRob Herring (Arm) 			break;
561*5a5e9c02SRob Herring (Arm) 
562*5a5e9c02SRob Herring (Arm) 		case NPU_SET_WEIGHT_REGION:
563*5a5e9c02SRob Herring (Arm) 			st.weight[0].region = param & 0x7;
564*5a5e9c02SRob Herring (Arm) 			break;
565*5a5e9c02SRob Herring (Arm) 		case NPU_SET_SCALE_REGION:
566*5a5e9c02SRob Herring (Arm) 			st.scale[0].region = param & 0x7;
567*5a5e9c02SRob Herring (Arm) 			break;
568*5a5e9c02SRob Herring (Arm) 		case NPU_SET_WEIGHT_BASE:
569*5a5e9c02SRob Herring (Arm) 			st.weight[0].base = addr;
570*5a5e9c02SRob Herring (Arm) 			break;
571*5a5e9c02SRob Herring (Arm) 		case NPU_SET_WEIGHT_LENGTH:
572*5a5e9c02SRob Herring (Arm) 			st.weight[0].length = cmds[1];
573*5a5e9c02SRob Herring (Arm) 			break;
574*5a5e9c02SRob Herring (Arm) 		case NPU_SET_SCALE_BASE:
575*5a5e9c02SRob Herring (Arm) 			st.scale[0].base = addr;
576*5a5e9c02SRob Herring (Arm) 			break;
577*5a5e9c02SRob Herring (Arm) 		case NPU_SET_SCALE_LENGTH:
578*5a5e9c02SRob Herring (Arm) 			st.scale[0].length = cmds[1];
579*5a5e9c02SRob Herring (Arm) 			break;
580*5a5e9c02SRob Herring (Arm) 		case NPU_SET_WEIGHT1_BASE:
581*5a5e9c02SRob Herring (Arm) 			st.weight[1].base = addr;
582*5a5e9c02SRob Herring (Arm) 			break;
583*5a5e9c02SRob Herring (Arm) 		case NPU_SET_WEIGHT1_LENGTH:
584*5a5e9c02SRob Herring (Arm) 			st.weight[1].length = cmds[1];
585*5a5e9c02SRob Herring (Arm) 			break;
586*5a5e9c02SRob Herring (Arm) 		case NPU_SET_SCALE1_BASE: // NPU_SET_WEIGHT2_BASE (U85)
587*5a5e9c02SRob Herring (Arm) 			if (ethosu_is_u65(edev))
588*5a5e9c02SRob Herring (Arm) 				st.scale[1].base = addr;
589*5a5e9c02SRob Herring (Arm) 			else
590*5a5e9c02SRob Herring (Arm) 				st.weight[2].base = addr;
591*5a5e9c02SRob Herring (Arm) 			break;
592*5a5e9c02SRob Herring (Arm) 		case NPU_SET_SCALE1_LENGTH: // NPU_SET_WEIGHT2_LENGTH (U85)
593*5a5e9c02SRob Herring (Arm) 			if (ethosu_is_u65(edev))
594*5a5e9c02SRob Herring (Arm) 				st.scale[1].length = cmds[1];
595*5a5e9c02SRob Herring (Arm) 			else
596*5a5e9c02SRob Herring (Arm) 				st.weight[1].length = cmds[1];
597*5a5e9c02SRob Herring (Arm) 			break;
598*5a5e9c02SRob Herring (Arm) 		case NPU_SET_WEIGHT3_BASE:
599*5a5e9c02SRob Herring (Arm) 			st.weight[3].base = addr;
600*5a5e9c02SRob Herring (Arm) 			break;
601*5a5e9c02SRob Herring (Arm) 		case NPU_SET_WEIGHT3_LENGTH:
602*5a5e9c02SRob Herring (Arm) 			st.weight[3].length = cmds[1];
603*5a5e9c02SRob Herring (Arm) 			break;
604*5a5e9c02SRob Herring (Arm) 
605*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_SRC_REGION:
606*5a5e9c02SRob Herring (Arm) 			if (param & 0x100)
607*5a5e9c02SRob Herring (Arm) 				st.dma.src.region = -1;
608*5a5e9c02SRob Herring (Arm) 			else
609*5a5e9c02SRob Herring (Arm) 				st.dma.src.region = param & 0x7;
610*5a5e9c02SRob Herring (Arm) 			st.dma.mode = (param >> 9) & 0x3;
611*5a5e9c02SRob Herring (Arm) 			break;
612*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_DST_REGION:
613*5a5e9c02SRob Herring (Arm) 			if (param & 0x100)
614*5a5e9c02SRob Herring (Arm) 				st.dma.dst.region = -1;
615*5a5e9c02SRob Herring (Arm) 			else
616*5a5e9c02SRob Herring (Arm) 				st.dma.dst.region = param & 0x7;
617*5a5e9c02SRob Herring (Arm) 			break;
618*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_SIZE0:
619*5a5e9c02SRob Herring (Arm) 			st.dma.size0 = param;
620*5a5e9c02SRob Herring (Arm) 			break;
621*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_SIZE1:
622*5a5e9c02SRob Herring (Arm) 			st.dma.size1 = param;
623*5a5e9c02SRob Herring (Arm) 			break;
624*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_SRC_STRIDE0:
625*5a5e9c02SRob Herring (Arm) 			st.dma.src.stride[0] = ((s64)addr << 24) >> 24;
626*5a5e9c02SRob Herring (Arm) 			break;
627*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_SRC_STRIDE1:
628*5a5e9c02SRob Herring (Arm) 			st.dma.src.stride[1] = ((s64)addr << 24) >> 24;
629*5a5e9c02SRob Herring (Arm) 			break;
630*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_DST_STRIDE0:
631*5a5e9c02SRob Herring (Arm) 			st.dma.dst.stride[0] = ((s64)addr << 24) >> 24;
632*5a5e9c02SRob Herring (Arm) 			break;
633*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_DST_STRIDE1:
634*5a5e9c02SRob Herring (Arm) 			st.dma.dst.stride[1] = ((s64)addr << 24) >> 24;
635*5a5e9c02SRob Herring (Arm) 			break;
636*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_SRC:
637*5a5e9c02SRob Herring (Arm) 			st.dma.src.offset = addr;
638*5a5e9c02SRob Herring (Arm) 			break;
639*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_DST:
640*5a5e9c02SRob Herring (Arm) 			st.dma.dst.offset = addr;
641*5a5e9c02SRob Herring (Arm) 			break;
642*5a5e9c02SRob Herring (Arm) 		case NPU_SET_DMA0_LEN:
643*5a5e9c02SRob Herring (Arm) 			st.dma.src.len = st.dma.dst.len = addr;
644*5a5e9c02SRob Herring (Arm) 			break;
645*5a5e9c02SRob Herring (Arm) 		default:
646*5a5e9c02SRob Herring (Arm) 			break;
647*5a5e9c02SRob Herring (Arm) 		}
648*5a5e9c02SRob Herring (Arm) 	}
649*5a5e9c02SRob Herring (Arm) 
650*5a5e9c02SRob Herring (Arm) 	for (i = 0; i < NPU_BASEP_REGION_MAX; i++) {
651*5a5e9c02SRob Herring (Arm) 		if (!info->region_size[i])
652*5a5e9c02SRob Herring (Arm) 			continue;
653*5a5e9c02SRob Herring (Arm) 		dev_dbg(ddev->dev, "region %d max size: 0x%llx\n",
654*5a5e9c02SRob Herring (Arm) 			i, info->region_size[i]);
655*5a5e9c02SRob Herring (Arm) 	}
656*5a5e9c02SRob Herring (Arm) 
657*5a5e9c02SRob Herring (Arm) 	bo->info = no_free_ptr(info);
658*5a5e9c02SRob Herring (Arm) 	return 0;
659*5a5e9c02SRob Herring (Arm) }
660*5a5e9c02SRob Herring (Arm) 
661*5a5e9c02SRob Herring (Arm) /**
662*5a5e9c02SRob Herring (Arm)  * ethosu_gem_cmdstream_create() - Create a GEM object and attach it to a handle.
663*5a5e9c02SRob Herring (Arm)  * @file: DRM file.
664*5a5e9c02SRob Herring (Arm)  * @ddev: DRM device.
665*5a5e9c02SRob Herring (Arm)  * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared.
666*5a5e9c02SRob Herring (Arm)  * @size: Size of the GEM object to allocate.
667*5a5e9c02SRob Herring (Arm)  * @flags: Combination of drm_ethosu_bo_flags flags.
668*5a5e9c02SRob Herring (Arm)  * @handle: Pointer holding the handle pointing to the new GEM object.
669*5a5e9c02SRob Herring (Arm)  *
670*5a5e9c02SRob Herring (Arm)  * Return: Zero on success
671*5a5e9c02SRob Herring (Arm)  */
672*5a5e9c02SRob Herring (Arm) int ethosu_gem_cmdstream_create(struct drm_file *file,
673*5a5e9c02SRob Herring (Arm) 				struct drm_device *ddev,
674*5a5e9c02SRob Herring (Arm) 				u32 size, u64 data, u32 flags, u32 *handle)
675*5a5e9c02SRob Herring (Arm) {
676*5a5e9c02SRob Herring (Arm) 	int ret;
677*5a5e9c02SRob Herring (Arm) 	struct drm_gem_dma_object *mem;
678*5a5e9c02SRob Herring (Arm) 	struct ethosu_gem_object *bo;
679*5a5e9c02SRob Herring (Arm) 
680*5a5e9c02SRob Herring (Arm) 	mem = drm_gem_dma_create(ddev, size);
681*5a5e9c02SRob Herring (Arm) 	if (IS_ERR(mem))
682*5a5e9c02SRob Herring (Arm) 		return PTR_ERR(mem);
683*5a5e9c02SRob Herring (Arm) 
684*5a5e9c02SRob Herring (Arm) 	bo = to_ethosu_bo(&mem->base);
685*5a5e9c02SRob Herring (Arm) 	bo->flags = flags;
686*5a5e9c02SRob Herring (Arm) 
687*5a5e9c02SRob Herring (Arm) 	ret = ethosu_gem_cmdstream_copy_and_validate(ddev,
688*5a5e9c02SRob Herring (Arm) 						     (void __user *)(uintptr_t)data,
689*5a5e9c02SRob Herring (Arm) 						     bo, size);
690*5a5e9c02SRob Herring (Arm) 	if (ret)
691*5a5e9c02SRob Herring (Arm) 		goto fail;
692*5a5e9c02SRob Herring (Arm) 
693*5a5e9c02SRob Herring (Arm) 	/*
694*5a5e9c02SRob Herring (Arm) 	 * Allocate an id of idr table where the obj is registered
695*5a5e9c02SRob Herring (Arm) 	 * and handle has the id what user can see.
696*5a5e9c02SRob Herring (Arm) 	 */
697*5a5e9c02SRob Herring (Arm) 	ret = drm_gem_handle_create(file, &mem->base, handle);
698*5a5e9c02SRob Herring (Arm) 
699*5a5e9c02SRob Herring (Arm) fail:
700*5a5e9c02SRob Herring (Arm) 	/* drop reference from allocate - handle holds it now. */
701*5a5e9c02SRob Herring (Arm) 	drm_gem_object_put(&mem->base);
702*5a5e9c02SRob Herring (Arm) 
703*5a5e9c02SRob Herring (Arm) 	return ret;
704*5a5e9c02SRob Herring (Arm) }
705