xref: /linux/drivers/accel/ethosu/ethosu_gem.c (revision 8e65320d91cdc3b241d4b94855c88459b91abf66)
1 // SPDX-License-Identifier: GPL-2.0-only or MIT
2 /* Copyright 2025 Arm, Ltd. */
3 
4 #include <linux/err.h>
5 #include <linux/overflow.h>
6 #include <linux/slab.h>
7 
8 #include <drm/ethosu_accel.h>
9 
10 #include "ethosu_device.h"
11 #include "ethosu_gem.h"
12 
ethosu_gem_free_object(struct drm_gem_object * obj)13 static void ethosu_gem_free_object(struct drm_gem_object *obj)
14 {
15 	struct ethosu_gem_object *bo = to_ethosu_bo(obj);
16 
17 	kfree(bo->info);
18 	drm_gem_free_mmap_offset(&bo->base.base);
19 	drm_gem_dma_free(&bo->base);
20 }
21 
ethosu_gem_mmap(struct drm_gem_object * obj,struct vm_area_struct * vma)22 static int ethosu_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
23 {
24 	struct ethosu_gem_object *bo = to_ethosu_bo(obj);
25 
26 	/* Don't allow mmap on objects that have the NO_MMAP flag set. */
27 	if (bo->flags & DRM_ETHOSU_BO_NO_MMAP)
28 		return -EINVAL;
29 
30 	return drm_gem_dma_object_mmap(obj, vma);
31 }
32 
33 static const struct drm_gem_object_funcs ethosu_gem_funcs = {
34 	.free = ethosu_gem_free_object,
35 	.print_info = drm_gem_dma_object_print_info,
36 	.get_sg_table = drm_gem_dma_object_get_sg_table,
37 	.vmap = drm_gem_dma_object_vmap,
38 	.mmap = ethosu_gem_mmap,
39 	.vm_ops = &drm_gem_dma_vm_ops,
40 };
41 
42 /**
43  * ethosu_gem_create_object - Implementation of driver->gem_create_object.
44  * @ddev: DRM device
45  * @size: Size in bytes of the memory the object will reference
46  *
47  * This lets the GEM helpers allocate object structs for us, and keep
48  * our BO stats correct.
49  */
ethosu_gem_create_object(struct drm_device * ddev,size_t size)50 struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, size_t size)
51 {
52 	struct ethosu_gem_object *obj;
53 
54 	obj = kzalloc_obj(*obj);
55 	if (!obj)
56 		return ERR_PTR(-ENOMEM);
57 
58 	obj->base.base.funcs = &ethosu_gem_funcs;
59 	return &obj->base.base;
60 }
61 
62 /**
63  * ethosu_gem_create_with_handle() - Create a GEM object and attach it to a handle.
64  * @file: DRM file.
65  * @ddev: DRM device.
66  * @size: Size of the GEM object to allocate.
67  * @flags: Combination of drm_ethosu_bo_flags flags.
68  * @handle: Pointer holding the handle pointing to the new GEM object.
69  *
70  * Return: Zero on success
71  */
ethosu_gem_create_with_handle(struct drm_file * file,struct drm_device * ddev,u64 * size,u32 flags,u32 * handle)72 int ethosu_gem_create_with_handle(struct drm_file *file,
73 				  struct drm_device *ddev,
74 				  u64 *size, u32 flags, u32 *handle)
75 {
76 	struct drm_gem_dma_object *mem;
77 	struct ethosu_gem_object *bo;
78 	int ret;
79 
80 	mem = drm_gem_dma_create(ddev, *size);
81 	if (IS_ERR(mem))
82 		return PTR_ERR(mem);
83 
84 	bo = to_ethosu_bo(&mem->base);
85 	bo->flags = flags;
86 
87 	/*
88 	 * Allocate an id of idr table where the obj is registered
89 	 * and handle has the id what user can see.
90 	 */
91 	ret = drm_gem_handle_create(file, &mem->base, handle);
92 	if (!ret)
93 		*size = bo->base.base.size;
94 
95 	/* drop reference from allocate - handle holds it now. */
96 	drm_gem_object_put(&mem->base);
97 
98 	return ret;
99 }
100 
101 struct dma {
102 	s8 region;
103 	u64 len;
104 	u64 offset;
105 	s64 stride[2];
106 };
107 
108 struct dma_state {
109 	u16 size0;
110 	u16 size1;
111 	s8 mode;
112 	struct dma src;
113 	struct dma dst;
114 };
115 
116 struct buffer {
117 	u64 base;
118 	u32 length;
119 	s8 region;
120 };
121 
122 struct feat_matrix {
123 	u64 base[4];
124 	s64 stride_x;
125 	s64 stride_y;
126 	s64 stride_c;
127 	s8 region;
128 	u8 broadcast;
129 	u16 stride_kernel;
130 	u16 precision;
131 	u16 depth;
132 	u16 width;
133 	u16 width0;
134 	u16 height[3];
135 	u8 pad_top;
136 	u8 pad_left;
137 	u8 pad_bottom;
138 	u8 pad_right;
139 };
140 
141 struct cmd_state {
142 	struct dma_state dma;
143 	struct buffer scale[2];
144 	struct buffer weight[4];
145 	struct feat_matrix ofm;
146 	struct feat_matrix ifm;
147 	struct feat_matrix ifm2;
148 };
149 
cmd_state_init(struct cmd_state * st)150 static void cmd_state_init(struct cmd_state *st)
151 {
152 	/* Initialize to all 1s to detect missing setup */
153 	memset(st, 0xff, sizeof(*st));
154 }
155 
cmd_to_addr(u32 * cmd)156 static u64 cmd_to_addr(u32 *cmd)
157 {
158 	return (((u64)cmd[0] & 0xff0000) << 16) | cmd[1];
159 }
160 
dma_length(struct ethosu_validated_cmdstream_info * info,struct dma_state * dma_st,struct dma * dma)161 static u64 dma_length(struct ethosu_validated_cmdstream_info *info,
162 		      struct dma_state *dma_st, struct dma *dma)
163 {
164 	s8 mode = dma_st->mode;
165 	u64 len = dma->len;
166 
167 	if (len == U64_MAX)
168 		return U64_MAX;
169 
170 	if (mode >= 1) {
171 		if (dma->stride[0] < 0 && (u64)(-dma->stride[0]) > len)
172 			return U64_MAX;
173 		len += dma->stride[0];
174 		if (check_mul_overflow(len, (u64)dma_st->size0, &len))
175 			return U64_MAX;
176 	}
177 	if (mode == 2) {
178 		if (dma->stride[1] < 0 && (u64)(-dma->stride[1]) > len)
179 			return U64_MAX;
180 		len += dma->stride[1];
181 		if (check_mul_overflow(len, (u64)dma_st->size1, &len))
182 			return U64_MAX;
183 	}
184 	if (dma->region >= 0) {
185 		u64 end;
186 
187 		if (check_add_overflow(len, dma->offset, &end))
188 			return U64_MAX;
189 		info->region_size[dma->region] = max(info->region_size[dma->region], end);
190 	}
191 
192 	return len;
193 }
194 
feat_matrix_length(struct ethosu_validated_cmdstream_info * info,struct feat_matrix * fm,u32 x,u32 y,u32 c)195 static u64 feat_matrix_length(struct ethosu_validated_cmdstream_info *info,
196 			      struct feat_matrix *fm,
197 			      u32 x, u32 y, u32 c)
198 {
199 	u32 element_size, storage = fm->precision >> 14;
200 	int tile = 0;
201 	u64 addr;
202 
203 	if (fm->region < 0)
204 		return U64_MAX;
205 
206 	switch (storage) {
207 	case 0:
208 		if (x >= fm->width0 + 1) {
209 			x -= fm->width0 + 1;
210 			tile += 1;
211 		}
212 		if (y >= fm->height[tile] + 1) {
213 			y -= fm->height[tile] + 1;
214 			tile += 2;
215 		}
216 		break;
217 	case 1:
218 		if (y >= fm->height[1] + 1) {
219 			y -= fm->height[1] + 1;
220 			tile = 2;
221 		} else if (y >= fm->height[0] + 1) {
222 			y -= fm->height[0] + 1;
223 			tile = 1;
224 		}
225 		break;
226 	}
227 	if (fm->base[tile] == U64_MAX)
228 		return U64_MAX;
229 
230 	addr = fm->base[tile] + y * fm->stride_y;
231 
232 	switch ((fm->precision >> 6) & 0x3) { // format
233 	case 0: //nhwc:
234 		addr += x * fm->stride_x + c;
235 		break;
236 	case 1: //nhcwb16:
237 		element_size = BIT((fm->precision >> 1) & 0x3);
238 
239 		addr += (c / 16) * fm->stride_c + (16 * x + (c & 0xf)) * element_size;
240 		break;
241 	}
242 
243 	info->region_size[fm->region] = max(info->region_size[fm->region], addr + 1);
244 
245 	return addr;
246 }
247 
/*
 * Account for the memory touched by one operation command.
 *
 * For each operand enabled by @ifm, @ifm2, @weight and @scale, and always
 * for the OFM, the accessed extent is folded into info->region_size[]. The
 * OFM's region is additionally flagged in info->output_region[].
 *
 * Returns 0 on success, or -EINVAL when a required piece of state was never
 * programmed or the derived IFM dimensions are negative.
 */
static int calc_sizes(struct drm_device *ddev,
		      struct ethosu_validated_cmdstream_info *info,
		      u16 op, struct cmd_state *st,
		      bool ifm, bool ifm2, bool weight, bool scale)
{
	u64 len;

	if (ifm) {
		u16 sk = st->ifm.stride_kernel;
		u32 stride_x, stride_y;
		s32 ifm_width, ifm_height;

		/* All-ones means NPU_SET_KERNEL_STRIDE was never seen. */
		if (sk == U16_MAX)
			return -EINVAL;

		stride_y = ((sk >> 8) & 0x2) + ((sk >> 1) & 0x1) + 1;
		stride_x = ((sk >> 5) & 0x2) + (sk & 0x1) + 1;

		/*
		 * IFM coordinate derived from the OFM size, the kernel
		 * stride and size, and the padding.
		 */
		ifm_height = st->ofm.height[2] * stride_y +
			     st->ifm.height[2] -
			     (st->ifm.pad_top + st->ifm.pad_bottom);
		ifm_width = st->ofm.width * stride_x +
			    st->ifm.width -
			    (st->ifm.pad_left + st->ifm.pad_right);
		if (ifm_width < 0 || ifm_height < 0)
			return -EINVAL;

		len = feat_matrix_length(info, &st->ifm, ifm_width,
					 ifm_height, st->ifm.depth);
		dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",
			op, st->ifm.region, st->ifm.base[0], len);
		if (len == U64_MAX)
			return -EINVAL;
	}

	if (ifm2) {
		len = feat_matrix_length(info, &st->ifm2, st->ifm.depth,
					 0, st->ofm.depth);
		dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n",
			op, st->ifm2.region, st->ifm2.base[0], len);
		if (len == U64_MAX)
			return -EINVAL;
	}

	if (weight) {
		struct buffer *w = &st->weight[0];

		dev_dbg(ddev->dev, "op %d: W:%d:0x%llx-0x%llx\n",
			op, w->region, w->base, w->base + w->length - 1);
		if (w->region < 0 || w->base == U64_MAX ||
		    w->length == U32_MAX)
			return -EINVAL;
		info->region_size[w->region] =
			max(info->region_size[w->region], w->base + w->length);
	}

	if (scale) {
		struct buffer *s = &st->scale[0];

		dev_dbg(ddev->dev, "op %d: S:%d:0x%llx-0x%llx\n",
			op, s->region, s->base, s->base + s->length - 1);
		if (s->region < 0 || s->base == U64_MAX ||
		    s->length == U32_MAX)
			return -EINVAL;
		info->region_size[s->region] =
			max(info->region_size[s->region], s->base + s->length);
	}

	len = feat_matrix_length(info, &st->ofm, st->ofm.width,
				 st->ofm.height[2], st->ofm.depth);
	dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n",
		op, st->ofm.region, st->ofm.base[0], len);
	if (len == U64_MAX)
		return -EINVAL;

	/* A valid length implies the OFM region index is non-negative. */
	info->output_region[st->ofm.region] = true;

	return 0;
}
321 
calc_sizes_elemwise(struct drm_device * ddev,struct ethosu_validated_cmdstream_info * info,u16 op,struct cmd_state * st,bool ifm,bool ifm2)322 static int calc_sizes_elemwise(struct drm_device *ddev,
323 			       struct ethosu_validated_cmdstream_info *info,
324 			       u16 op, struct cmd_state *st,
325 			       bool ifm, bool ifm2)
326 {
327 	u32 height, width, depth;
328 	u64 len;
329 
330 	if (ifm) {
331 		height = st->ifm.broadcast & 0x1 ? 0 : st->ofm.height[2];
332 		width = st->ifm.broadcast & 0x2 ? 0 : st->ofm.width;
333 		depth = st->ifm.broadcast & 0x4 ? 0 : st->ofm.depth;
334 
335 		len = feat_matrix_length(info, &st->ifm, width,
336 					 height, depth);
337 		dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",
338 			op, st->ifm.region, st->ifm.base[0], len);
339 		if (len == U64_MAX)
340 			return -EINVAL;
341 	}
342 
343 	if (ifm2) {
344 		height = st->ifm2.broadcast & 0x1 ? 0 : st->ofm.height[2];
345 		width = st->ifm2.broadcast & 0x2 ? 0 : st->ofm.width;
346 		depth = st->ifm2.broadcast & 0x4 ? 0 : st->ofm.depth;
347 
348 		len = feat_matrix_length(info, &st->ifm2, width,
349 					 height, depth);
350 		dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n",
351 			op, st->ifm2.region, st->ifm2.base[0], len);
352 		if (len == U64_MAX)
353 			return -EINVAL;
354 	}
355 
356 	len = feat_matrix_length(info, &st->ofm, st->ofm.width,
357 				 st->ofm.height[2], st->ofm.depth);
358 	dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n",
359 		op, st->ofm.region, st->ofm.base[0], len);
360 	if (len == U64_MAX)
361 		return -EINVAL;
362 	info->output_region[st->ofm.region] = true;
363 
364 	return 0;
365 }
366 
/*
 * Copy a user command stream into a BO and validate it on the way.
 *
 * @ddev:  DRM device, used for debug output and to identify the NPU variant.
 * @ucmds: User pointer to the command stream.
 * @bo:    Destination BO; the stream is written through bo->base.vaddr.
 * @size:  Size of the command stream in bytes. Only whole 32-bit words
 *         (size / 4) are processed.
 *
 * Each command word is read from user space exactly once and stored in the
 * BO, so what is validated is what ends up in the BO. Commands that program
 * state are tracked in a local struct cmd_state. Each operation command
 * then folds the memory extent it touches into info->region_size[] and
 * marks the regions it writes in info->output_region[].
 *
 * On success the collected info is attached to @bo as bo->info. On failure
 * nothing is attached, but the BO may already hold a partial copy of the
 * stream.
 *
 * Return: 0 on success, -ENOMEM if the info cannot be allocated, -EFAULT if
 * user memory cannot be read, -EINVAL if a two-word command is truncated,
 * an operation is unsupported, or an operation's state fails validation.
 */
static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev,
						  u32 __user *ucmds,
						  struct ethosu_gem_object *bo,
						  u32 size)
{
	struct ethosu_validated_cmdstream_info __free(kfree) *info = kzalloc_obj(*info);
	struct ethosu_device *edev = to_ethosu_device(ddev);
	u32 *bocmds = bo->base.vaddr;
	u32 nwords = size / 4;
	struct cmd_state st;
	int i, ret;

	if (!info)
		return -ENOMEM;
	info->cmd_size = size;

	cmd_state_init(&st);

	for (i = 0; i < nwords; i++) {
		bool use_ifm, use_ifm2, use_scale;
		u64 dstlen, srclen;
		u16 cmd, param;
		u32 cmds[2];
		u64 addr;

		if (get_user(cmds[0], ucmds++))
			return -EFAULT;

		bocmds[i] = cmds[0];

		/* Low half is the opcode, high half its 16-bit parameter. */
		cmd = cmds[0];
		param = cmds[0] >> 16;

		/* Bit 0x4000 marks a command that carries a second word. */
		if (cmd & 0x4000) {
			/*
			 * Make sure the second word lies inside the stream
			 * declared by the caller before reading it, so user
			 * memory past @size is never accessed.
			 */
			i++;
			if (i >= nwords)
				return -EINVAL;

			if (get_user(cmds[1], ucmds++))
				return -EFAULT;

			bocmds[i] = cmds[1];
			addr = cmd_to_addr(cmds);
		}

		switch (cmd) {
		case NPU_OP_DMA_START:
			srclen = dma_length(info, &st.dma, &st.dma.src);
			dstlen = dma_length(info, &st.dma, &st.dma.dst);
			if (srclen == U64_MAX || dstlen == U64_MAX)
				return -EINVAL;

			if (st.dma.dst.region >= 0)
				info->output_region[st.dma.dst.region] = true;
			dev_dbg(ddev->dev, "cmd: DMA SRC:%d:0x%llx+0x%llx DST:%d:0x%llx+0x%llx\n",
				st.dma.src.region, st.dma.src.offset, srclen,
				st.dma.dst.region, st.dma.dst.offset, dstlen);
			break;
		case NPU_OP_CONV:
		case NPU_OP_DEPTHWISE:
			use_ifm2 = param & 0x1;  // weights_ifm2
			use_scale = !(st.ofm.precision & 0x100);
			ret = calc_sizes(ddev, info, cmd, &st, true, use_ifm2,
					 !use_ifm2, use_scale);
			if (ret)
				return ret;
			break;
		case NPU_OP_POOL:
			use_ifm = param != 0x4;  // pooling mode
			use_scale = !(st.ofm.precision & 0x100);
			ret = calc_sizes(ddev, info, cmd, &st, use_ifm, false,
					 false, use_scale);
			if (ret)
				return ret;
			break;
		case NPU_OP_ELEMENTWISE:
			use_scale = ethosu_is_u65(edev) ?
				    (st.ifm2.broadcast & 0x80) :
				    (st.ifm2.broadcast == 8);
			use_ifm2 = !(use_scale || (param == 5) ||
				(param == 6) || (param == 7) || (param == 0x24));
			use_ifm = st.ifm.broadcast != 8;
			ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2);
			if (ret)
				return ret;
			break;
		case NPU_OP_RESIZE: // U85 only
			return -EINVAL;
		case NPU_SET_KERNEL_WIDTH_M1:
			st.ifm.width = param;
			break;
		case NPU_SET_KERNEL_HEIGHT_M1:
			st.ifm.height[2] = param;
			break;
		case NPU_SET_KERNEL_STRIDE:
			st.ifm.stride_kernel = param;
			break;
		case NPU_SET_IFM_PAD_TOP:
			st.ifm.pad_top = param & 0x7f;
			break;
		case NPU_SET_IFM_PAD_LEFT:
			st.ifm.pad_left = param & 0x7f;
			break;
		case NPU_SET_IFM_PAD_RIGHT:
			st.ifm.pad_right = param & 0xff;
			break;
		case NPU_SET_IFM_PAD_BOTTOM:
			st.ifm.pad_bottom = param & 0xff;
			break;
		case NPU_SET_IFM_DEPTH_M1:
			st.ifm.depth = param;
			break;
		case NPU_SET_IFM_PRECISION:
			st.ifm.precision = param;
			break;
		case NPU_SET_IFM_BROADCAST:
			st.ifm.broadcast = param;
			break;
		case NPU_SET_IFM_REGION:
			st.ifm.region = param & 0x7;
			break;
		case NPU_SET_IFM_WIDTH0_M1:
			st.ifm.width0 = param;
			break;
		case NPU_SET_IFM_HEIGHT0_M1:
			st.ifm.height[0] = param;
			break;
		case NPU_SET_IFM_HEIGHT1_M1:
			st.ifm.height[1] = param;
			break;
		case NPU_SET_IFM_BASE0:
		case NPU_SET_IFM_BASE1:
		case NPU_SET_IFM_BASE2:
		case NPU_SET_IFM_BASE3:
			st.ifm.base[cmd & 0x3] = addr;
			break;
		case NPU_SET_IFM_STRIDE_X:
			st.ifm.stride_x = addr;
			break;
		case NPU_SET_IFM_STRIDE_Y:
			st.ifm.stride_y = addr;
			break;
		case NPU_SET_IFM_STRIDE_C:
			st.ifm.stride_c = addr;
			break;

		case NPU_SET_OFM_WIDTH_M1:
			st.ofm.width = param;
			break;
		case NPU_SET_OFM_HEIGHT_M1:
			st.ofm.height[2] = param;
			break;
		case NPU_SET_OFM_DEPTH_M1:
			st.ofm.depth = param;
			break;
		case NPU_SET_OFM_PRECISION:
			st.ofm.precision = param;
			break;
		case NPU_SET_OFM_REGION:
			st.ofm.region = param & 0x7;
			break;
		case NPU_SET_OFM_WIDTH0_M1:
			st.ofm.width0 = param;
			break;
		case NPU_SET_OFM_HEIGHT0_M1:
			st.ofm.height[0] = param;
			break;
		case NPU_SET_OFM_HEIGHT1_M1:
			st.ofm.height[1] = param;
			break;
		case NPU_SET_OFM_BASE0:
		case NPU_SET_OFM_BASE1:
		case NPU_SET_OFM_BASE2:
		case NPU_SET_OFM_BASE3:
			st.ofm.base[cmd & 0x3] = addr;
			break;
		case NPU_SET_OFM_STRIDE_X:
			st.ofm.stride_x = addr;
			break;
		case NPU_SET_OFM_STRIDE_Y:
			st.ofm.stride_y = addr;
			break;
		case NPU_SET_OFM_STRIDE_C:
			st.ofm.stride_c = addr;
			break;

		case NPU_SET_IFM2_BROADCAST:
			st.ifm2.broadcast = param;
			break;
		case NPU_SET_IFM2_PRECISION:
			st.ifm2.precision = param;
			break;
		case NPU_SET_IFM2_REGION:
			st.ifm2.region = param & 0x7;
			break;
		case NPU_SET_IFM2_WIDTH0_M1:
			st.ifm2.width0 = param;
			break;
		case NPU_SET_IFM2_HEIGHT0_M1:
			st.ifm2.height[0] = param;
			break;
		case NPU_SET_IFM2_HEIGHT1_M1:
			st.ifm2.height[1] = param;
			break;
		case NPU_SET_IFM2_BASE0:
		case NPU_SET_IFM2_BASE1:
		case NPU_SET_IFM2_BASE2:
		case NPU_SET_IFM2_BASE3:
			st.ifm2.base[cmd & 0x3] = addr;
			break;
		case NPU_SET_IFM2_STRIDE_X:
			st.ifm2.stride_x = addr;
			break;
		case NPU_SET_IFM2_STRIDE_Y:
			st.ifm2.stride_y = addr;
			break;
		case NPU_SET_IFM2_STRIDE_C:
			st.ifm2.stride_c = addr;
			break;

		case NPU_SET_WEIGHT_REGION:
			st.weight[0].region = param & 0x7;
			break;
		case NPU_SET_SCALE_REGION:
			st.scale[0].region = param & 0x7;
			break;
		case NPU_SET_WEIGHT_BASE:
			st.weight[0].base = addr;
			break;
		case NPU_SET_WEIGHT_LENGTH:
			st.weight[0].length = cmds[1];
			break;
		case NPU_SET_SCALE_BASE:
			st.scale[0].base = addr;
			break;
		case NPU_SET_SCALE_LENGTH:
			st.scale[0].length = cmds[1];
			break;
		case NPU_SET_WEIGHT1_BASE:
			st.weight[1].base = addr;
			break;
		case NPU_SET_WEIGHT1_LENGTH:
			st.weight[1].length = cmds[1];
			break;
		case NPU_SET_SCALE1_BASE: // NPU_SET_WEIGHT2_BASE (U85)
			if (ethosu_is_u65(edev))
				st.scale[1].base = addr;
			else
				st.weight[2].base = addr;
			break;
		case NPU_SET_SCALE1_LENGTH: // NPU_SET_WEIGHT2_LENGTH (U85)
			if (ethosu_is_u65(edev))
				st.scale[1].length = cmds[1];
			else
				st.weight[2].length = cmds[1];
			break;
		case NPU_SET_WEIGHT3_BASE:
			st.weight[3].base = addr;
			break;
		case NPU_SET_WEIGHT3_LENGTH:
			st.weight[3].length = cmds[1];
			break;

		case NPU_SET_DMA0_SRC_REGION:
			if (param & 0x100)
				st.dma.src.region = -1;
			else
				st.dma.src.region = param & 0x7;
			st.dma.mode = (param >> 9) & 0x3;
			break;
		case NPU_SET_DMA0_DST_REGION:
			if (param & 0x100)
				st.dma.dst.region = -1;
			else
				st.dma.dst.region = param & 0x7;
			break;
		case NPU_SET_DMA0_SIZE0:
			st.dma.size0 = param;
			break;
		case NPU_SET_DMA0_SIZE1:
			st.dma.size1 = param;
			break;
		/* Strides are 40-bit signed values: sign-extend to 64 bits. */
		case NPU_SET_DMA0_SRC_STRIDE0:
			st.dma.src.stride[0] = ((s64)addr << 24) >> 24;
			break;
		case NPU_SET_DMA0_SRC_STRIDE1:
			st.dma.src.stride[1] = ((s64)addr << 24) >> 24;
			break;
		case NPU_SET_DMA0_DST_STRIDE0:
			st.dma.dst.stride[0] = ((s64)addr << 24) >> 24;
			break;
		case NPU_SET_DMA0_DST_STRIDE1:
			st.dma.dst.stride[1] = ((s64)addr << 24) >> 24;
			break;
		case NPU_SET_DMA0_SRC:
			st.dma.src.offset = addr;
			break;
		case NPU_SET_DMA0_DST:
			st.dma.dst.offset = addr;
			break;
		case NPU_SET_DMA0_LEN:
			st.dma.src.len = st.dma.dst.len = addr;
			break;
		default:
			/* Commands not tracked here are copied unchanged. */
			break;
		}
	}

	for (i = 0; i < NPU_BASEP_REGION_MAX; i++) {
		if (!info->region_size[i])
			continue;
		dev_dbg(ddev->dev, "region %d max size: 0x%llx\n",
			i, info->region_size[i]);
	}

	/* Hand ownership of info to the BO; it is freed with the object. */
	bo->info = no_free_ptr(info);
	return 0;
}
683 
684 /**
685  * ethosu_gem_cmdstream_create() - Create a GEM object and attach it to a handle.
686  * @file: DRM file.
687  * @ddev: DRM device.
688  * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared.
689  * @size: Size of the GEM object to allocate.
690  * @flags: Combination of drm_ethosu_bo_flags flags.
691  * @handle: Pointer holding the handle pointing to the new GEM object.
692  *
693  * Return: Zero on success
694  */
ethosu_gem_cmdstream_create(struct drm_file * file,struct drm_device * ddev,u32 size,u64 data,u32 flags,u32 * handle)695 int ethosu_gem_cmdstream_create(struct drm_file *file,
696 				struct drm_device *ddev,
697 				u32 size, u64 data, u32 flags, u32 *handle)
698 {
699 	int ret;
700 	struct drm_gem_dma_object *mem;
701 	struct ethosu_gem_object *bo;
702 
703 	mem = drm_gem_dma_create(ddev, size);
704 	if (IS_ERR(mem))
705 		return PTR_ERR(mem);
706 
707 	bo = to_ethosu_bo(&mem->base);
708 	bo->flags = flags;
709 
710 	ret = ethosu_gem_cmdstream_copy_and_validate(ddev,
711 						     (void __user *)(uintptr_t)data,
712 						     bo, size);
713 	if (ret)
714 		goto fail;
715 
716 	/*
717 	 * Allocate an id of idr table where the obj is registered
718 	 * and handle has the id what user can see.
719 	 */
720 	ret = drm_gem_handle_create(file, &mem->base, handle);
721 
722 fail:
723 	/* drop reference from allocate - handle holds it now. */
724 	drm_gem_object_put(&mem->base);
725 
726 	return ret;
727 }
728