xref: /linux/drivers/accel/amdxdna/aie2_ctx.c (revision 54fd6bd42e7bd351802ff1d193a2e33e4bfb1836)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2024, Advanced Micro Devices, Inc.
4  */
5 
6 #include <drm/amdxdna_accel.h>
7 #include <drm/drm_device.h>
8 #include <drm/drm_gem.h>
9 #include <drm/drm_gem_shmem_helper.h>
10 #include <drm/drm_print.h>
11 #include <drm/drm_syncobj.h>
12 #include <linux/hmm.h>
13 #include <linux/types.h>
14 #include <linux/xarray.h>
15 #include <trace/events/amdxdna.h>
16 
17 #include "aie2_msg_priv.h"
18 #include "aie2_pci.h"
19 #include "aie2_solver.h"
20 #include "amdxdna_ctx.h"
21 #include "amdxdna_gem.h"
22 #include "amdxdna_mailbox.h"
23 #include "amdxdna_pci_drv.h"
24 
/*
 * Module parameter (mode 0600). When set, aie2_sched_job_run() submits a
 * non-chain command through aie2_cmdlist_single_execbuf() instead of
 * aie2_execbuf().
 */
static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");

/* Job timeout passed to drm_sched_init() by aie2_hwctx_init(). */
#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */
30 
31 static void aie2_job_release(struct kref *ref)
32 {
33 	struct amdxdna_sched_job *job;
34 
35 	job = container_of(ref, struct amdxdna_sched_job, refcnt);
36 	amdxdna_sched_job_cleanup(job);
37 	atomic64_inc(&job->hwctx->job_free_cnt);
38 	wake_up(&job->hwctx->priv->job_free_wq);
39 	if (job->out_fence)
40 		dma_fence_put(job->out_fence);
41 	kfree(job);
42 }
43 
/* Drop one reference on @job; the final put runs aie2_job_release(). */
static void aie2_job_put(struct amdxdna_sched_job *job)
{
	kref_put(&job->refcnt, aie2_job_release);
}
48 
/*
 * Stop the hwctx's DRM scheduler, then destroy the context with
 * aie2_destroy_context().
 *
 * @bad_job is only meaningful when called from aie2_sched_job_timedout();
 * every other caller passes NULL.
 */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
			    struct drm_sched_job *bad_job)
{
	drm_sched_stop(&hwctx->priv->sched, bad_job);
	aie2_destroy_context(xdna->dev_handle, hwctx);
}
56 
57 static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
58 {
59 	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
60 	int ret;
61 
62 	ret = aie2_create_context(xdna->dev_handle, hwctx);
63 	if (ret) {
64 		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
65 		goto out;
66 	}
67 
68 	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
69 				heap->mem.userptr, heap->mem.size);
70 	if (ret) {
71 		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
72 		goto out;
73 	}
74 
75 	if (hwctx->status != HWCTX_STAT_READY) {
76 		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
77 		goto out;
78 	}
79 
80 	ret = aie2_config_cu(hwctx);
81 	if (ret) {
82 		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
83 		goto out;
84 	}
85 
86 out:
87 	drm_sched_start(&hwctx->priv->sched, 0);
88 	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
89 	return ret;
90 }
91 
92 void aie2_restart_ctx(struct amdxdna_client *client)
93 {
94 	struct amdxdna_dev *xdna = client->xdna;
95 	struct amdxdna_hwctx *hwctx;
96 	unsigned long hwctx_id;
97 
98 	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
99 	mutex_lock(&client->hwctx_lock);
100 	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
101 		if (hwctx->status != HWCTX_STAT_STOP)
102 			continue;
103 
104 		hwctx->status = hwctx->old_status;
105 		XDNA_DBG(xdna, "Resetting %s", hwctx->name);
106 		aie2_hwctx_restart(xdna, hwctx);
107 	}
108 	mutex_unlock(&client->hwctx_lock);
109 }
110 
111 static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
112 {
113 	struct dma_fence *fence, *out_fence = NULL;
114 	int ret;
115 
116 	fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
117 	if (!fence)
118 		return NULL;
119 
120 	ret = dma_fence_chain_find_seqno(&fence,  seq);
121 	if (ret)
122 		goto out;
123 
124 	out_fence = dma_fence_get(dma_fence_chain_contained(fence));
125 
126 out:
127 	dma_fence_put(fence);
128 	return out_fence;
129 }
130 
131 static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
132 {
133 	struct dma_fence *fence;
134 
135 	fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
136 	if (!fence)
137 		return;
138 
139 	/* Wait up to 2 seconds for fw to finish all pending requests */
140 	dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
141 	dma_fence_put(fence);
142 }
143 
/*
 * Suspend @hwctx: wait (bounded) for pending work, stop the context, then
 * remember the current status in old_status and mark the hwctx
 * HWCTX_STAT_STOP. Warns if the caller does not hold xdna->dev_lock.
 */
void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	/*
	 * A command timeout is unlikely. But if it happens, it doesn't
	 * break the system. aie2_hwctx_stop() will destroy the mailbox
	 * and abort all commands.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	aie2_hwctx_wait_for_idle(hwctx);
	aie2_hwctx_stop(xdna, hwctx, NULL);
	/* Saved so aie2_hwctx_resume()/aie2_restart_ctx() can restore it. */
	hwctx->old_status = hwctx->status;
	hwctx->status = HWCTX_STAT_STOP;
}
159 
/*
 * Resume @hwctx: restore the status saved in old_status and restart the
 * context. Warns if the caller does not hold xdna->dev_lock. The result
 * of aie2_hwctx_restart() is not propagated.
 */
void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	/*
	 * The resume path cannot guarantee that the mailbox channel can be
	 * regenerated. If that happens, submitting a message to this
	 * mailbox channel will return an error.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	hwctx->status = hwctx->old_status;
	aie2_hwctx_restart(xdna, hwctx);
}
173 
/*
 * Common completion step shared by all response handlers.
 *
 * Signals job->fence, gives one job_sem slot back, marks the job done and
 * then drops a fence reference, an mm reference (asynchronously) and a job
 * reference. aie2_sched_job_run() acquires one of each before the command
 * is sent.
 */
static void
aie2_sched_notify(struct amdxdna_sched_job *job)
{
	struct dma_fence *fence = job->fence;

	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
	job->hwctx->priv->completed++;
	dma_fence_signal(fence);

	up(&job->hwctx->priv->job_sem);
	/* Tells aie2_sched_job_free() the job_sem slot is already returned. */
	job->job_done = true;
	dma_fence_put(fence);
	mmput_async(job->mm);
	aie2_job_put(job);
}
189 
190 static int
191 aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
192 {
193 	struct amdxdna_sched_job *job = handle;
194 	struct amdxdna_gem_obj *cmd_abo;
195 	u32 ret = 0;
196 	u32 status;
197 
198 	cmd_abo = job->cmd_bo;
199 
200 	if (unlikely(!data))
201 		goto out;
202 
203 	if (unlikely(size != sizeof(u32))) {
204 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
205 		ret = -EINVAL;
206 		goto out;
207 	}
208 
209 	status = readl(data);
210 	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
211 	if (status == AIE2_STATUS_SUCCESS)
212 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
213 	else
214 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
215 
216 out:
217 	aie2_sched_notify(job);
218 	return ret;
219 }
220 
221 static int
222 aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
223 {
224 	struct amdxdna_sched_job *job = handle;
225 	u32 ret = 0;
226 	u32 status;
227 
228 	if (unlikely(!data))
229 		goto out;
230 
231 	if (unlikely(size != sizeof(u32))) {
232 		ret = -EINVAL;
233 		goto out;
234 	}
235 
236 	status = readl(data);
237 	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
238 
239 out:
240 	aie2_sched_notify(job);
241 	return ret;
242 }
243 
244 static int
245 aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
246 {
247 	struct amdxdna_sched_job *job = handle;
248 	struct amdxdna_gem_obj *cmd_abo;
249 	struct amdxdna_dev *xdna;
250 	u32 fail_cmd_status;
251 	u32 fail_cmd_idx;
252 	u32 cmd_status;
253 	u32 ret = 0;
254 
255 	cmd_abo = job->cmd_bo;
256 	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
257 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
258 		ret = -EINVAL;
259 		goto out;
260 	}
261 
262 	cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
263 	xdna = job->hwctx->client->xdna;
264 	XDNA_DBG(xdna, "Status 0x%x", cmd_status);
265 	if (cmd_status == AIE2_STATUS_SUCCESS) {
266 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
267 		goto out;
268 	}
269 
270 	/* Slow path to handle error, read from ringbuf on BAR */
271 	fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
272 	fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
273 	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
274 		 fail_cmd_idx, fail_cmd_status);
275 
276 	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
277 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
278 		ret = -EINVAL;
279 		goto out;
280 	}
281 	amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
282 
283 	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
284 		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
285 
286 		cc->error_index = fail_cmd_idx;
287 		if (cc->error_index >= cc->command_count)
288 			cc->error_index = 0;
289 	}
290 out:
291 	aie2_sched_notify(job);
292 	return ret;
293 }
294 
295 static struct dma_fence *
296 aie2_sched_job_run(struct drm_sched_job *sched_job)
297 {
298 	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
299 	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
300 	struct amdxdna_hwctx *hwctx = job->hwctx;
301 	struct dma_fence *fence;
302 	int ret;
303 
304 	if (!mmget_not_zero(job->mm))
305 		return ERR_PTR(-ESRCH);
306 
307 	kref_get(&job->refcnt);
308 	fence = dma_fence_get(job->fence);
309 
310 	if (unlikely(!cmd_abo)) {
311 		ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
312 		goto out;
313 	}
314 
315 	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
316 
317 	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
318 		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
319 	else if (force_cmdlist)
320 		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
321 	else
322 		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
323 
324 out:
325 	if (ret) {
326 		dma_fence_put(job->fence);
327 		aie2_job_put(job);
328 		mmput(job->mm);
329 		fence = ERR_PTR(ret);
330 	}
331 	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
332 
333 	return fence;
334 }
335 
/*
 * drm_sched .free_job callback: clean up the scheduler job and drop a job
 * reference.
 */
static void aie2_sched_job_free(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;

	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
	/*
	 * job_done is set by aie2_sched_notify() once it has returned the
	 * job_sem slot; if that never happened, return the slot here.
	 */
	if (!job->job_done)
		up(&hwctx->priv->job_sem);

	drm_sched_job_cleanup(sched_job);
	aie2_job_put(job);
}
348 
349 static enum drm_gpu_sched_stat
350 aie2_sched_job_timedout(struct drm_sched_job *sched_job)
351 {
352 	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
353 	struct amdxdna_hwctx *hwctx = job->hwctx;
354 	struct amdxdna_dev *xdna;
355 
356 	xdna = hwctx->client->xdna;
357 	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
358 	mutex_lock(&xdna->dev_lock);
359 	aie2_hwctx_stop(xdna, hwctx, sched_job);
360 
361 	aie2_hwctx_restart(xdna, hwctx);
362 	mutex_unlock(&xdna->dev_lock);
363 
364 	return DRM_GPU_SCHED_STAT_RESET;
365 }
366 
/* DRM scheduler backend callbacks, registered in aie2_hwctx_init(). */
static const struct drm_sched_backend_ops sched_ops = {
	.run_job = aie2_sched_job_run,
	.free_job = aie2_sched_job_free,
	.timedout_job = aie2_sched_job_timedout,
};
372 
373 static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
374 {
375 	struct amdxdna_dev *xdna = hwctx->client->xdna;
376 	struct amdxdna_dev_hdl *ndev;
377 	int start, end, first, last;
378 	u32 width = 1, entries = 0;
379 	int i;
380 
381 	if (!hwctx->num_tiles) {
382 		XDNA_ERR(xdna, "Number of tiles is zero");
383 		return -EINVAL;
384 	}
385 
386 	ndev = xdna->dev_handle;
387 	if (unlikely(!ndev->metadata.core.row_count)) {
388 		XDNA_WARN(xdna, "Core tile row count is zero");
389 		return -EINVAL;
390 	}
391 
392 	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
393 	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
394 		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
395 		return -EINVAL;
396 	}
397 
398 	if (ndev->priv->col_align == COL_ALIGN_NATURE)
399 		width = hwctx->num_col;
400 
401 	/*
402 	 * In range [start, end], find out columns that is multiple of width.
403 	 *	'first' is the first column,
404 	 *	'last' is the last column,
405 	 *	'entries' is the total number of columns.
406 	 */
407 	start =  xdna->dev_info->first_col;
408 	end =  ndev->total_col - hwctx->num_col;
409 	if (start > 0 && end == 0) {
410 		XDNA_DBG(xdna, "Force start from col 0");
411 		start = 0;
412 	}
413 	first = start + (width - start % width) % width;
414 	last = end - end % width;
415 	if (last >= first)
416 		entries = (last - first) / width + 1;
417 	XDNA_DBG(xdna, "start %d end %d first %d last %d",
418 		 start, end, first, last);
419 
420 	if (unlikely(!entries)) {
421 		XDNA_ERR(xdna, "Start %d end %d width %d",
422 			 start, end, width);
423 		return -EINVAL;
424 	}
425 
426 	hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
427 	if (!hwctx->col_list)
428 		return -ENOMEM;
429 
430 	hwctx->col_list_len = entries;
431 	hwctx->col_list[0] = first;
432 	for (i = 1; i < entries; i++)
433 		hwctx->col_list[i] = hwctx->col_list[i - 1] + width;
434 
435 	print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
436 			     entries * sizeof(*hwctx->col_list), false);
437 	return 0;
438 }
439 
440 static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
441 {
442 	struct amdxdna_dev *xdna = hwctx->client->xdna;
443 	struct alloc_requests *xrs_req;
444 	int ret;
445 
446 	xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
447 	if (!xrs_req)
448 		return -ENOMEM;
449 
450 	xrs_req->cdo.start_cols = hwctx->col_list;
451 	xrs_req->cdo.cols_len = hwctx->col_list_len;
452 	xrs_req->cdo.ncols = hwctx->num_col;
453 	xrs_req->cdo.qos_cap.opc = hwctx->max_opc;
454 
455 	xrs_req->rqos.gops = hwctx->qos.gops;
456 	xrs_req->rqos.fps = hwctx->qos.fps;
457 	xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
458 	xrs_req->rqos.latency = hwctx->qos.latency;
459 	xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
460 	xrs_req->rqos.priority = hwctx->qos.priority;
461 
462 	xrs_req->rid = (uintptr_t)hwctx;
463 
464 	ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
465 	if (ret)
466 		XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);
467 
468 	kfree(xrs_req);
469 	return ret;
470 }
471 
/*
 * Release the solver resources of @hwctx, identified by the same
 * pointer-derived id used in aie2_alloc_resource(). Failures are only
 * logged.
 */
static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	int ret;

	ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
	if (ret)
		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
}
481 
482 static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
483 {
484 	struct amdxdna_dev *xdna = hwctx->client->xdna;
485 	struct drm_file *filp = hwctx->client->filp;
486 	struct drm_syncobj *syncobj;
487 	u32 hdl;
488 	int ret;
489 
490 	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
491 
492 	ret = drm_syncobj_create(&syncobj, 0, NULL);
493 	if (ret) {
494 		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
495 		return ret;
496 	}
497 	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
498 	if (ret) {
499 		drm_syncobj_put(syncobj);
500 		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
501 		return ret;
502 	}
503 	hwctx->priv->syncobj = syncobj;
504 	hwctx->syncobj_hdl = hdl;
505 
506 	return 0;
507 }
508 
/* Drop the hwctx's own reference on the syncobj created at init time. */
static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
{
	/*
	 * The syncobj_hdl is owned by user space and will be cleaned up
	 * separately.
	 */
	drm_syncobj_put(hwctx->priv->syncobj);
}
517 
518 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
519 {
520 	struct amdxdna_client *client = hwctx->client;
521 	struct amdxdna_dev *xdna = client->xdna;
522 	const struct drm_sched_init_args args = {
523 		.ops = &sched_ops,
524 		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
525 		.credit_limit = HWCTX_MAX_CMDS,
526 		.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
527 		.name = hwctx->name,
528 		.dev = xdna->ddev.dev,
529 	};
530 	struct drm_gpu_scheduler *sched;
531 	struct amdxdna_hwctx_priv *priv;
532 	struct amdxdna_gem_obj *heap;
533 	struct amdxdna_dev_hdl *ndev;
534 	int i, ret;
535 
536 	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
537 	if (!priv)
538 		return -ENOMEM;
539 	hwctx->priv = priv;
540 
541 	mutex_lock(&client->mm_lock);
542 	heap = client->dev_heap;
543 	if (!heap) {
544 		XDNA_ERR(xdna, "The client dev heap object not exist");
545 		mutex_unlock(&client->mm_lock);
546 		ret = -ENOENT;
547 		goto free_priv;
548 	}
549 	drm_gem_object_get(to_gobj(heap));
550 	mutex_unlock(&client->mm_lock);
551 	priv->heap = heap;
552 	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);
553 
554 	ret = amdxdna_gem_pin(heap);
555 	if (ret) {
556 		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
557 		goto put_heap;
558 	}
559 
560 	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
561 		struct amdxdna_gem_obj *abo;
562 		struct amdxdna_drm_create_bo args = {
563 			.flags = 0,
564 			.type = AMDXDNA_BO_DEV,
565 			.vaddr = 0,
566 			.size = MAX_CHAIN_CMDBUF_SIZE,
567 		};
568 
569 		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
570 		if (IS_ERR(abo)) {
571 			ret = PTR_ERR(abo);
572 			goto free_cmd_bufs;
573 		}
574 
575 		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
576 			 i, abo->mem.dev_addr, abo->mem.size);
577 		priv->cmd_buf[i] = abo;
578 	}
579 
580 	sched = &priv->sched;
581 	mutex_init(&priv->io_lock);
582 
583 	fs_reclaim_acquire(GFP_KERNEL);
584 	might_lock(&priv->io_lock);
585 	fs_reclaim_release(GFP_KERNEL);
586 
587 	ret = drm_sched_init(sched, &args);
588 	if (ret) {
589 		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
590 		goto free_cmd_bufs;
591 	}
592 
593 	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
594 				    &sched, 1, NULL);
595 	if (ret) {
596 		XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
597 		goto free_sched;
598 	}
599 
600 	ret = aie2_hwctx_col_list(hwctx);
601 	if (ret) {
602 		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
603 		goto free_entity;
604 	}
605 
606 	ret = aie2_alloc_resource(hwctx);
607 	if (ret) {
608 		XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
609 		goto free_col_list;
610 	}
611 
612 	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
613 				heap->mem.userptr, heap->mem.size);
614 	if (ret) {
615 		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
616 		goto release_resource;
617 	}
618 
619 	ret = aie2_ctx_syncobj_create(hwctx);
620 	if (ret) {
621 		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
622 		goto release_resource;
623 	}
624 
625 	hwctx->status = HWCTX_STAT_INIT;
626 	ndev = xdna->dev_handle;
627 	ndev->hwctx_num++;
628 	init_waitqueue_head(&priv->job_free_wq);
629 
630 	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
631 
632 	return 0;
633 
634 release_resource:
635 	aie2_release_resource(hwctx);
636 free_col_list:
637 	kfree(hwctx->col_list);
638 free_entity:
639 	drm_sched_entity_destroy(&priv->entity);
640 free_sched:
641 	drm_sched_fini(&priv->sched);
642 free_cmd_bufs:
643 	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
644 		if (!priv->cmd_buf[i])
645 			continue;
646 		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
647 	}
648 	amdxdna_gem_unpin(heap);
649 put_heap:
650 	drm_gem_object_put(to_gobj(heap));
651 free_priv:
652 	kfree(priv);
653 	return ret;
654 }
655 
656 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
657 {
658 	struct amdxdna_dev_hdl *ndev;
659 	struct amdxdna_dev *xdna;
660 	int idx;
661 
662 	xdna = hwctx->client->xdna;
663 	ndev = xdna->dev_handle;
664 	ndev->hwctx_num--;
665 
666 	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
667 	drm_sched_entity_destroy(&hwctx->priv->entity);
668 
669 	aie2_hwctx_wait_for_idle(hwctx);
670 
671 	/* Request fw to destroy hwctx and cancel the rest pending requests */
672 	aie2_release_resource(hwctx);
673 
674 	/* Wait for all submitted jobs to be completed or canceled */
675 	wait_event(hwctx->priv->job_free_wq,
676 		   atomic64_read(&hwctx->job_submit_cnt) ==
677 		   atomic64_read(&hwctx->job_free_cnt));
678 
679 	drm_sched_fini(&hwctx->priv->sched);
680 	aie2_ctx_syncobj_destroy(hwctx);
681 
682 	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
683 		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
684 	amdxdna_gem_unpin(hwctx->priv->heap);
685 	drm_gem_object_put(to_gobj(hwctx->priv->heap));
686 
687 	mutex_destroy(&hwctx->priv->io_lock);
688 	kfree(hwctx->col_list);
689 	kfree(hwctx->priv);
690 	kfree(hwctx->cus);
691 }
692 
693 static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
694 {
695 	struct amdxdna_hwctx_param_config_cu *config = buf;
696 	struct amdxdna_dev *xdna = hwctx->client->xdna;
697 	u32 total_size;
698 	int ret;
699 
700 	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
701 	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
702 		return -EINVAL;
703 
704 	if (hwctx->status != HWCTX_STAT_INIT) {
705 		XDNA_ERR(xdna, "Not support re-config CU");
706 		return -EINVAL;
707 	}
708 
709 	if (!config->num_cus) {
710 		XDNA_ERR(xdna, "Number of CU is zero");
711 		return -EINVAL;
712 	}
713 
714 	total_size = struct_size(config, cu_configs, config->num_cus);
715 	if (total_size > size) {
716 		XDNA_ERR(xdna, "CU config larger than size");
717 		return -EINVAL;
718 	}
719 
720 	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
721 	if (!hwctx->cus)
722 		return -ENOMEM;
723 
724 	ret = aie2_config_cu(hwctx);
725 	if (ret) {
726 		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
727 		goto free_cus;
728 	}
729 
730 	wmb(); /* To avoid locking in command submit when check status */
731 	hwctx->status = HWCTX_STAT_READY;
732 
733 	return 0;
734 
735 free_cus:
736 	kfree(hwctx->cus);
737 	hwctx->cus = NULL;
738 	return ret;
739 }
740 
/*
 * Dispatch a hwctx configuration request by @type.
 *
 * Only DRM_AMDXDNA_HWCTX_CONFIG_CU is implemented here; it forwards @buf
 * and @size to aie2_hwctx_cu_config(). The debug-buffer types and any
 * unknown type return -EOPNOTSUPP. @value is not used by this function.
 * Warns if the caller does not hold xdna->dev_lock.
 */
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return -EOPNOTSUPP;
	default:
		XDNA_DBG(xdna, "Not supported type %d", type);
		return -EOPNOTSUPP;
	}
}
757 
758 static int aie2_populate_range(struct amdxdna_gem_obj *abo)
759 {
760 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
761 	struct amdxdna_umap *mapp;
762 	unsigned long timeout;
763 	struct mm_struct *mm;
764 	bool found;
765 	int ret;
766 
767 	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
768 again:
769 	found = false;
770 	down_write(&xdna->notifier_lock);
771 	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
772 		if (mapp->invalid) {
773 			found = true;
774 			break;
775 		}
776 	}
777 
778 	if (!found) {
779 		abo->mem.map_invalid = false;
780 		up_write(&xdna->notifier_lock);
781 		return 0;
782 	}
783 	kref_get(&mapp->refcnt);
784 	up_write(&xdna->notifier_lock);
785 
786 	XDNA_DBG(xdna, "populate memory range %lx %lx",
787 		 mapp->vma->vm_start, mapp->vma->vm_end);
788 	mm = mapp->notifier.mm;
789 	if (!mmget_not_zero(mm)) {
790 		amdxdna_umap_put(mapp);
791 		return -EFAULT;
792 	}
793 
794 	mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
795 	mmap_read_lock(mm);
796 	ret = hmm_range_fault(&mapp->range);
797 	mmap_read_unlock(mm);
798 	if (ret) {
799 		if (time_after(jiffies, timeout)) {
800 			ret = -ETIME;
801 			goto put_mm;
802 		}
803 
804 		if (ret == -EBUSY) {
805 			amdxdna_umap_put(mapp);
806 			goto again;
807 		}
808 
809 		goto put_mm;
810 	}
811 
812 	down_write(&xdna->notifier_lock);
813 	if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
814 		up_write(&xdna->notifier_lock);
815 		amdxdna_umap_put(mapp);
816 		goto again;
817 	}
818 	mapp->invalid = false;
819 	up_write(&xdna->notifier_lock);
820 	amdxdna_umap_put(mapp);
821 	goto again;
822 
823 put_mm:
824 	amdxdna_umap_put(mapp);
825 	mmput(mm);
826 	return ret;
827 }
828 
/*
 * Submit @job on @hwctx and report its sequence number through @seq.
 *
 * Takes one job_sem slot, initializes the DRM scheduler job, locks the
 * reservations of every BO in job->bos and reserves a fence slot on each,
 * re-populates any BO whose mapping is flagged invalid, and finally arms
 * and pushes the job and adds its finished fence to the hwctx syncobj at
 * point *seq.
 *
 * Returns 0 on success or a negative error code. On failure the job_sem
 * slot is given back and job->job_done is set.
 */
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long timeout = 0;
	int ret, i;

	/* Throttle: job_sem was initialized to HWCTX_MAX_CMDS slots. */
	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	/* Allocated up front; consumed by drm_syncobj_add_point() below. */
	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
				 hwctx->client->filp->client_id);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	/*
	 * notifier_lock is held for reading from here until the job has been
	 * pushed, unless an invalid mapping forces a retry.
	 */
	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		abo = to_xdna_obj(job->bos[i]);
		if (abo->mem.map_invalid) {
			/* Drop all locks before faulting the pages back in. */
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			/* The deadline is armed on the first invalid mapping. */
			if (!timeout) {
				timeout = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, timeout)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(abo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	/* io_lock covers sequence assignment, job push and syncobj update. */
	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	/* Extra reference for the pushed job. */
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);
	/* Compared against job_free_cnt in aie2_hwctx_fini(). */
	atomic64_inc(&hwctx->job_submit_cnt);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}
925 
926 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
927 			 unsigned long cur_seq)
928 {
929 	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
930 	struct drm_gem_object *gobj = to_gobj(abo);
931 	long ret;
932 
933 	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
934 				    true, MAX_SCHEDULE_TIMEOUT);
935 	if (!ret || ret == -ERESTARTSYS)
936 		XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
937 }
938