// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>
#include <linux/hmm.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <trace/events/amdxdna.h>

#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "aie2_solver.h"
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
MODULE_PARM_DESC(force_cmdlist, "Force use of the command list path (default false)");
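/*
 * Illustrative usage (module name assumed to be amdxdna):
 *   modprobe amdxdna force_cmdlist=1
 * or at runtime via /sys/module/amdxdna/parameters/force_cmdlist.
 */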

#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */

static void aie2_job_release(struct kref *ref)
{
	struct amdxdna_sched_job *job;

	job = container_of(ref, struct amdxdna_sched_job, refcnt);
	amdxdna_sched_job_cleanup(job);
	atomic64_inc(&job->hwctx->job_free_cnt);
	wake_up(&job->hwctx->priv->job_free_wq);
	if (job->out_fence)
		dma_fence_put(job->out_fence);
	kfree(job);
}

static void aie2_job_put(struct amdxdna_sched_job *job)
{
	kref_put(&job->refcnt, aie2_job_release);
}
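
/*
 * Job reference counting in this file, as implemented below: the submit
 * path takes an extra reference before pushing the job so that
 * aie2_sched_job_free() can drop it, and aie2_sched_job_run() takes one
 * for the in-flight command, dropped by the response handlers via
 * aie2_sched_notify(). The final aie2_job_put() frees the job and wakes
 * the job_free_wq waiter in aie2_hwctx_fini().
 */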

static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx)
{
	hwctx->old_status = hwctx->status;
	hwctx->status = HWCTX_STAT_STOP;
}

static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx)
{
	hwctx->status = hwctx->old_status;
}

/* bad_job is only non-NULL when called from aie2_sched_job_timedout(); all other callers pass NULL */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
			    struct drm_sched_job *bad_job)
{
	drm_sched_stop(&hwctx->priv->sched, bad_job);
	aie2_destroy_context(xdna->dev_handle, hwctx);
}

static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
	int ret;

	ret = aie2_create_context(xdna->dev_handle, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
		goto out;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
		goto out;
	}

	if (hwctx->status != HWCTX_STAT_READY) {
		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
		goto out;
	}

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
		goto out;
	}

out:
	drm_sched_start(&hwctx->priv->sched, 0);
	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
	return ret;
}

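/*
 * Look up the output fence for sequence number @seq on the context's
 * syncobj timeline. Returns NULL if the timeline has no fence for @seq
 * yet; the caller owns the returned reference.
 */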
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence, *out_fence = NULL;
	int ret;

	fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!fence)
		return NULL;

	ret = dma_fence_chain_find_seqno(&fence, seq);
	if (ret)
		goto out;

	out_fence = dma_fence_get(dma_fence_chain_contained(fence));

out:
	dma_fence_put(fence);
	return out_fence;
}

static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
{
	struct dma_fence *fence;

	fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
	if (!fence)
		return;

	/* Wait up to 2 seconds for fw to finish all pending requests */
	dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
	dma_fence_put(fence);
}

static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	aie2_hwctx_wait_for_idle(hwctx);
	aie2_hwctx_stop(xdna, hwctx, NULL);
	aie2_hwctx_status_shift_stop(hwctx);

	return 0;
}

void aie2_hwctx_suspend(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;

	/*
	 * A command timeout is unlikely. But even if it happens, it doesn't
	 * break the system: aie2_hwctx_stop() destroys the mailbox and
	 * aborts all commands.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb);
}

static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	aie2_hwctx_status_restore(hwctx);
	return aie2_hwctx_restart(xdna, hwctx);
}

int aie2_hwctx_resume(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;

	/*
	 * The resume path cannot guarantee that the mailbox channel is
	 * regenerated. If it is not, submitting a message to the channel
	 * will return an error.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
}

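/*
 * Common completion path for all response handlers below: count the
 * completion, signal the job fence, release the command-slot semaphore
 * and drop the references taken in aie2_sched_job_run().
 */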
static void
aie2_sched_notify(struct amdxdna_sched_job *job)
{
	struct dma_fence *fence = job->fence;

	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
	job->hwctx->priv->completed++;
	dma_fence_signal(fence);

	up(&job->hwctx->priv->job_sem);
	job->job_done = true;
	dma_fence_put(fence);
	mmput_async(job->mm);
	aie2_job_put(job);
}

static int
aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	int ret = 0;
	u32 status;

	cmd_abo = job->cmd_bo;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
	if (status == AIE2_STATUS_SUCCESS)
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
	else
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);

out:
	aie2_sched_notify(job);
	return ret;
}

static int
aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	int ret = 0;
	u32 status;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);

out:
	aie2_sched_notify(job);
	return ret;
}

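/*
 * The command-list response is three u32 fields of struct cmd_chain_resp
 * (from the message headers): the overall status, plus the index and
 * status of the failing command for the error path.
 */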
static int
aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	struct amdxdna_dev *xdna;
	u32 fail_cmd_status;
	u32 fail_cmd_idx;
	u32 cmd_status;
	int ret = 0;

	cmd_abo = job->cmd_bo;
	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
	xdna = job->hwctx->client->xdna;
	XDNA_DBG(xdna, "Status 0x%x", cmd_status);
	if (cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
		goto out;
	}

	/* Slow path to handle the error: read the details from the ring buffer on the BAR */
	fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
	fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
		 fail_cmd_idx, fail_cmd_status);

	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}
	amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);

		cc->error_index = fail_cmd_idx;
		if (cc->error_index >= cc->command_count)
			cc->error_index = 0;
	}
out:
	aie2_sched_notify(job);
	return ret;
}

static struct dma_fence *
aie2_sched_job_run(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct dma_fence *fence;
	int ret;

	if (!mmget_not_zero(job->mm))
		return ERR_PTR(-ESRCH);

	kref_get(&job->refcnt);
	fence = dma_fence_get(job->fence);

	if (unlikely(!cmd_abo)) {
		ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
		goto out;
	}

	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else if (force_cmdlist)
		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else
		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);

out:
	if (ret) {
		dma_fence_put(job->fence);
		aie2_job_put(job);
		mmput(job->mm);
		fence = ERR_PTR(ret);
	}
	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);

	return fence;
}

static void aie2_sched_job_free(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;

	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
	if (!job->job_done)
		up(&hwctx->priv->job_sem);

	drm_sched_job_cleanup(sched_job);
	aie2_job_put(job);
}

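/*
 * Timeout recovery, as implemented below: stop the scheduler and destroy
 * the firmware context with the bad job attached, then recreate the
 * context and restart the scheduler. Returning DRM_GPU_SCHED_STAT_RESET
 * tells the scheduler core that a reset was performed.
 */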
static enum drm_gpu_sched_stat
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct amdxdna_dev *xdna;

	xdna = hwctx->client->xdna;
	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
	mutex_lock(&xdna->dev_lock);
	aie2_hwctx_stop(xdna, hwctx, sched_job);

	aie2_hwctx_restart(xdna, hwctx);
	mutex_unlock(&xdna->dev_lock);

	return DRM_GPU_SCHED_STAT_RESET;
}

static const struct drm_sched_backend_ops sched_ops = {
	.run_job = aie2_sched_job_run,
	.free_job = aie2_sched_job_free,
	.timedout_job = aie2_sched_job_timedout,
};

static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int start, end, first, last;
	u32 width = 1, entries = 0;
	int i;

	if (!hwctx->num_tiles) {
		XDNA_ERR(xdna, "Number of tiles is zero");
		return -EINVAL;
	}

	ndev = xdna->dev_handle;
	if (unlikely(!ndev->metadata.core.row_count)) {
		XDNA_WARN(xdna, "Core tile row count is zero");
		return -EINVAL;
	}

	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
		return -EINVAL;
	}

	if (ndev->priv->col_align == COL_ALIGN_NATURE)
		width = hwctx->num_col;

	/*
	 * In the range [start, end], find the columns that are multiples of
	 * width.
	 *	'first' is the first such column,
	 *	'last' is the last such column,
	 *	'entries' is the total number of such columns.
	 */
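	/*
	 * Worked example with hypothetical numbers: for start = 1, end = 6
	 * and width = 2, the math below gives first = 2, last = 6 and
	 * entries = 3, i.e. candidate start columns 2, 4 and 6.
	 */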
	start = xdna->dev_info->first_col;
	end = ndev->total_col - hwctx->num_col;
	if (start > 0 && end == 0) {
		XDNA_DBG(xdna, "Force start from col 0");
		start = 0;
	}
	first = start + (width - start % width) % width;
	last = end - end % width;
	if (last >= first)
		entries = (last - first) / width + 1;
	XDNA_DBG(xdna, "start %d end %d first %d last %d",
		 start, end, first, last);

	if (unlikely(!entries)) {
		XDNA_ERR(xdna, "Start %d end %d width %d",
			 start, end, width);
		return -EINVAL;
	}

	hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
	if (!hwctx->col_list)
		return -ENOMEM;

	hwctx->col_list_len = entries;
	hwctx->col_list[0] = first;
	for (i = 1; i < entries; i++)
		hwctx->col_list[i] = hwctx->col_list[i - 1] + width;

	print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
			     entries * sizeof(*hwctx->col_list), false);
	return 0;
}

446 
447 static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
448 {
449 	struct amdxdna_dev *xdna = hwctx->client->xdna;
450 	struct alloc_requests *xrs_req;
451 	int ret;
452 
453 	xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
454 	if (!xrs_req)
455 		return -ENOMEM;
456 
457 	xrs_req->cdo.start_cols = hwctx->col_list;
458 	xrs_req->cdo.cols_len = hwctx->col_list_len;
459 	xrs_req->cdo.ncols = hwctx->num_col;
460 	xrs_req->cdo.qos_cap.opc = hwctx->max_opc;
461 
462 	xrs_req->rqos.gops = hwctx->qos.gops;
463 	xrs_req->rqos.fps = hwctx->qos.fps;
464 	xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
465 	xrs_req->rqos.latency = hwctx->qos.latency;
466 	xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
467 	xrs_req->rqos.priority = hwctx->qos.priority;
468 
469 	xrs_req->rid = (uintptr_t)hwctx;
470 
471 	ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
472 	if (ret)
473 		XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);
474 
475 	kfree(xrs_req);
476 	return ret;
477 }
478 
479 static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
480 {
481 	struct amdxdna_dev *xdna = hwctx->client->xdna;
482 	int ret;
483 
484 	ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
485 	if (ret)
486 		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
487 }
488 
489 static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
490 {
491 	struct amdxdna_dev *xdna = hwctx->client->xdna;
492 	struct drm_file *filp = hwctx->client->filp;
493 	struct drm_syncobj *syncobj;
494 	u32 hdl;
495 	int ret;
496 
497 	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
498 
499 	ret = drm_syncobj_create(&syncobj, 0, NULL);
500 	if (ret) {
501 		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
502 		return ret;
503 	}
504 	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
505 	if (ret) {
506 		drm_syncobj_put(syncobj);
507 		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
508 		return ret;
509 	}
510 	hwctx->priv->syncobj = syncobj;
511 	hwctx->syncobj_hdl = hdl;
512 
513 	return 0;
514 }
515 
516 static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
517 {
518 	/*
519 	 * The syncobj_hdl is owned by user space and will be cleaned up
520 	 * separately.
521 	 */
522 	drm_syncobj_put(hwctx->priv->syncobj);
523 }
524 
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	const struct drm_sched_init_args args = {
		.ops = &sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = HWCTX_MAX_CMDS,
		.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
		.name = hwctx->name,
		.dev = xdna->ddev.dev,
	};
	struct drm_gpu_scheduler *sched;
	struct amdxdna_hwctx_priv *priv;
	struct amdxdna_gem_obj *heap;
	struct amdxdna_dev_hdl *ndev;
	int i, ret;

	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	hwctx->priv = priv;

	mutex_lock(&client->mm_lock);
	heap = client->dev_heap;
	if (!heap) {
		XDNA_ERR(xdna, "The client dev heap object does not exist");
		mutex_unlock(&client->mm_lock);
		ret = -ENOENT;
		goto free_priv;
	}
	drm_gem_object_get(to_gobj(heap));
	mutex_unlock(&client->mm_lock);
	priv->heap = heap;
	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);

	ret = amdxdna_gem_pin(heap);
	if (ret) {
		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
		goto put_heap;
	}

	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		struct amdxdna_gem_obj *abo;
		struct amdxdna_drm_create_bo args = {
			.flags = 0,
			.type = AMDXDNA_BO_DEV,
			.vaddr = 0,
			.size = MAX_CHAIN_CMDBUF_SIZE,
		};

		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
		if (IS_ERR(abo)) {
			ret = PTR_ERR(abo);
			goto free_cmd_bufs;
		}

		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
			 i, abo->mem.dev_addr, abo->mem.size);
		priv->cmd_buf[i] = abo;
	}

	sched = &priv->sched;
	mutex_init(&priv->io_lock);

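	/*
	 * Lockdep priming (a sketch of the intent, inferred from the
	 * annotation below): record that io_lock may be taken under
	 * fs_reclaim, so any later GFP_KERNEL allocation made while
	 * holding io_lock is flagged as a potential reclaim deadlock.
	 */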
	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&priv->io_lock);
	fs_reclaim_release(GFP_KERNEL);

	ret = drm_sched_init(sched, &args);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
		goto free_cmd_bufs;
	}

	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init sched entity. ret %d", ret);
		goto free_sched;
	}

	ret = aie2_hwctx_col_list(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
		goto free_entity;
	}

	ret = aie2_alloc_resource(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
		goto free_col_list;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
		goto release_resource;
	}

	ret = aie2_ctx_syncobj_create(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
		goto release_resource;
	}

	hwctx->status = HWCTX_STAT_INIT;
	ndev = xdna->dev_handle;
	ndev->hwctx_num++;
	init_waitqueue_head(&priv->job_free_wq);

	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);

	return 0;

release_resource:
	aie2_release_resource(hwctx);
free_col_list:
	kfree(hwctx->col_list);
free_entity:
	drm_sched_entity_destroy(&priv->entity);
free_sched:
	drm_sched_fini(&priv->sched);
free_cmd_bufs:
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		if (!priv->cmd_buf[i])
			continue;
		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
	}
	amdxdna_gem_unpin(heap);
put_heap:
	drm_gem_object_put(to_gobj(heap));
free_priv:
	kfree(priv);
	return ret;
}

void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev_hdl *ndev;
	struct amdxdna_dev *xdna;
	int idx;

	xdna = hwctx->client->xdna;
	ndev = xdna->dev_handle;
	ndev->hwctx_num--;

	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
	drm_sched_entity_destroy(&hwctx->priv->entity);

	aie2_hwctx_wait_for_idle(hwctx);

	/* Request fw to destroy hwctx and cancel the remaining pending requests */
	aie2_release_resource(hwctx);

	/* Wait for all submitted jobs to be completed or canceled */
	wait_event(hwctx->priv->job_free_wq,
		   atomic64_read(&hwctx->job_submit_cnt) ==
		   atomic64_read(&hwctx->job_free_cnt));

	drm_sched_fini(&hwctx->priv->sched);
	aie2_ctx_syncobj_destroy(hwctx);

	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
	amdxdna_gem_unpin(hwctx->priv->heap);
	drm_gem_object_put(to_gobj(hwctx->priv->heap));

	mutex_destroy(&hwctx->priv->io_lock);
	kfree(hwctx->col_list);
	kfree(hwctx->priv);
	kfree(hwctx->cus);
}

static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
	struct amdxdna_hwctx_param_config_cu *config = buf;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 total_size;
	int ret;

	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
		return -EINVAL;

	if (hwctx->status != HWCTX_STAT_INIT) {
		XDNA_ERR(xdna, "Re-configuring CU is not supported");
		return -EINVAL;
	}

	if (!config->num_cus) {
		XDNA_ERR(xdna, "Number of CUs is zero");
		return -EINVAL;
	}

	total_size = struct_size(config, cu_configs, config->num_cus);
	if (total_size > size) {
		XDNA_ERR(xdna, "CU config larger than buffer size");
		return -EINVAL;
	}

	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
	if (!hwctx->cus)
		return -ENOMEM;

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
		goto free_cus;
	}

	wmb(); /* To avoid locking in command submit when checking status */
	hwctx->status = HWCTX_STAT_READY;

	return 0;

free_cus:
	kfree(hwctx->cus);
	hwctx->cus = NULL;
	return ret;
}

int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return -EOPNOTSUPP;
	default:
		XDNA_DBG(xdna, "Unsupported type %d", type);
		return -EOPNOTSUPP;
	}
}

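/*
 * Fault in the pages of any invalidated userptr mapping of @abo. This
 * follows the usual hmm_range_fault() retry protocol: read the notifier
 * sequence, fault the range, then retry if the notifier raced with an
 * invalidation, bounded overall by HMM_RANGE_DEFAULT_TIMEOUT.
 */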
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct amdxdna_umap *mapp;
	unsigned long timeout;
	struct mm_struct *mm;
	bool found;
	int ret;

	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
again:
	found = false;
	down_write(&xdna->notifier_lock);
	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
		if (mapp->invalid) {
			found = true;
			break;
		}
	}

	if (!found) {
		abo->mem.map_invalid = false;
		up_write(&xdna->notifier_lock);
		return 0;
	}
	kref_get(&mapp->refcnt);
	up_write(&xdna->notifier_lock);

	XDNA_DBG(xdna, "populate memory range %lx %lx",
		 mapp->vma->vm_start, mapp->vma->vm_end);
	mm = mapp->notifier.mm;
	if (!mmget_not_zero(mm)) {
		amdxdna_umap_put(mapp);
		return -EFAULT;
	}

	mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&mapp->range);
	mmap_read_unlock(mm);
	if (ret) {
		if (time_after(jiffies, timeout)) {
			ret = -ETIME;
			goto put_mm;
		}

		if (ret == -EBUSY) {
			/* Drop the references taken this iteration before retrying */
			amdxdna_umap_put(mapp);
			mmput(mm);
			goto again;
		}

		goto put_mm;
	}

	down_write(&xdna->notifier_lock);
	if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
		up_write(&xdna->notifier_lock);
		amdxdna_umap_put(mapp);
		mmput(mm);
		goto again;
	}
	mapp->invalid = false;
	up_write(&xdna->notifier_lock);
	amdxdna_umap_put(mapp);
	/* Balance mmget_not_zero() for this iteration, then scan again */
	mmput(mm);
	goto again;

put_mm:
	amdxdna_umap_put(mapp);
	mmput(mm);
	return ret;
}

int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long timeout = 0;
	int ret, i;

	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
				 hwctx->client->filp->client_id);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

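	/*
	 * Lock all BO reservations; if any userptr mapping was invalidated
	 * in the meantime, drop the locks, repopulate the range and retry,
	 * bounded across all retries by HMM_RANGE_DEFAULT_TIMEOUT.
	 */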
retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		abo = to_xdna_obj(job->bos[i]);
		if (abo->mem.map_invalid) {
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			if (!timeout) {
				timeout = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, timeout)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(abo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);
	atomic64_inc(&hwctx->job_submit_cnt);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}

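/*
 * MMU notifier callback for userptr BOs: before the pages go away, wait
 * for every fence on the reservation object so the device is no longer
 * accessing the range.
 */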
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
			 unsigned long cur_seq)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct drm_gem_object *gobj = to_gobj(abo);
	long ret;

	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
				    true, MAX_SCHEDULE_TIMEOUT);
	if (!ret || ret == -ERESTARTSYS)
		XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
}
945