xref: /linux/drivers/accel/amdxdna/aie2_ctx.c (revision 1c9982b4961334c1edb0745a04cabd34bc2de675)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2024, Advanced Micro Devices, Inc.
4  */
5 
6 #include <drm/amdxdna_accel.h>
7 #include <drm/drm_device.h>
8 #include <drm/drm_gem.h>
9 #include <drm/drm_gem_shmem_helper.h>
10 #include <drm/drm_print.h>
11 #include <drm/drm_syncobj.h>
12 #include <linux/hmm.h>
13 #include <linux/types.h>
14 #include <linux/xarray.h>
15 #include <trace/events/amdxdna.h>
16 
17 #include "aie2_msg_priv.h"
18 #include "aie2_pci.h"
19 #include "aie2_solver.h"
20 #include "amdxdna_ctx.h"
21 #include "amdxdna_gem.h"
22 #include "amdxdna_mailbox.h"
23 #include "amdxdna_pci_drv.h"
24 #include "amdxdna_pm.h"
25 
/* When true, wrap even single exec-bufs in a command-list message (see aie2_sched_job_run()) */
static bool force_cmdlist = true;
module_param(force_cmdlist, bool, 0600);
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)");

/* DRM scheduler job timeout; also the upper bound a command may run before aie2_sched_job_timedout() fires */
#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */
31 
aie2_job_release(struct kref * ref)32 static void aie2_job_release(struct kref *ref)
33 {
34 	struct amdxdna_sched_job *job;
35 
36 	job = container_of(ref, struct amdxdna_sched_job, refcnt);
37 	amdxdna_sched_job_cleanup(job);
38 	atomic64_inc(&job->hwctx->job_free_cnt);
39 	wake_up(&job->hwctx->priv->job_free_wq);
40 	if (job->out_fence)
41 		dma_fence_put(job->out_fence);
42 	kfree(job);
43 }
44 
/* Drop one job reference; aie2_job_release() runs when the count hits zero. */
static void aie2_job_put(struct amdxdna_sched_job *job)
{
	kref_put(&job->refcnt, aie2_job_release);
}
49 
/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
			    struct drm_sched_job *bad_job)
{
	/* Park the scheduler first so no new job reaches the firmware */
	drm_sched_stop(&hwctx->priv->sched, bad_job);
	/* Destroying the fw context aborts every command still in flight */
	aie2_destroy_context(xdna->dev_handle, hwctx);
	drm_sched_start(&hwctx->priv->sched, 0);
}
58 
aie2_hwctx_restart(struct amdxdna_dev * xdna,struct amdxdna_hwctx * hwctx)59 static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
60 {
61 	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
62 	int ret;
63 
64 	ret = aie2_create_context(xdna->dev_handle, hwctx);
65 	if (ret) {
66 		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
67 		goto out;
68 	}
69 
70 	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
71 				heap->mem.userptr, heap->mem.size);
72 	if (ret) {
73 		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
74 		goto out;
75 	}
76 
77 	ret = aie2_config_cu(hwctx, NULL);
78 	if (ret) {
79 		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
80 		goto out;
81 	}
82 
83 out:
84 	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
85 	return ret;
86 }
87 
/*
 * Look up the completion fence for command @seq on the context's syncobj
 * timeline.  Returns a new fence reference, or NULL if the syncobj has no
 * fence or @seq cannot be found on the chain.
 */
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *result = NULL;
	struct dma_fence *chain;

	chain = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!chain)
		return NULL;

	if (!dma_fence_chain_find_seqno(&chain, seq))
		result = dma_fence_get(dma_fence_chain_contained(chain));

	dma_fence_put(chain);
	return result;
}
107 
aie2_hwctx_wait_for_idle(struct amdxdna_hwctx * hwctx)108 static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
109 {
110 	struct dma_fence *fence;
111 
112 	fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
113 	if (!fence)
114 		return;
115 
116 	/* Wait up to 2 seconds for fw to finish all pending requests */
117 	dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
118 	dma_fence_put(fence);
119 }
120 
/*
 * Per-context suspend callback for amdxdna_hwctx_walk(): drain outstanding
 * commands, then stop the context.  @arg is unused.  Always returns 0 so
 * the walk visits every context.
 */
static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	aie2_hwctx_wait_for_idle(hwctx);
	aie2_hwctx_stop(xdna, hwctx, NULL);

	return 0;
}
130 
/*
 * Suspend all hardware contexts of @client.  Caller must hold
 * xdna->dev_lock (checked by the WARN below).
 */
void aie2_hwctx_suspend(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;

	/*
	 * Command timeout is unlikely. But if it happens, it doesn't
	 * break the system. aie2_hwctx_stop() will destroy mailbox
	 * and abort all commands.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb);
}
143 
/* Per-context resume callback for amdxdna_hwctx_walk(); @arg is unused. */
static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	return aie2_hwctx_restart(xdna, hwctx);
}
150 
/* Resume (restart) all hardware contexts of @client. */
int aie2_hwctx_resume(struct amdxdna_client *client)
{
	/*
	 * The resume path cannot guarantee that the mailbox channel can be
	 * regenerated. If that happens, submitting a message to the mailbox
	 * channel will return an error.
	 */
	return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
}
160 
/*
 * Common completion path for all response handlers: signal the job's fence,
 * release the submission slot, and drop the references taken in
 * aie2_sched_job_run().
 */
static void
aie2_sched_notify(struct amdxdna_sched_job *job)
{
	struct dma_fence *fence = job->fence;

	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);

	job->hwctx->priv->completed++;
	dma_fence_signal(fence);

	/* Release the per-context submission slot taken in aie2_cmd_submit() */
	up(&job->hwctx->priv->job_sem);
	/* Tell aie2_sched_job_free() the slot was already released */
	job->job_done = true;
	/* Async variant: this may run in a context where mmput() can't sleep */
	mmput_async(job->mm);
	aie2_job_put(job);
}
176 
/*
 * Mailbox response handler for a single exec-buf command.  @data points to
 * the device response (iomem) containing one u32 status word, or is NULL
 * when no response payload is available.  Always notifies completion.
 */
static int
aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	int ret = 0;
	u32 status;

	cmd_abo = job->cmd_bo;

	/* Job already timed out: mark the command accordingly, ignore payload */
	if (unlikely(job->job_timeout)) {
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
		ret = -EINVAL;
		goto out;
	}

	/* Malformed response: no payload or wrong size */
	if (unlikely(!data) || unlikely(size != sizeof(u32))) {
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
	if (status == AIE2_STATUS_SUCCESS)
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
	else
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR);

out:
	aie2_sched_notify(job);
	return ret;
}
210 
/*
 * Mailbox response handler for driver-internal commands (SYNC_DEBUG_BO /
 * ATTACH_DEBUG_BO).  Stores the one-word device result into job->drv_cmd.
 * NOTE(review): a NULL @data returns 0 without setting drv_cmd->result —
 * presumably the channel was torn down; confirm callers treat the stale
 * result correctly.
 */
static int
aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	int ret = 0;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		ret = -EINVAL;
		goto out;
	}

	job->drv_cmd->result = readl(data);

out:
	aie2_sched_notify(job);
	return ret;
}
231 
/*
 * Mailbox response handler for command-list submissions.  The response is a
 * struct cmd_chain_resp in iomem (3 u32s: overall status, index of the
 * failing sub-command, and that sub-command's status).  Always notifies
 * completion.
 */
static int
aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	struct amdxdna_dev *xdna;
	u32 fail_cmd_status;
	u32 fail_cmd_idx;
	u32 cmd_status;
	int ret = 0;

	cmd_abo = job->cmd_bo;

	/* Job already timed out: mark the command accordingly, ignore payload */
	if (unlikely(job->job_timeout)) {
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
		ret = -EINVAL;
		goto out;
	}

	/* Malformed response: missing payload or not exactly 3 words */
	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
	xdna = job->hwctx->client->xdna;
	XDNA_DBG(xdna, "Status 0x%x", cmd_status);
	if (cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
		goto out;
	}

	/* Slow path to handle error, read from ringbuf on BAR */
	fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
	fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
		 fail_cmd_idx, fail_cmd_status);

	/*
	 * Overall failure but the per-command status says success: the command
	 * never ran (aborted); otherwise propagate the device error state.
	 */
	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
	} else {
		amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR);
	}

out:
	aie2_sched_notify(job);
	return ret;
}
282 
/*
 * drm_sched run_job callback: push the job to the device over the mailbox.
 * Takes an extra job reference and an mm reference that the matching
 * response handler (via aie2_sched_notify()) releases; both are dropped
 * here on the error path.  Returns the job's hw fence, NULL if the mailbox
 * channel is gone, or an ERR_PTR.
 */
static struct dma_fence *
aie2_sched_job_run(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct dma_fence *fence;
	int ret;

	/* Channel destroyed (context stopped): nothing to run */
	if (!hwctx->priv->mbox_chann)
		return NULL;

	/* Pin the submitter's mm so user buffers stay valid until completion */
	if (!mmget_not_zero(job->mm))
		return ERR_PTR(-ESRCH);

	kref_get(&job->refcnt);
	fence = dma_fence_get(job->fence);

	/* Driver-internal commands bypass the normal exec-buf path */
	if (job->drv_cmd) {
		switch (job->drv_cmd->opcode) {
		case SYNC_DEBUG_BO:
			ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
			break;
		case ATTACH_DEBUG_BO:
			ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
			break;
		default:
			ret = -EINVAL;
			break;
		}
		goto out;
	}

	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else if (force_cmdlist)
		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else
		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);

out:
	if (ret) {
		/* Undo the references taken above; no response will arrive */
		dma_fence_put(job->fence);
		aie2_job_put(job);
		mmput(job->mm);
		fence = ERR_PTR(ret);
	}
	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);

	return fence;
}
336 
/*
 * drm_sched free_job callback.  If the job never completed (no response
 * handler ran), the submission slot must still be released here.
 */
static void aie2_sched_job_free(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;

	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
	if (!job->job_done)
		up(&hwctx->priv->job_sem);

	drm_sched_job_cleanup(sched_job);
	aie2_job_put(job);
}
349 
/*
 * drm_sched timedout_job callback: mark the job as timed out, then stop and
 * restart the context under dev_lock.  Stopping destroys the fw context,
 * which aborts all in-flight commands including this one.
 */
static enum drm_gpu_sched_stat
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct amdxdna_dev *xdna;

	xdna = hwctx->client->xdna;
	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
	/* Response handlers check this flag and report ERT_CMD_STATE_TIMEOUT */
	job->job_timeout = true;
	mutex_lock(&xdna->dev_lock);
	aie2_hwctx_stop(xdna, hwctx, sched_job);

	aie2_hwctx_restart(xdna, hwctx);
	mutex_unlock(&xdna->dev_lock);

	return DRM_GPU_SCHED_STAT_RESET;
}
368 
/* DRM GPU scheduler backend callbacks for AIE2 hardware contexts */
static const struct drm_sched_backend_ops sched_ops = {
	.run_job = aie2_sched_job_run,
	.free_job = aie2_sched_job_free,
	.timedout_job = aie2_sched_job_timedout,
};
374 
/*
 * Build the list of candidate start columns for this context.  The context
 * needs hwctx->num_col columns (derived from num_tiles / core row count);
 * depending on the device's column alignment, start positions must be a
 * multiple of @width.  On success hwctx->col_list/col_list_len are filled.
 */
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int start, end, first, last;
	u32 width = 1, entries = 0;
	int i;

	if (!hwctx->num_tiles) {
		XDNA_ERR(xdna, "Number of tiles is zero");
		return -EINVAL;
	}

	ndev = xdna->dev_handle;
	if (unlikely(!ndev->metadata.core.row_count)) {
		XDNA_WARN(xdna, "Core tile row count is zero");
		return -EINVAL;
	}

	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
		return -EINVAL;
	}

	/* COL_ALIGN_NATURE: start columns must align to the context width */
	if (ndev->priv->col_align == COL_ALIGN_NATURE)
		width = hwctx->num_col;

	/*
	 * In range [start, end], find out columns that is multiple of width.
	 *	'first' is the first column,
	 *	'last' is the last column,
	 *	'entries' is the total number of columns.
	 */
	start =  xdna->dev_info->first_col;
	end =  ndev->total_col - hwctx->num_col;
	/* Context spans the whole array: only column 0 can work */
	if (start > 0 && end == 0) {
		XDNA_DBG(xdna, "Force start from col 0");
		start = 0;
	}
	first = start + (width - start % width) % width;	/* round up to width */
	last = end - end % width;				/* round down to width */
	if (last >= first)
		entries = (last - first) / width + 1;
	XDNA_DBG(xdna, "start %d end %d first %d last %d",
		 start, end, first, last);

	if (unlikely(!entries)) {
		XDNA_ERR(xdna, "Start %d end %d width %d",
			 start, end, width);
		return -EINVAL;
	}

	hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
	if (!hwctx->col_list)
		return -ENOMEM;

	hwctx->col_list_len = entries;
	hwctx->col_list[0] = first;
	for (i = 1; i < entries; i++)
		hwctx->col_list[i] = hwctx->col_list[i - 1] + width;

	print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
			     entries * sizeof(*hwctx->col_list), false);
	return 0;
}
441 
/*
 * Allocate AIE columns for the context.  Temporal-only devices always get
 * the full array; otherwise the solver (xrs) picks a placement from
 * hwctx->col_list honoring the context's QoS requirements.
 */
static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct alloc_requests *xrs_req;
	int ret;

	/* Temporal sharing only: every context owns all columns */
	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
		hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col;
		hwctx->num_col = xdna->dev_handle->total_col;
		return aie2_create_context(xdna->dev_handle, hwctx);
	}

	xrs_req = kzalloc_obj(*xrs_req);
	if (!xrs_req)
		return -ENOMEM;

	/* Placement candidates and size */
	xrs_req->cdo.start_cols = hwctx->col_list;
	xrs_req->cdo.cols_len = hwctx->col_list_len;
	xrs_req->cdo.ncols = hwctx->num_col;
	xrs_req->cdo.qos_cap.opc = hwctx->max_opc;

	/* QoS requirements supplied at context creation */
	xrs_req->rqos.gops = hwctx->qos.gops;
	xrs_req->rqos.fps = hwctx->qos.fps;
	xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
	xrs_req->rqos.latency = hwctx->qos.latency;
	xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
	xrs_req->rqos.priority = hwctx->qos.priority;

	/* Request id: the context pointer doubles as a unique key */
	xrs_req->rid = (uintptr_t)hwctx;

	ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
	if (ret)
		XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);

	kfree(xrs_req);
	return ret;
}
479 
aie2_release_resource(struct amdxdna_hwctx * hwctx)480 static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
481 {
482 	struct amdxdna_dev *xdna = hwctx->client->xdna;
483 	int ret;
484 
485 	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
486 		ret = aie2_destroy_context(xdna->dev_handle, hwctx);
487 		if (ret && ret != -ENODEV)
488 			XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret);
489 	} else {
490 		ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
491 		if (ret)
492 			XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
493 	}
494 }
495 
aie2_ctx_syncobj_create(struct amdxdna_hwctx * hwctx)496 static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
497 {
498 	struct amdxdna_dev *xdna = hwctx->client->xdna;
499 	struct drm_file *filp = hwctx->client->filp;
500 	struct drm_syncobj *syncobj;
501 	u32 hdl;
502 	int ret;
503 
504 	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
505 
506 	ret = drm_syncobj_create(&syncobj, 0, NULL);
507 	if (ret) {
508 		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
509 		return ret;
510 	}
511 	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
512 	if (ret) {
513 		drm_syncobj_put(syncobj);
514 		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
515 		return ret;
516 	}
517 	hwctx->priv->syncobj = syncobj;
518 	hwctx->syncobj_hdl = hdl;
519 
520 	return 0;
521 }
522 
/* Drop the driver's reference to the context syncobj. */
static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
{
	/*
	 * The syncobj_hdl is owned by user space and will be cleaned up
	 * separately.
	 */
	drm_syncobj_put(hwctx->priv->syncobj);
}
531 
/*
 * Initialize a hardware context: allocate private state, pin the client's
 * heap, allocate command-chain buffers, set up the DRM scheduler and entity,
 * compute the column placement list, and create the fw context plus its
 * completion syncobj.  Device-touching steps run inside a PM resume/suspend
 * bracket.  Cleanup on error unwinds in reverse order via the labels below.
 */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	const struct drm_sched_init_args args = {
		.ops = &sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = HWCTX_MAX_CMDS,
		.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
		.name = "amdxdna_js",
		.dev = xdna->ddev.dev,
	};
	struct drm_gpu_scheduler *sched;
	struct amdxdna_hwctx_priv *priv;
	struct amdxdna_gem_obj *heap;
	int i, ret;

	priv = kzalloc_obj(*hwctx->priv);
	if (!priv)
		return -ENOMEM;
	hwctx->priv = priv;

	/* Grab a reference to the client's dev heap under mm_lock */
	mutex_lock(&client->mm_lock);
	heap = client->dev_heap;
	if (!heap) {
		XDNA_ERR(xdna, "The client dev heap object not exist");
		mutex_unlock(&client->mm_lock);
		ret = -ENOENT;
		goto free_priv;
	}
	drm_gem_object_get(to_gobj(heap));
	mutex_unlock(&client->mm_lock);
	priv->heap = heap;
	/* Limits in-flight submissions; see aie2_cmd_submit() */
	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);

	ret = amdxdna_gem_pin(heap);
	if (ret) {
		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
		goto put_heap;
	}

	/* Pre-allocate device BOs used to stage command chains */
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		struct amdxdna_gem_obj *abo;
		struct amdxdna_drm_create_bo args = {
			.flags = 0,
			.type = AMDXDNA_BO_DEV,
			.vaddr = 0,
			.size = MAX_CHAIN_CMDBUF_SIZE,
		};

		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
		if (IS_ERR(abo)) {
			ret = PTR_ERR(abo);
			goto free_cmd_bufs;
		}

		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
			 i, abo->mem.dev_addr, abo->mem.size);
		priv->cmd_buf[i] = abo;
	}

	sched = &priv->sched;
	mutex_init(&priv->io_lock);

	/* Teach lockdep that io_lock may be taken under memory reclaim */
	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&priv->io_lock);
	fs_reclaim_release(GFP_KERNEL);

	ret = drm_sched_init(sched, &args);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
		goto free_cmd_bufs;
	}

	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
		goto free_sched;
	}

	ret = aie2_hwctx_col_list(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
		goto free_entity;
	}

	/* Device must be awake for resource allocation and fw messaging */
	ret = amdxdna_pm_resume_get_locked(xdna);
	if (ret)
		goto free_col_list;

	ret = aie2_alloc_resource(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
		goto suspend_put;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
		goto release_resource;
	}

	ret = aie2_ctx_syncobj_create(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
		goto release_resource;
	}
	amdxdna_pm_suspend_put(xdna);

	init_waitqueue_head(&priv->job_free_wq);

	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);

	return 0;

release_resource:
	aie2_release_resource(hwctx);
suspend_put:
	amdxdna_pm_suspend_put(xdna);
free_col_list:
	kfree(hwctx->col_list);
free_entity:
	drm_sched_entity_destroy(&priv->entity);
free_sched:
	drm_sched_fini(&priv->sched);
free_cmd_bufs:
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		if (!priv->cmd_buf[i])
			continue;
		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
	}
	amdxdna_gem_unpin(heap);
put_heap:
	drm_gem_object_put(to_gobj(heap));
free_priv:
	kfree(priv);
	return ret;
}
672 
/*
 * Tear down a hardware context — the inverse of aie2_hwctx_init().
 * Called with xdna->dev_lock held; the lock is dropped temporarily while
 * waiting for outstanding jobs so their release path can make progress.
 */
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna;
	int idx;

	xdna = hwctx->client->xdna;

	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
	aie2_hwctx_wait_for_idle(hwctx);

	/* Request fw to destroy hwctx and cancel the rest pending requests */
	drm_sched_stop(&hwctx->priv->sched, NULL);
	aie2_release_resource(hwctx);
	drm_sched_start(&hwctx->priv->sched, 0);

	/* Drop dev_lock so job free/release paths are not blocked on it */
	mutex_unlock(&xdna->dev_lock);
	drm_sched_entity_destroy(&hwctx->priv->entity);

	/* Wait for all submitted jobs to be completed or canceled */
	wait_event(hwctx->priv->job_free_wq,
		   atomic64_read(&hwctx->job_submit_cnt) ==
		   atomic64_read(&hwctx->job_free_cnt));
	mutex_lock(&xdna->dev_lock);

	drm_sched_fini(&hwctx->priv->sched);
	aie2_ctx_syncobj_destroy(hwctx);

	/* Release command buffers, heap pin, and private state */
	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
	amdxdna_gem_unpin(hwctx->priv->heap);
	drm_gem_object_put(to_gobj(hwctx->priv->heap));

	mutex_destroy(&hwctx->priv->io_lock);
	kfree(hwctx->col_list);
	kfree(hwctx->priv);
	kfree(hwctx->cus);
}
710 
/*
 * Response handler for the config-CU message: releases the PM reference
 * taken in aie2_hwctx_cu_config() once firmware acknowledges.
 */
static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_hwctx *hwctx = handle;

	amdxdna_pm_suspend_put(hwctx->client->xdna);
	return 0;
}
718 
/*
 * Handle DRM_AMDXDNA_HWCTX_CONFIG_CU: validate the user-provided CU
 * configuration, cache a copy on the context, and push it to firmware.
 * Re-configuration of an already configured context is rejected.  The PM
 * reference taken here is released by aie2_config_cu_resp_handler().
 */
static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
	struct amdxdna_hwctx_param_config_cu *config = buf;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 total_size;
	int ret;

	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
	/* Reserved padding must be zero */
	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
		return -EINVAL;

	if (hwctx->cus) {
		XDNA_ERR(xdna, "Not support re-config CU");
		return -EINVAL;
	}

	if (!config->num_cus) {
		XDNA_ERR(xdna, "Number of CU is zero");
		return -EINVAL;
	}

	/* struct_size() guards against num_cus overflowing the buffer */
	total_size = struct_size(config, cu_configs, config->num_cus);
	if (total_size > size) {
		XDNA_ERR(xdna, "CU config larger than size");
		return -EINVAL;
	}

	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
	if (!hwctx->cus)
		return -ENOMEM;

	ret = amdxdna_pm_resume_get_locked(xdna);
	if (ret)
		goto free_cus;

	ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
	if (ret) {
		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
		goto pm_suspend_put;
	}

	wmb(); /* To avoid locking in command submit when check status */

	return 0;

pm_suspend_put:
	amdxdna_pm_suspend_put(xdna);
free_cus:
	kfree(hwctx->cus);
	hwctx->cus = NULL;
	return ret;
}
771 
/*
 * Block (interruptible-free, unbounded) until command @seq on @hwctx has
 * completed.  Logs and returns early if no fence can be found for @seq.
 */
static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence;

	fence = aie2_cmd_get_out_fence(hwctx, seq);
	if (!fence) {
		XDNA_ERR(hwctx->client->xdna, "Failed to get fence");
		return;
	}

	dma_fence_wait_timeout(fence, false, MAX_SCHEDULE_TIMEOUT);
	dma_fence_put(fence);
}
784 
/*
 * Attach or detach a debug BO to/from @hwctx by submitting a driver command
 * and waiting for the firmware response.  A BO can be attached to at most
 * one context; detach is only valid from the owning context.
 *
 * Returns 0 on success, -EINVAL/-EBUSY on validation or firmware failure.
 */
static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
				   bool attach)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_drv_cmd cmd = { 0 };
	struct amdxdna_gem_obj *abo;
	u64 seq;
	int ret;

	abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV);
	if (!abo) {
		XDNA_ERR(xdna, "Get bo %d failed", bo_hdl);
		return -EINVAL;
	}

	if (attach) {
		/* Debug BO may only be attached to one context at a time */
		if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
			ret = -EBUSY;
			goto put_obj;
		}
		cmd.opcode = ATTACH_DEBUG_BO;
	} else {
		/* Only the owning context may detach the BO */
		if (abo->assigned_hwctx != hwctx->id) {
			ret = -EINVAL;
			goto put_obj;
		}
		cmd.opcode = DETACH_DEBUG_BO;
	}

	ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
				 &bo_hdl, 1, hwctx->id, &seq);
	if (ret) {
		XDNA_ERR(xdna, "Submit command failed");
		goto put_obj;
	}

	aie2_cmd_wait(hwctx, seq);
	if (cmd.result) {
		XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
		/*
		 * Fix: a firmware failure here previously fell through with
		 * ret == 0, so the caller saw success.  Return -EINVAL like
		 * aie2_hwctx_sync_debug_bo() does for the same condition.
		 */
		ret = -EINVAL;
		goto put_obj;
	}

	if (attach)
		abo->assigned_hwctx = hwctx->id;
	else
		abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;

	XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);

put_obj:
	amdxdna_gem_put_obj(abo);
	return ret;
}
839 
/*
 * Dispatch a hwctx-config ioctl request.  Caller must hold xdna->dev_lock
 * (checked by the WARN below).  @value carries the BO handle for the
 * debug-buffer operations; @buf/@size carry the CU configuration.
 */
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
		return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true);
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false);
	default:
		XDNA_DBG(xdna, "Not supported type %d", type);
		return -EOPNOTSUPP;
	}
}
857 
/*
 * Submit a SYNC_DEBUG_BO driver command for @debug_bo_hdl on @hwctx and
 * wait for the firmware response.  Returns 0 on success, a negative errno
 * on submission failure, or -EINVAL when firmware reports an error.
 */
int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_drv_cmd cmd = { 0 };
	u64 seq;
	int ret;

	cmd.opcode = SYNC_DEBUG_BO;
	ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
				 &debug_bo_hdl, 1, hwctx->id, &seq);
	if (ret) {
		XDNA_ERR(xdna, "Submit command failed");
		return ret;
	}

	aie2_cmd_wait(hwctx, seq);
	if (!cmd.result)
		return 0;

	XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
	return -EINVAL;
}
882 
/*
 * Re-fault all invalidated user mappings of @abo via hmm_range_fault() so
 * its pages are resident before submission.  Loops until no mapping is
 * marked invalid, retrying on -EBUSY or a notifier-sequence change, with an
 * overall HMM_RANGE_DEFAULT_TIMEOUT deadline.
 */
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct amdxdna_umap *mapp;
	unsigned long timeout;
	struct mm_struct *mm;
	bool found;
	int ret;

	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
again:
	/* Find the first mapping still marked invalid, under notifier_lock */
	found = false;
	down_write(&xdna->notifier_lock);
	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
		if (mapp->invalid) {
			found = true;
			break;
		}
	}

	/* All mappings valid: clear the BO-level flag and we are done */
	if (!found) {
		abo->mem.map_invalid = false;
		up_write(&xdna->notifier_lock);
		return 0;
	}
	/* Keep the mapping alive while faulting outside the lock */
	kref_get(&mapp->refcnt);
	up_write(&xdna->notifier_lock);

	XDNA_DBG(xdna, "populate memory range %lx %lx",
		 mapp->vma->vm_start, mapp->vma->vm_end);
	mm = mapp->notifier.mm;
	if (!mmget_not_zero(mm)) {
		amdxdna_umap_put(mapp);
		return -EFAULT;
	}

	mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&mapp->range);
	mmap_read_unlock(mm);
	if (ret) {
		if (time_after(jiffies, timeout)) {
			ret = -ETIME;
			goto put_mm;
		}

		/* -EBUSY: transient, retry the whole scan */
		if (ret == -EBUSY) {
			amdxdna_umap_put(mapp);
			goto again;
		}

		goto put_mm;
	}

	/* Racing invalidation since read_begin: fault again */
	down_write(&xdna->notifier_lock);
	if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
		up_write(&xdna->notifier_lock);
		amdxdna_umap_put(mapp);
		goto again;
	}
	mapp->invalid = false;
	up_write(&xdna->notifier_lock);
	amdxdna_umap_put(mapp);
	/* Re-scan in case other mappings are still invalid */
	goto again;

put_mm:
	amdxdna_umap_put(mapp);
	mmput(mm);
	return ret;
}
953 
/*
 * Submit @job on @hwctx.  Flow: take a submission slot (job_sem), allocate
 * a fence-chain link, init the drm_sched job, lock all BO reservations,
 * ensure user mappings are populated (retrying on invalidation), then arm
 * and push the job under io_lock and publish its out-fence on the context
 * syncobj timeline at *@seq.
 */
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long timeout = 0;
	int ret, i;

	/* Bounds in-flight commands to HWCTX_MAX_CMDS; released on completion */
	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
				 hwctx->client->filp->client_id);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	/* Any BO invalidated by an mmu notifier must be re-populated first */
	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		abo = to_xdna_obj(job->bos[i]);
		if (abo->mem.map_invalid) {
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			/* Start the retry deadline on first invalidation seen */
			if (!timeout) {
				timeout = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, timeout)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(abo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	/* io_lock serializes arming/pushing with syncobj timeline updates */
	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	/* Reference dropped by aie2_job_put() below after the push */
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	/* chain ownership passes to the syncobj here */
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);
	atomic64_inc(&hwctx->job_submit_cnt);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}
1050 
/*
 * MMU-notifier invalidation hook for a userptr BO: block until every fence
 * on the BO's reservation (BOOKKEEP usage, i.e. all of them) has signaled,
 * so the device is done with the pages before they are unmapped.
 * NOTE(review): negative returns other than -ERESTARTSYS are silently
 * ignored here — confirm that is intended.
 */
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
			 unsigned long cur_seq)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct drm_gem_object *gobj = to_gobj(abo);
	long ret;

	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
				    true, MAX_SCHEDULE_TIMEOUT);
	/* 0 means timeout (cannot happen with MAX_SCHEDULE_TIMEOUT in theory) */
	if (!ret)
		XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
	else if (ret == -ERESTARTSYS)
		XDNA_DBG(xdna, "Wait for bo interrupted by signal");
}
1065