1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2024, Advanced Micro Devices, Inc.
4 */
5
6 #include <drm/amdxdna_accel.h>
7 #include <drm/drm_device.h>
8 #include <drm/drm_gem.h>
9 #include <drm/drm_gem_shmem_helper.h>
10 #include <drm/drm_print.h>
11 #include <drm/drm_syncobj.h>
12 #include <linux/hmm.h>
13 #include <linux/types.h>
14 #include <linux/xarray.h>
15 #include <trace/events/amdxdna.h>
16
17 #include "aie2_msg_priv.h"
18 #include "aie2_pci.h"
19 #include "aie2_solver.h"
20 #include "amdxdna_ctx.h"
21 #include "amdxdna_gem.h"
22 #include "amdxdna_mailbox.h"
23 #include "amdxdna_pci_drv.h"
24 #include "amdxdna_pm.h"
25
/* When true, wrap even single commands in a command-list message. */
static bool force_cmdlist = true;
module_param(force_cmdlist, bool, 0600);
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)");

/* Scheduler job timeout before aie2_sched_job_timedout() fires */
#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
31
aie2_job_release(struct kref * ref)32 static void aie2_job_release(struct kref *ref)
33 {
34 struct amdxdna_sched_job *job;
35
36 job = container_of(ref, struct amdxdna_sched_job, refcnt);
37 amdxdna_sched_job_cleanup(job);
38 atomic64_inc(&job->hwctx->job_free_cnt);
39 wake_up(&job->hwctx->priv->job_free_wq);
40 if (job->out_fence)
41 dma_fence_put(job->out_fence);
42 kfree(job);
43 }
44
/* Drop one reference to @job; aie2_job_release() runs on the last put. */
static void aie2_job_put(struct amdxdna_sched_job *job)
{
	kref_put(&job->refcnt, aie2_job_release);
}
49
/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
			    struct drm_sched_job *bad_job)
{
	/* Park the scheduler so no new jobs reach the device ... */
	drm_sched_stop(&hwctx->priv->sched, bad_job);
	/* ... then destroy the firmware context, aborting in-flight commands */
	aie2_destroy_context(xdna->dev_handle, hwctx);
	/* Restart the scheduler so queued jobs can be processed again */
	drm_sched_start(&hwctx->priv->sched, 0);
}
58
/* Recreate the firmware context and restore its host buffer and CU config. */
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
	int ret;

	ret = aie2_create_context(xdna->dev_handle, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
		goto out;
	}

	/* Re-map the client's device heap into the new firmware context */
	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
		goto out;
	}

	/* Replay the saved CU configuration; NULL = no response handler */
	ret = aie2_config_cu(hwctx, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
		goto out;
	}

out:
	/* Single exit so the restart result is always logged */
	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
	return ret;
}
87
/*
 * Look up the fence for sequence number @seq on the context's syncobj
 * timeline. Returns a reference the caller must dma_fence_put(), or
 * NULL if no fence exists / @seq is not found on the chain.
 */
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence, *out_fence = NULL;
	int ret;

	fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!fence)
		return NULL;

	/* Walk the fence chain to the node covering @seq */
	ret = dma_fence_chain_find_seqno(&fence, seq);
	if (ret)
		goto out;

	/* Return the contained fence rather than the chain node itself */
	out_fence = dma_fence_get(dma_fence_chain_contained(fence));

out:
	dma_fence_put(fence);
	return out_fence;
}
107
aie2_hwctx_wait_for_idle(struct amdxdna_hwctx * hwctx)108 static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
109 {
110 struct dma_fence *fence;
111
112 fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
113 if (!fence)
114 return;
115
116 /* Wait up to 2 seconds for fw to finish all pending requests */
117 dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
118 dma_fence_put(fence);
119 }
120
aie2_hwctx_suspend_cb(struct amdxdna_hwctx * hwctx,void * arg)121 static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
122 {
123 struct amdxdna_dev *xdna = hwctx->client->xdna;
124
125 aie2_hwctx_wait_for_idle(hwctx);
126 aie2_hwctx_stop(xdna, hwctx, NULL);
127
128 return 0;
129 }
130
/* Suspend every hardware context owned by @client; dev_lock must be held. */
void aie2_hwctx_suspend(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;

	/*
	 * Command timeout is unlikely. But if it happens, it doesn't
	 * break the system. aie2_hwctx_stop() will destroy mailbox
	 * and abort all commands.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb);
}
143
aie2_hwctx_resume_cb(struct amdxdna_hwctx * hwctx,void * arg)144 static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
145 {
146 struct amdxdna_dev *xdna = hwctx->client->xdna;
147
148 return aie2_hwctx_restart(xdna, hwctx);
149 }
150
/* Restart all of @client's hardware contexts after suspend/reset. */
int aie2_hwctx_resume(struct amdxdna_client *client)
{
	/*
	 * The resume path cannot guarantee that the mailbox channel can be
	 * regenerated. If that happens, submitting a message to the channel
	 * will return an error.
	 */
	return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
}
160
/*
 * Common completion path for all mailbox response handlers: signal the
 * job's fence, release the submission slot and drop the run reference.
 */
static void
aie2_sched_notify(struct amdxdna_sched_job *job)
{
	struct dma_fence *fence = job->fence;

	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);

	job->hwctx->priv->completed++;
	dma_fence_signal(fence);

	/* Release one submission slot taken in aie2_cmd_submit() */
	up(&job->hwctx->priv->job_sem);
	job->job_done = true;
	/* NOTE(review): mmput_async() suggests this may run in atomic context — confirm */
	mmput_async(job->mm);
	aie2_job_put(job);
}
176
/*
 * Mailbox response handler for a single (non-chained) command. @data
 * points to the device response (one u32 status) or is NULL if the
 * transfer failed. Always completes the job via aie2_sched_notify().
 */
static int
aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	int ret = 0;
	u32 status;

	cmd_abo = job->cmd_bo;

	/* The scheduler already declared this job timed out */
	if (unlikely(job->job_timeout)) {
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
		ret = -EINVAL;
		goto out;
	}

	/* No payload or unexpected payload size: abort the command */
	if (unlikely(!data) || unlikely(size != sizeof(u32))) {
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
	if (status == AIE2_STATUS_SUCCESS)
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
	else
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR);

out:
	/* Complete the job regardless of the outcome */
	aie2_sched_notify(job);
	return ret;
}
210
211 static int
aie2_sched_drvcmd_resp_handler(void * handle,void __iomem * data,size_t size)212 aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size)
213 {
214 struct amdxdna_sched_job *job = handle;
215 int ret = 0;
216
217 if (unlikely(!data))
218 goto out;
219
220 if (unlikely(size != sizeof(u32))) {
221 ret = -EINVAL;
222 goto out;
223 }
224
225 job->drv_cmd->result = readl(data);
226
227 out:
228 aie2_sched_notify(job);
229 return ret;
230 }
231
/*
 * Mailbox response handler for a command list. The response carries
 * three u32s: overall status, index of the failed command and that
 * command's own status.
 */
static int
aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	struct amdxdna_dev *xdna;
	u32 fail_cmd_status;
	u32 fail_cmd_idx;
	u32 cmd_status;
	int ret = 0;

	cmd_abo = job->cmd_bo;

	/* The scheduler already declared this job timed out */
	if (unlikely(job->job_timeout)) {
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
		ret = -EINVAL;
		goto out;
	}

	/* No payload or unexpected payload size: abort the command */
	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
	xdna = job->hwctx->client->xdna;
	XDNA_DBG(xdna, "Status 0x%x", cmd_status);
	if (cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
		goto out;
	}

	/* Slow path to handle error, read from ringbuf on BAR */
	fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
	fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
		 fail_cmd_idx, fail_cmd_status);

	/*
	 * Per-command SUCCESS with an overall failure is reported as ABORT
	 * (presumably the command never ran — confirm firmware semantics),
	 * anything else is a real ERROR.
	 */
	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
	} else {
		amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR);
	}

out:
	aie2_sched_notify(job);
	return ret;
}
282
/*
 * drm_sched run_job callback: push the job's command(s) to the device
 * through the context mailbox. Returns the job's fence, NULL when the
 * mailbox channel is gone, or an ERR_PTR on submission failure.
 */
static struct dma_fence *
aie2_sched_job_run(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct dma_fence *fence;
	int ret;

	/* Mailbox channel destroyed (context stopped); nothing to run */
	if (!hwctx->priv->mbox_chann)
		return NULL;

	/* Pin the submitter's mm while the device may access its memory */
	if (!mmget_not_zero(job->mm))
		return ERR_PTR(-ESRCH);

	/* Both references are dropped in aie2_sched_notify() on completion */
	kref_get(&job->refcnt);
	fence = dma_fence_get(job->fence);

	/* Driver-internal commands (debug BO ops) bypass the command BO path */
	if (job->drv_cmd) {
		switch (job->drv_cmd->opcode) {
		case SYNC_DEBUG_BO:
			ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
			break;
		case ATTACH_DEBUG_BO:
			ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
			break;
		default:
			ret = -EINVAL;
			break;
		}
		goto out;
	}

	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);

	/* Chained commands and (optionally) single commands go via cmdlist */
	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else if (force_cmdlist)
		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else
		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);

out:
	if (ret) {
		/* Undo the references taken above; no response will arrive */
		dma_fence_put(job->fence);
		aie2_job_put(job);
		mmput(job->mm);
		fence = ERR_PTR(ret);
	}
	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);

	return fence;
}
336
/* drm_sched free_job callback: final scheduler-side cleanup for the job. */
static void aie2_sched_job_free(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;

	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
	/* If no response ever arrived, the semaphore slot is still held */
	if (!job->job_done)
		up(&hwctx->priv->job_sem);

	drm_sched_job_cleanup(sched_job);
	aie2_job_put(job);
}
349
/*
 * drm_sched timedout_job callback: mark the job as timed out, reset the
 * firmware context and report a device reset to the scheduler.
 */
static enum drm_gpu_sched_stat
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct amdxdna_dev *xdna;

	xdna = hwctx->client->xdna;
	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
	/* Response handlers check this flag and report ERT_CMD_STATE_TIMEOUT */
	job->job_timeout = true;
	mutex_lock(&xdna->dev_lock);
	/* Pass sched_job as the bad job so drm_sched_stop() can single it out */
	aie2_hwctx_stop(xdna, hwctx, sched_job);

	aie2_hwctx_restart(xdna, hwctx);
	mutex_unlock(&xdna->dev_lock);

	return DRM_GPU_SCHED_STAT_RESET;
}
368
/* DRM GPU scheduler callbacks for AIE2 hardware contexts */
static const struct drm_sched_backend_ops sched_ops = {
	.run_job = aie2_sched_job_run,
	.free_job = aie2_sched_job_free,
	.timedout_job = aie2_sched_job_timedout,
};
374
/*
 * Build hwctx->col_list: every start column (aligned to the required
 * width) where a context of hwctx->num_col columns fits on the device.
 */
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int start, end, first, last;
	u32 width = 1, entries = 0;
	int i;

	if (!hwctx->num_tiles) {
		XDNA_ERR(xdna, "Number of tiles is zero");
		return -EINVAL;
	}

	ndev = xdna->dev_handle;
	if (unlikely(!ndev->metadata.core.row_count)) {
		XDNA_WARN(xdna, "Core tile row count is zero");
		return -EINVAL;
	}

	/* Tiles are organized in columns of row_count core tiles each */
	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
		return -EINVAL;
	}

	/* Natural alignment: start columns must be multiples of the width */
	if (ndev->priv->col_align == COL_ALIGN_NATURE)
		width = hwctx->num_col;

	/*
	 * In range [start, end], find out columns that is multiple of width.
	 * 'first' is the first column,
	 * 'last' is the last column,
	 * 'entries' is the total number of columns.
	 */
	start = xdna->dev_info->first_col;
	end = ndev->total_col - hwctx->num_col;
	if (start > 0 && end == 0) {
		XDNA_DBG(xdna, "Force start from col 0");
		start = 0;
	}
	first = start + (width - start % width) % width;	/* round start up */
	last = end - end % width;				/* round end down */
	if (last >= first)
		entries = (last - first) / width + 1;
	XDNA_DBG(xdna, "start %d end %d first %d last %d",
		 start, end, first, last);

	if (unlikely(!entries)) {
		XDNA_ERR(xdna, "Start %d end %d width %d",
			 start, end, width);
		return -EINVAL;
	}

	hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
	if (!hwctx->col_list)
		return -ENOMEM;

	hwctx->col_list_len = entries;
	hwctx->col_list[0] = first;
	for (i = 1; i < entries; i++)
		hwctx->col_list[i] = hwctx->col_list[i - 1] + width;

	print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
			     entries * sizeof(*hwctx->col_list), false);
	return 0;
}
441
/*
 * Reserve device columns for @hwctx: temporal-only devices take the
 * whole column array; otherwise the solver picks a placement from the
 * candidates computed by aie2_hwctx_col_list().
 */
static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct alloc_requests *xrs_req;
	int ret;

	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
		hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col;
		hwctx->num_col = xdna->dev_handle->total_col;
		return aie2_create_context(xdna->dev_handle, hwctx);
	}

	xrs_req = kzalloc_obj(*xrs_req);
	if (!xrs_req)
		return -ENOMEM;

	/* Candidate start columns from aie2_hwctx_col_list() */
	xrs_req->cdo.start_cols = hwctx->col_list;
	xrs_req->cdo.cols_len = hwctx->col_list_len;
	xrs_req->cdo.ncols = hwctx->num_col;
	xrs_req->cdo.qos_cap.opc = hwctx->max_opc;

	/* QoS requirements requested by userspace for this context */
	xrs_req->rqos.gops = hwctx->qos.gops;
	xrs_req->rqos.fps = hwctx->qos.fps;
	xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
	xrs_req->rqos.latency = hwctx->qos.latency;
	xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
	xrs_req->rqos.priority = hwctx->qos.priority;

	/* The hwctx pointer doubles as the solver's request id */
	xrs_req->rid = (uintptr_t)hwctx;

	ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
	if (ret)
		XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);

	kfree(xrs_req);
	return ret;
}
479
aie2_release_resource(struct amdxdna_hwctx * hwctx)480 static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
481 {
482 struct amdxdna_dev *xdna = hwctx->client->xdna;
483 int ret;
484
485 if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
486 ret = aie2_destroy_context(xdna->dev_handle, hwctx);
487 if (ret && ret != -ENODEV)
488 XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret);
489 } else {
490 ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
491 if (ret)
492 XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
493 }
494 }
495
aie2_ctx_syncobj_create(struct amdxdna_hwctx * hwctx)496 static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
497 {
498 struct amdxdna_dev *xdna = hwctx->client->xdna;
499 struct drm_file *filp = hwctx->client->filp;
500 struct drm_syncobj *syncobj;
501 u32 hdl;
502 int ret;
503
504 hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
505
506 ret = drm_syncobj_create(&syncobj, 0, NULL);
507 if (ret) {
508 XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
509 return ret;
510 }
511 ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
512 if (ret) {
513 drm_syncobj_put(syncobj);
514 XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
515 return ret;
516 }
517 hwctx->priv->syncobj = syncobj;
518 hwctx->syncobj_hdl = hdl;
519
520 return 0;
521 }
522
/* Drop the kernel reference to the context's completion syncobj. */
static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
{
	/*
	 * The syncobj_hdl is owned by user space and will be cleaned up
	 * separately.
	 */
	drm_syncobj_put(hwctx->priv->syncobj);
}
531
/*
 * Create the driver-side state for a new hardware context: scheduler,
 * entity, chained-command buffers, device heap mapping, the firmware
 * context itself and the syncobj completion timeline. On failure,
 * everything acquired so far is unwound via the goto chain.
 */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	const struct drm_sched_init_args args = {
		.ops = &sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = HWCTX_MAX_CMDS,
		.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
		.name = "amdxdna_js",
		.dev = xdna->ddev.dev,
	};
	struct drm_gpu_scheduler *sched;
	struct amdxdna_hwctx_priv *priv;
	struct amdxdna_gem_obj *heap;
	int i, ret;

	priv = kzalloc_obj(*hwctx->priv);
	if (!priv)
		return -ENOMEM;
	hwctx->priv = priv;

	/* Take a reference on the client's device heap under mm_lock */
	mutex_lock(&client->mm_lock);
	heap = client->dev_heap;
	if (!heap) {
		XDNA_ERR(xdna, "The client dev heap object not exist");
		mutex_unlock(&client->mm_lock);
		ret = -ENOENT;
		goto free_priv;
	}
	drm_gem_object_get(to_gobj(heap));
	mutex_unlock(&client->mm_lock);
	priv->heap = heap;
	/* Limit outstanding commands per context to HWCTX_MAX_CMDS */
	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);

	ret = amdxdna_gem_pin(heap);
	if (ret) {
		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
		goto put_heap;
	}

	/* Device-memory buffers used to build chained command lists */
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		struct amdxdna_gem_obj *abo;
		struct amdxdna_drm_create_bo args = {
			.flags = 0,
			.type = AMDXDNA_BO_DEV,
			.vaddr = 0,
			.size = MAX_CHAIN_CMDBUF_SIZE,
		};

		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
		if (IS_ERR(abo)) {
			ret = PTR_ERR(abo);
			goto free_cmd_bufs;
		}

		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
			 i, abo->mem.dev_addr, abo->mem.size);
		priv->cmd_buf[i] = abo;
	}

	sched = &priv->sched;
	mutex_init(&priv->io_lock);

	/* Prime lockdep: io_lock may be taken in reclaim context */
	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&priv->io_lock);
	fs_reclaim_release(GFP_KERNEL);

	ret = drm_sched_init(sched, &args);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
		goto free_cmd_bufs;
	}

	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
		goto free_sched;
	}

	ret = aie2_hwctx_col_list(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
		goto free_entity;
	}

	/* Keep the device powered while talking to firmware below */
	ret = amdxdna_pm_resume_get_locked(xdna);
	if (ret)
		goto free_col_list;

	ret = aie2_alloc_resource(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
		goto suspend_put;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
		goto release_resource;
	}

	ret = aie2_ctx_syncobj_create(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
		goto release_resource;
	}
	amdxdna_pm_suspend_put(xdna);

	init_waitqueue_head(&priv->job_free_wq);

	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);

	return 0;

release_resource:
	aie2_release_resource(hwctx);
suspend_put:
	amdxdna_pm_suspend_put(xdna);
free_col_list:
	kfree(hwctx->col_list);
free_entity:
	drm_sched_entity_destroy(&priv->entity);
free_sched:
	drm_sched_fini(&priv->sched);
free_cmd_bufs:
	/* Only the buffers allocated before the failure are populated */
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		if (!priv->cmd_buf[i])
			continue;
		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
	}
	amdxdna_gem_unpin(heap);
put_heap:
	drm_gem_object_put(to_gobj(heap));
free_priv:
	kfree(priv);
	return ret;
}
672
/*
 * Destroy @hwctx: drain outstanding jobs, tear down the firmware
 * context and release every resource taken by aie2_hwctx_init().
 * Called with xdna->dev_lock held (it is dropped while draining).
 */
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna;
	int idx;

	xdna = hwctx->client->xdna;

	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
	aie2_hwctx_wait_for_idle(hwctx);

	/* Request fw to destroy hwctx and cancel the rest pending requests */
	drm_sched_stop(&hwctx->priv->sched, NULL);
	aie2_release_resource(hwctx);
	drm_sched_start(&hwctx->priv->sched, 0);

	/* Drop dev_lock while waiting so job completion can make progress */
	mutex_unlock(&xdna->dev_lock);
	drm_sched_entity_destroy(&hwctx->priv->entity);

	/* Wait for all submitted jobs to be completed or canceled */
	wait_event(hwctx->priv->job_free_wq,
		   atomic64_read(&hwctx->job_submit_cnt) ==
		   atomic64_read(&hwctx->job_free_cnt));
	mutex_lock(&xdna->dev_lock);

	drm_sched_fini(&hwctx->priv->sched);
	aie2_ctx_syncobj_destroy(hwctx);

	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
	amdxdna_gem_unpin(hwctx->priv->heap);
	drm_gem_object_put(to_gobj(hwctx->priv->heap));

	mutex_destroy(&hwctx->priv->io_lock);
	kfree(hwctx->col_list);
	kfree(hwctx->priv);
	kfree(hwctx->cus);
}
710
/*
 * Mailbox response handler for config-CU: drops the PM reference taken
 * in aie2_hwctx_cu_config() before the request was sent.
 */
static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_hwctx *ctx = handle;

	amdxdna_pm_suspend_put(ctx->client->xdna);
	return 0;
}
718
/*
 * Validate and send the userspace CU (compute unit) configuration to
 * firmware. One-shot: re-configuration of an already configured
 * context is rejected.
 */
static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
	struct amdxdna_hwctx_param_config_cu *config = buf;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 total_size;
	int ret;

	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
	/* Reserved padding must be zero */
	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
		return -EINVAL;

	if (hwctx->cus) {
		XDNA_ERR(xdna, "Not support re-config CU");
		return -EINVAL;
	}

	if (!config->num_cus) {
		XDNA_ERR(xdna, "Number of CU is zero");
		return -EINVAL;
	}

	/* struct_size() guards against num_cus arithmetic overflow */
	total_size = struct_size(config, cu_configs, config->num_cus);
	if (total_size > size) {
		XDNA_ERR(xdna, "CU config larger than size");
		return -EINVAL;
	}

	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
	if (!hwctx->cus)
		return -ENOMEM;

	/* The PM reference is released by aie2_config_cu_resp_handler() */
	ret = amdxdna_pm_resume_get_locked(xdna);
	if (ret)
		goto free_cus;

	ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
	if (ret) {
		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
		goto pm_suspend_put;
	}

	wmb(); /* To avoid locking in command submit when check status */

	return 0;

pm_suspend_put:
	amdxdna_pm_suspend_put(xdna);
free_cus:
	kfree(hwctx->cus);
	hwctx->cus = NULL;
	return ret;
}
771
/* Block (uninterruptibly) until the command at @seq has signaled its fence. */
static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence;

	fence = aie2_cmd_get_out_fence(hwctx, seq);
	if (!fence) {
		XDNA_ERR(hwctx->client->xdna, "Failed to get fence");
		return;
	}

	dma_fence_wait_timeout(fence, false, MAX_SCHEDULE_TIMEOUT);
	dma_fence_put(fence);
}
784
/*
 * Attach or detach a debug BO to/from @hwctx by sending the matching
 * driver command to firmware and recording the BO's owning context.
 * Returns 0 on success or a negative errno.
 */
static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
				   bool attach)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_drv_cmd cmd = { 0 };
	struct amdxdna_gem_obj *abo;
	u64 seq;
	int ret;

	abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV);
	if (!abo) {
		XDNA_ERR(xdna, "Get bo %d failed", bo_hdl);
		return -EINVAL;
	}

	if (attach) {
		/* A debug BO may be attached to at most one context at a time */
		if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
			ret = -EBUSY;
			goto put_obj;
		}
		cmd.opcode = ATTACH_DEBUG_BO;
	} else {
		/* Only the owning context may detach the BO */
		if (abo->assigned_hwctx != hwctx->id) {
			ret = -EINVAL;
			goto put_obj;
		}
		cmd.opcode = DETACH_DEBUG_BO;
	}

	ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
				 &bo_hdl, 1, hwctx->id, &seq);
	if (ret) {
		XDNA_ERR(xdna, "Submit command failed");
		goto put_obj;
	}

	aie2_cmd_wait(hwctx, seq);
	if (cmd.result) {
		XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
		/*
		 * Previously fell through with ret == 0 here, reporting
		 * success although firmware rejected the request. Return an
		 * error instead, matching aie2_hwctx_sync_debug_bo().
		 */
		ret = -EINVAL;
		goto put_obj;
	}

	if (attach)
		abo->assigned_hwctx = hwctx->id;
	else
		abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;

	XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);

put_obj:
	amdxdna_gem_put_obj(abo);
	return ret;
}
839
/* Dispatch a hwctx-config ioctl request; dev_lock must already be held. */
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 bo_hdl = (u32)value;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
		return aie2_hwctx_cfg_debug_bo(hwctx, bo_hdl, true);
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return aie2_hwctx_cfg_debug_bo(hwctx, bo_hdl, false);
	default:
		XDNA_DBG(xdna, "Not supported type %d", type);
		return -EOPNOTSUPP;
	}
}
857
/* Ask firmware to sync the attached debug BO and wait for the result. */
int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
{
	struct amdxdna_drv_cmd cmd = { .opcode = SYNC_DEBUG_BO };
	struct amdxdna_client *client = hwctx->client;
	u64 seq;
	int ret;

	ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
				 &debug_bo_hdl, 1, hwctx->id, &seq);
	if (ret) {
		XDNA_ERR(client->xdna, "Submit command failed");
		return ret;
	}

	aie2_cmd_wait(hwctx, seq);
	if (cmd.result) {
		XDNA_ERR(client->xdna, "Response failure 0x%x", cmd.result);
		return -EINVAL;
	}

	return 0;
}
882
aie2_populate_range(struct amdxdna_gem_obj * abo)883 static int aie2_populate_range(struct amdxdna_gem_obj *abo)
884 {
885 struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
886 struct amdxdna_umap *mapp;
887 unsigned long timeout;
888 struct mm_struct *mm;
889 bool found;
890 int ret;
891
892 timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
893 again:
894 found = false;
895 down_write(&xdna->notifier_lock);
896 list_for_each_entry(mapp, &abo->mem.umap_list, node) {
897 if (mapp->invalid) {
898 found = true;
899 break;
900 }
901 }
902
903 if (!found) {
904 abo->mem.map_invalid = false;
905 up_write(&xdna->notifier_lock);
906 return 0;
907 }
908 kref_get(&mapp->refcnt);
909 up_write(&xdna->notifier_lock);
910
911 XDNA_DBG(xdna, "populate memory range %lx %lx",
912 mapp->vma->vm_start, mapp->vma->vm_end);
913 mm = mapp->notifier.mm;
914 if (!mmget_not_zero(mm)) {
915 amdxdna_umap_put(mapp);
916 return -EFAULT;
917 }
918
919 mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
920 mmap_read_lock(mm);
921 ret = hmm_range_fault(&mapp->range);
922 mmap_read_unlock(mm);
923 if (ret) {
924 if (time_after(jiffies, timeout)) {
925 ret = -ETIME;
926 goto put_mm;
927 }
928
929 if (ret == -EBUSY) {
930 amdxdna_umap_put(mapp);
931 goto again;
932 }
933
934 goto put_mm;
935 }
936
937 down_write(&xdna->notifier_lock);
938 if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
939 up_write(&xdna->notifier_lock);
940 amdxdna_umap_put(mapp);
941 goto again;
942 }
943 mapp->invalid = false;
944 up_write(&xdna->notifier_lock);
945 amdxdna_umap_put(mapp);
946 goto again;
947
948 put_mm:
949 amdxdna_umap_put(mapp);
950 mmput(mm);
951 return ret;
952 }
953
/*
 * Submit @job on @hwctx: lock and fence all of the job's BOs, make sure
 * userptr ranges are resident, then hand the job to the DRM scheduler.
 * On success *seq receives the job's sequence number on the context's
 * syncobj timeline.
 */
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long timeout = 0;
	int ret, i;

	/* Bounds outstanding commands per context to HWCTX_MAX_CMDS */
	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	/* Pre-allocate the chain node consumed by drm_syncobj_add_point() */
	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
				 hwctx->client->filp->client_id);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	/* Any BO invalidated by the mmu notifier must be repopulated first */
	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		abo = to_xdna_obj(job->bos[i]);
		if (abo->mem.map_invalid) {
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			/* Start the repopulate deadline on the first retry */
			if (!timeout) {
				timeout = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, timeout)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(abo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	/* Arm, fence and push under io_lock so seq and timeline stay in sync */
	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);
	atomic64_inc(&hwctx->job_submit_cnt);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}
1050
/*
 * Invalidation path for userptr BOs: wait for every fence attached to
 * the BO so the device is done before the pages go away.
 */
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
			 unsigned long cur_seq)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct drm_gem_object *gobj = to_gobj(abo);
	long ret;

	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
				    true, MAX_SCHEDULE_TIMEOUT);
	/* 0 means timed out; a negative value means interrupted/error */
	if (!ret)
		XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
	else if (ret == -ERESTARTSYS)
		XDNA_DBG(xdna, "Wait for bo interrupted by signal");
}
1065