xref: /linux/drivers/gpu/drm/xe/xe_bb.c (revision c0d6f52f9b62479d61f8cd4faf9fb2f8bce6e301)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bb.h"

#include "instructions/xe_mi_commands.h"
#include "xe_assert.h"
#include "xe_device_types.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

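/*
 * Additional space, in bytes, that must be left at the tail of a batch
 * buffer suballocation so that the hardware command prefetcher does not
 * read past the end of the allocation.
 */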
static int bb_prefetch(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
		/*
		 * RCS and CCS require 1K, although other engines would be
		 * okay with 512.
		 */
		return SZ_1K;
	else
		return SZ_512;
}

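/**
 * xe_bb_new() - Allocate a new batch buffer from a GT's suballocator pool
 * @gt: the &struct xe_gt the batch buffer will be executed on
 * @dwords: number of dwords of command space requested
 * @usm: if true, allocate from the GT's USM pool rather than the tile's
 *       kernel batch buffer pool
 *
 * The caller emits commands into the returned batch buffer's ->cs array and
 * advances ->len accordingly, then turns it into a job with
 * xe_bb_create_job() or xe_bb_create_migration_job(). MI_BATCH_BUFFER_END is
 * appended at job creation time and does not need to be emitted by the
 * caller.
 *
 * Return: a new &struct xe_bb on success, or an ERR_PTR() on failure.
 */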
struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords,
	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
	 * space to accommodate the platform-specific hardware prefetch
	 * requirements.
	 */
	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
			      4 * (dwords + 1) + bb_prefetch(gt));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

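/**
 * xe_bb_ccs_new() - Allocate a new batch buffer from a VF CCS context's pool
 * @gt: the &struct xe_gt the batch buffer belongs to
 * @dwords: number of dwords of command space requested
 * @ctx_id: the SR-IOV VF CCS read/write context to allocate the batch
 *          buffer from
 *
 * Unlike xe_bb_new(), no prefetch padding is reserved; only a single extra
 * dword is added as a guard for MI_BATCH_BUFFER_END.
 *
 * Return: a new &struct xe_bb on success, or an ERR_PTR() on failure.
 */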
struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
			    enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
{
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_sa_manager *bb_pool;
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords and
	 * one additional MI_BATCH_BUFFER_END dword. Since the whole
	 * suballocation (SA) is submitted to the HW, we need to make sure
	 * that the last instruction is not overwritten when the last chunk
	 * of the SA is allocated for a BB. This extra dword acts as a guard
	 * here.
	 */
	bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
	bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

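/*
 * Terminate the batch buffer with MI_BATCH_BUFFER_END if the caller has not
 * already done so, verify it still fits within the suballocation together
 * with the prefetch tail, flush the CPU writes and create a scheduler job
 * that starts execution at @addr.
 */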
static struct xe_sched_job *
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
	u32 size = drm_suballoc_size(bb->bo);

	if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;

	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);

	xe_sa_bo_flush_write(bb->bo);

	return xe_sched_job_create(q, addr);
}

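/**
 * xe_bb_create_migration_job() - Create a scheduler job from a migration
 * batch buffer
 * @q: the migration exec queue to run the job on
 * @bb: the batch buffer to turn into a job
 * @batch_base_ofs: GPU address of the start of the suballocator pool
 *                  backing @bb in the queue's address space
 * @second_idx: dword offset into @bb at which the second batch starts
 *
 * Two batch addresses are passed to the job: the start of @bb and the point
 * @second_idx dwords into it, both relative to @batch_base_ofs plus the
 * suballocation offset. Only valid for migration queues of width 1.
 *
 * Return: a new &struct xe_sched_job on success, or an ERR_PTR() on failure.
 */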
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
						struct xe_bb *bb,
						u64 batch_base_ofs,
						u32 second_idx)
{
	u64 addr[2] = {
		batch_base_ofs + drm_suballoc_soffset(bb->bo),
		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
		4 * second_idx,
	};

	xe_gt_assert(q->gt, second_idx <= bb->len);
	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);

	return __xe_bb_create_job(q, bb, addr);
}

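/**
 * xe_bb_create_job() - Create a scheduler job from a batch buffer
 * @q: the exec queue to run the job on
 * @bb: the batch buffer to turn into a job
 *
 * The job executes @bb at its suballocation's GPU address. Only valid for
 * non-migration queues of width 1.
 *
 * Return: a new &struct xe_sched_job on success, or an ERR_PTR() on failure.
 */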
struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
				      struct xe_bb *bb)
{
	u64 addr = xe_sa_bo_gpu_addr(bb->bo);

	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);
	return __xe_bb_create_job(q, bb, &addr);
}

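/**
 * xe_bb_free() - Free a batch buffer
 * @bb: the batch buffer to free, may be NULL
 * @fence: fence that must signal before the backing suballocation may be
 *         reused, or NULL if none is needed
 *
 * Releases the suballocation backing @bb and frees @bb itself.
 */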
void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
{
	if (!bb)
		return;

	xe_sa_bo_free(bb->bo, fence);
	kfree(bb);
}
148