xref: /linux/drivers/gpu/drm/xe/xe_bb.c (revision 55a42f78ffd386e01a5404419f8c5ded7db70a21)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_bb.h"
7 
8 #include "instructions/xe_mi_commands.h"
9 #include "xe_assert.h"
10 #include "xe_device.h"
11 #include "xe_exec_queue_types.h"
12 #include "xe_gt.h"
13 #include "xe_hw_fence.h"
14 #include "xe_sa.h"
15 #include "xe_sched_job.h"
16 #include "xe_vm_types.h"
17 
18 static int bb_prefetch(struct xe_gt *gt)
19 {
20 	struct xe_device *xe = gt_to_xe(gt);
21 
22 	if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
23 		/*
24 		 * RCS and CCS require 1K, although other engines would be
25 		 * okay with 512.
26 		 */
27 		return SZ_1K;
28 	else
29 		return SZ_512;
30 }
31 
32 struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
33 {
34 	struct xe_tile *tile = gt_to_tile(gt);
35 	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
36 	int err;
37 
38 	if (!bb)
39 		return ERR_PTR(-ENOMEM);
40 
41 	/*
42 	 * We need to allocate space for the requested number of dwords,
43 	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
44 	 * space to accommodate the platform-specific hardware prefetch
45 	 * requirements.
46 	 */
47 	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
48 			      4 * (dwords + 1) + bb_prefetch(gt));
49 	if (IS_ERR(bb->bo)) {
50 		err = PTR_ERR(bb->bo);
51 		goto err;
52 	}
53 
54 	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
55 	bb->len = 0;
56 
57 	return bb;
58 err:
59 	kfree(bb);
60 	return ERR_PTR(err);
61 }
62 
63 struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
64 			    enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
65 {
66 	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
67 	struct xe_device *xe = gt_to_xe(gt);
68 	struct xe_sa_manager *bb_pool;
69 	int err;
70 
71 	if (!bb)
72 		return ERR_PTR(-ENOMEM);
73 	/*
74 	 * We need to allocate space for the requested number of dwords &
75 	 * one additional MI_BATCH_BUFFER_END dword. Since the whole SA
76 	 * is submitted to HW, we need to make sure that the last instruction
77 	 * is not over written when the last chunk of SA is allocated for BB.
78 	 * So, this extra DW acts as a guard here.
79 	 */
80 
81 	bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
82 	bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));
83 
84 	if (IS_ERR(bb->bo)) {
85 		err = PTR_ERR(bb->bo);
86 		goto err;
87 	}
88 
89 	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
90 	bb->len = 0;
91 
92 	return bb;
93 err:
94 	kfree(bb);
95 	return ERR_PTR(err);
96 }
97 
98 static struct xe_sched_job *
99 __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
100 {
101 	u32 size = drm_suballoc_size(bb->bo);
102 
103 	if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
104 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
105 
106 	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);
107 
108 	xe_sa_bo_flush_write(bb->bo);
109 
110 	return xe_sched_job_create(q, addr);
111 }
112 
113 struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
114 						struct xe_bb *bb,
115 						u64 batch_base_ofs,
116 						u32 second_idx)
117 {
118 	u64 addr[2] = {
119 		batch_base_ofs + drm_suballoc_soffset(bb->bo),
120 		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
121 		4 * second_idx,
122 	};
123 
124 	xe_gt_assert(q->gt, second_idx <= bb->len);
125 	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
126 	xe_gt_assert(q->gt, q->width == 1);
127 
128 	return __xe_bb_create_job(q, bb, addr);
129 }
130 
131 struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
132 				      struct xe_bb *bb)
133 {
134 	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
135 
136 	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
137 	xe_gt_assert(q->gt, q->width == 1);
138 	return __xe_bb_create_job(q, bb, &addr);
139 }
140 
141 void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
142 {
143 	if (!bb)
144 		return;
145 
146 	xe_sa_bo_free(bb->bo, fence);
147 	kfree(bb);
148 }
149