xref: /linux/drivers/gpu/drm/xe/xe_bb.c (revision d2e20c8951e4bb5f4a828aed39813599980353b6)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_bb.h"
7 
8 #include "instructions/xe_mi_commands.h"
9 #include "xe_assert.h"
10 #include "xe_device_types.h"
11 #include "xe_exec_queue_types.h"
12 #include "xe_gt.h"
13 #include "xe_sa.h"
14 #include "xe_sched_job.h"
15 #include "xe_vm_types.h"
16 
17 static int bb_prefetch(struct xe_gt *gt)
18 {
19 	struct xe_device *xe = gt_to_xe(gt);
20 
21 	if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
22 		/*
23 		 * RCS and CCS require 1K, although other engines would be
24 		 * okay with 512.
25 		 */
26 		return SZ_1K;
27 	else
28 		return SZ_512;
29 }
30 
31 struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
32 {
33 	struct xe_tile *tile = gt_to_tile(gt);
34 	struct xe_bb *bb = kmalloc_obj(*bb);
35 	int err;
36 
37 	if (!bb)
38 		return ERR_PTR(-ENOMEM);
39 
40 	/*
41 	 * We need to allocate space for the requested number of dwords,
42 	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
43 	 * space to accommodate the platform-specific hardware prefetch
44 	 * requirements.
45 	 */
46 	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
47 			      4 * (dwords + 1) + bb_prefetch(gt));
48 	if (IS_ERR(bb->bo)) {
49 		err = PTR_ERR(bb->bo);
50 		goto err;
51 	}
52 
53 	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
54 	bb->len = 0;
55 
56 	return bb;
57 err:
58 	kfree(bb);
59 	return ERR_PTR(err);
60 }
61 
62 /**
63  * xe_bb_alloc() - Allocate a new batch buffer structure
64  * @gt: the &xe_gt
65  *
66  * Allocates and initializes a new xe_bb structure with an associated
67  * uninitialized suballoc object.
68  *
69  * Returns: Batch buffer structure or an ERR_PTR(-ENOMEM).
70  */
71 struct xe_bb *xe_bb_alloc(struct xe_gt *gt)
72 {
73 	struct xe_bb *bb = kmalloc_obj(*bb);
74 	int err;
75 
76 	if (!bb)
77 		return ERR_PTR(-ENOMEM);
78 
79 	bb->bo = xe_sa_bo_alloc(GFP_KERNEL);
80 	if (IS_ERR(bb->bo)) {
81 		err = PTR_ERR(bb->bo);
82 		goto err;
83 	}
84 
85 	return bb;
86 
87 err:
88 	kfree(bb);
89 	return ERR_PTR(err);
90 }
91 
92 /**
93  * xe_bb_init() - Initialize a batch buffer with memory from a sub-allocator pool
94  * @bb: Batch buffer structure to initialize
95  * @bb_pool: Suballoc memory pool to allocate from
96  * @dwords: Number of dwords to be allocated
97  *
98  * Initializes the batch buffer by allocating memory from the specified
99  * suballoc pool.
100  *
101  * Return: 0 on success, negative error code on failure.
102  */
103 int xe_bb_init(struct xe_bb *bb, struct xe_sa_manager *bb_pool, u32 dwords)
104 {
105 	int err;
106 
107 	/*
108 	 * We need to allocate space for the requested number of dwords &
109 	 * one additional MI_BATCH_BUFFER_END dword. Since the whole SA
110 	 * is submitted to HW, we need to make sure that the last instruction
111 	 * is not over written when the last chunk of SA is allocated for BB.
112 	 * So, this extra DW acts as a guard here.
113 	 */
114 	err = xe_sa_bo_init(bb_pool, bb->bo, 4 * (dwords + 1));
115 	if (err)
116 		return err;
117 
118 	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
119 	bb->len = 0;
120 
121 	return 0;
122 }
123 
124 static struct xe_sched_job *
125 __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
126 {
127 	u32 size = drm_suballoc_size(bb->bo);
128 
129 	if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
130 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
131 
132 	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);
133 
134 	xe_sa_bo_flush_write(bb->bo);
135 
136 	return xe_sched_job_create(q, addr);
137 }
138 
139 struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
140 						struct xe_bb *bb,
141 						u64 batch_base_ofs,
142 						u32 second_idx)
143 {
144 	u64 addr[2] = {
145 		batch_base_ofs + drm_suballoc_soffset(bb->bo),
146 		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
147 		4 * second_idx,
148 	};
149 
150 	xe_gt_assert(q->gt, second_idx <= bb->len);
151 	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
152 	xe_gt_assert(q->gt, q->width == 1);
153 
154 	return __xe_bb_create_job(q, bb, addr);
155 }
156 
157 struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
158 				      struct xe_bb *bb)
159 {
160 	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
161 
162 	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
163 	xe_gt_assert(q->gt, q->width == 1);
164 	return __xe_bb_create_job(q, bb, &addr);
165 }
166 
167 void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
168 {
169 	if (!bb)
170 		return;
171 
172 	xe_sa_bo_free(bb->bo, fence);
173 	kfree(bb);
174 }
175