xref: /linux/drivers/gpu/drm/i915/gt/intel_renderstate.c (revision f6e8dc9edf963dbc99085e54f6ced6da9daa6100)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <drm/drm_print.h>

#include "gem/i915_gem_internal.h"

#include "i915_drv.h"
#include "intel_renderstate.h"
#include "intel_context.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h"

static const struct intel_renderstate_rodata *
render_state_get_rodata(const struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return NULL;

	switch (GRAPHICS_VER(engine->i915)) {
	case 6:
		return &gen6_null_state;
	case 7:
		return &gen7_null_state;
	case 8:
		return &gen8_null_state;
	case 9:
		return &gen9_null_state;
	}

	return NULL;
}

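/*
 * The gen*_null_state tables above are generated offline; a minimal
 * sketch of their shape, as declared in intel_renderstate.h:
 *
 *	struct intel_renderstate_rodata {
 *		const u32 *reloc;	// byte offsets needing relocation
 *		const u32 *batch;	// command + state payload
 *		const u32 batch_items;	// payload length, in dwords
 *	};
 */
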
/*
 * Macro to add commands to the auxiliary batch.
 * This macro only checks for page overflow before inserting the commands,
 * which is sufficient as the null state generator makes the final batch
 * in two passes, building command and state separately. At that point
 * the sizes of both are known and it compacts them by relocating the state
 * right after the commands, taking care of alignment, so we should have
 * sufficient space below them for adding new commands.
 */
#define OUT_BATCH(batch, i, val)				\
	do {							\
		if ((i) >= PAGE_SIZE / sizeof(u32))		\
			goto out;				\
		(batch)[(i)++] = (val);				\
	} while (0)
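
/*
 * OUT_BATCH bails through a local "out" label on overflow, so every
 * user must provide one. An illustrative sketch of the expected
 * calling pattern (not a real function in this file):
 *
 *	u32 *d = ...;		// CPU mapping of the batch page
 *	unsigned int i = 0;	// write cursor, in dwords
 *
 *	OUT_BATCH(d, i, MI_NOOP);
 *	...
 * out:
 *	...
 */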

static int render_state_setup(struct intel_renderstate *so,
			      struct drm_i915_private *i915)
{
	const struct intel_renderstate_rodata *rodata = so->rodata;
	unsigned int i = 0, reloc_index = 0;
	int ret = -EINVAL;
	u32 *d;

	d = i915_gem_object_pin_map(so->vma->obj, I915_MAP_WB);
	if (IS_ERR(d))
		return PTR_ERR(d);

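	/*
	 * Copy the golden state, patching each relocation as we go: the
	 * reloc table holds byte offsets of dwords that carry an offset
	 * relative to the start of the batch, which we rebase onto the
	 * vma. With 64b relocations the following dword provides the
	 * upper half and must be a 0 placeholder in the source.
	 */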
	while (i < rodata->batch_items) {
		u32 s = rodata->batch[i];

		if (i * 4 == rodata->reloc[reloc_index]) {
			u64 r = s + i915_vma_offset(so->vma);

			s = lower_32_bits(r);
			if (HAS_64BIT_RELOC(i915)) {
				if (i + 1 >= rodata->batch_items ||
				    rodata->batch[i + 1] != 0)
					goto out;

				d[i++] = s;
				s = upper_32_bits(r);
			}

			reloc_index++;
		}

		d[i++] = s;
	}

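	/* The generated reloc tables are terminated by a -1 sentinel. */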
	if (rodata->reloc[reloc_index] != -1) {
		drm_err(&i915->drm, "only %u relocs resolved\n", reloc_index);
		goto out;
	}

	so->batch_offset = i915_ggtt_offset(so->vma);
	so->batch_size = rodata->batch_items * sizeof(u32);

	while (i % CACHELINE_DWORDS)
		OUT_BATCH(d, i, MI_NOOP);

	so->aux_offset = i * sizeof(u32);

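	/*
	 * Page layout at this point (illustrative):
	 *
	 *	[ golden state | MI_NOOP pad to a cacheline | aux commands... ]
	 *	^ batch_offset                               ^ aux_offset
	 */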
	if (HAS_POOLED_EU(i915)) {
		/*
		 * We always program the 3x6 pool config, but depending upon
		 * which subslice is disabled the HW drops down to the
		 * appropriate config shown below.
		 *
		 * In the table below, a 2x6 config always refers to the
		 * fused-down version; native 2x6 is not available and can
		 * be ignored.
		 *
		 * SNo  subslices config                eu pool configuration
		 * -----------------------------------------------------------
		 * 1    3 subslices enabled (3x6)  -    0x00777000  (9+9)
		 * 2    ss0 disabled (2x6)         -    0x00777000  (3+9)
		 * 3    ss1 disabled (2x6)         -    0x00770000  (6+6)
		 * 4    ss2 disabled (2x6)         -    0x00007000  (9+3)
		 */
		u32 eu_pool_config = 0x00777000;

		OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
		OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
		OUT_BATCH(d, i, eu_pool_config);
		OUT_BATCH(d, i, 0);
		OUT_BATCH(d, i, 0);
		OUT_BATCH(d, i, 0);
	}

	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
	so->aux_size = i * sizeof(u32) - so->aux_offset;
	so->aux_offset += so->batch_offset;
	/*
	 * Since we are sending length, we need to strictly conform to
	 * all requirements. For Gen2 this must be a multiple of 8.
	 */
	so->aux_size = ALIGN(so->aux_size, 8);

	ret = 0;
out:
	__i915_gem_object_flush_map(so->vma->obj, 0, i * sizeof(u32));
	__i915_gem_object_release_map(so->vma->obj);
	return ret;
}

#undef OUT_BATCH

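/*
 * A sketch of the expected lifecycle, assuming a caller along the lines
 * of the engine default-state recording (names abbreviated):
 *
 *	struct intel_renderstate so;
 *	int err;
 *
 *	err = intel_renderstate_init(&so, ce);
 *	if (err)
 *		return err;
 *
 *	err = intel_renderstate_emit(&so, rq);	// rq runs on ce
 *
 *	intel_renderstate_fini(&so, ce);	// always, pairs with init
 */
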
int intel_renderstate_init(struct intel_renderstate *so,
			   struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct drm_i915_gem_object *obj = NULL;
	int err;

	memset(so, 0, sizeof(*so));

	so->rodata = render_state_get_rodata(engine);
	if (so->rodata) {
		if (so->rodata->batch_items * 4 > PAGE_SIZE)
			return -EINVAL;

		obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
		if (IS_ERR(so->vma)) {
			err = PTR_ERR(so->vma);
			goto err_obj;
		}
	}

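	/*
	 * Pin the context and batch under a ww acquire context: any
	 * -EDEADLK from the locking below unwinds to err_fini, backs
	 * off (dropping contended locks) and retries from scratch.
	 */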
	i915_gem_ww_ctx_init(&so->ww, true);
retry:
	err = intel_context_pin_ww(ce, &so->ww);
	if (err)
		goto err_fini;

	/* return early if there's nothing to setup */
	if (!err && !so->rodata)
		return 0;

	err = i915_gem_object_lock(so->vma->obj, &so->ww);
	if (err)
		goto err_context;

	err = i915_vma_pin_ww(so->vma, &so->ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		goto err_context;

	err = render_state_setup(so, engine->i915);
	if (err)
		goto err_unpin;

	return 0;

err_unpin:
	i915_vma_unpin(so->vma);
err_context:
	intel_context_unpin(ce);
err_fini:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&so->ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&so->ww);
err_obj:
	if (obj)
		i915_gem_object_put(obj);
	so->vma = NULL;
	return err;
}

int intel_renderstate_emit(struct intel_renderstate *so,
			   struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	int err;

	if (!so->vma)
		return 0;

	err = i915_vma_move_to_active(so->vma, rq, 0);
	if (err)
		return err;

	err = engine->emit_bb_start(rq,
				    so->batch_offset, so->batch_size,
				    I915_DISPATCH_SECURE);
	if (err)
		return err;

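	/*
	 * The aux batch is padded to 8 bytes and always ends in
	 * MI_BATCH_BUFFER_END, so a size above 8 means it carries real
	 * commands (the pooled-EU setup) worth a second dispatch.
	 */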
	if (so->aux_size > 8) {
		err = engine->emit_bb_start(rq,
					    so->aux_offset, so->aux_size,
					    I915_DISPATCH_SECURE);
		if (err)
			return err;
	}

	return 0;
}

void intel_renderstate_fini(struct intel_renderstate *so,
			    struct intel_context *ce)
{
	if (so->vma) {
		i915_vma_unpin(so->vma);
		i915_vma_close(so->vma);
	}

	intel_context_unpin(ce);
	i915_gem_ww_ctx_fini(&so->ww);

	if (so->vma)
		i915_gem_object_put(so->vma->obj);
}