xref: /linux/drivers/gpu/drm/i915/gvt/execlist.c (revision e9f0878c4b2004ac19581274c1ae4c61ae3ca70e)
/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

#define _EL_OFFSET_STATUS       0x234
#define _EL_OFFSET_STATUS_BUF   0x370
#define _EL_OFFSET_STATUS_PTR   0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
	(gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))

static int context_switch_events[] = {
	[RCS] = RCS_AS_CONTEXT_SWITCH,
	[BCS] = BCS_AS_CONTEXT_SWITCH,
	[VCS] = VCS_AS_CONTEXT_SWITCH,
	[VCS2] = VCS2_AS_CONTEXT_SWITCH,
	[VECS] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(int ring_id)
{
	if (WARN_ON(ring_id < RCS ||
		    ring_id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[ring_id];
}

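/*
 * Promote the pending execlist slot to be the running slot. If a context
 * was running before the switch, the running context becomes element 0 of
 * the newly promoted slot; otherwise it stays NULL.
 */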
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);
}

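/*
 * Refresh the virtual EXECLIST_STATUS register (_EL_OFFSET_STATUS) so that
 * the active/valid bits, execlist pointers, context id and queue-full bit
 * reflect the running/pending slots currently seen by the guest.
 */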
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt,
			ring_id, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!running->index;
		status.execlist_1_active = status.execlist_1_valid =
			!!running->index;
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!(pending);

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		vgpu->id, status_reg, status.ldw, status.udw);
}

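/*
 * Append one context status event to the virtual context status buffer
 * (CSB): advance the guest's CSB write pointer, store the event in both the
 * CSB MMIO range and the hardware status page, and, unless the caller asks
 * to defer it, raise the engine's context-switch interrupt to the guest.
 */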
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
		struct execlist_context_status_format *status,
		bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
	unsigned long hwsp_gpa;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	/* Update the CSB and CSB write pointer in HWSP */
	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
					 vgpu->hws_pga[ring_id]);
	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 +
			write_pointer * 8,
			status, 8);
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa +
			intel_hws_csb_write_index(dev_priv) * 4,
			&write_pointer, 4);
	}

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
			ring_id_to_context_switch_event(execlist->ring_id));
}

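/*
 * Emulate a schedule-out of @ctx from the virtual execlists. Depending on
 * which element of the running slot completed, this either switches to the
 * second element (element switch) or retires the whole slot, promoting the
 * pending slot when one is queued, and emits the matching CSB events.
 */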
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_vgpu_err("schedule out context is not running context, "
				"ctx id %x running ctx id %x\n",
				ctx->context_id,
				execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
		/*
		 * ctx1 is not valid, ctx == ctx0
		 * ctx1 is valid, ctx1 == ctx
		 *	--> last element is finished
		 * emulate:
		 *	active-to-idle if there is *no* pending execlist
		 *	context-complete if there *is* a pending execlist
		 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}

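/*
 * Pick the slot the guest's next ELSP write should land in, based on the
 * write pointer in the virtual EXECLIST_STATUS register. Returns NULL when
 * both slots are already occupied (queue full).
 */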
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_vgpu_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}

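/*
 * Emulate a schedule-in of the two context descriptors the guest wrote to
 * ELSP. The new pair is copied into a free slot and then reported back as
 * idle-to-active (nothing was running), lite-restore + preempted (the new
 * head context matches one still on the running slot), or simply queued as
 * the pending slot.
 */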
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;
	struct intel_vgpu *vgpu = execlist->vgpu;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_vgpu_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
			slot->index, ctx[0].context_id,
			ctx[1].context_id);

	/*
	 * no running execlist, make this write bundle the running execlist
	 * -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
				execlist->running_slot->index,
				execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * already has a running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
		/* condition a */
		(!same_context(ctx0, execlist->running_context))) ||
			(!valid_context(ctx1) &&
			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * otherwise
		 * --> emulate the "pending execlist exists, but no
		 *     preemption" case
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}
	return 0;
}

#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[(i) * 2]))

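/*
 * Workload "prepare" callback for execlist submission: when the workload
 * came from the first descriptor of an ELSP write, replay that write against
 * the virtual execlists so the guest-visible state moves to schedule-in.
 */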
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;
	int ret;

	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);

	ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx);
	if (ret) {
		gvt_vgpu_err("fail to emulate execlist schedule in\n");
		return ret;
	}
	return 0;
}

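/*
 * Workload "complete" callback: emulate the schedule-out of the finished
 * context unless the workload failed, the engine is being reset, or the
 * next queued workload uses the same context (lite-restore, so the context
 * stays on the virtual hardware). Always unpins the shadow mm and destroys
 * the workload.
 */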
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	int ring_id = workload->ring_id;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, ring_id)->next;
	bool lite_restore = false;
	int ret = 0;

	gvt_dbg_el("complete workload %p status %d\n", workload,
			workload->status);

	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
		goto out;

	if (!list_empty(workload_q_head(vgpu, ring_id))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		goto out;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
	intel_vgpu_unpin_mm(workload->shadow_mm);
	intel_vgpu_destroy_workload(workload);
	return ret;
}

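/*
 * Create a GVT workload for one ELSP context descriptor and queue it to the
 * scheduler. Only the first descriptor of an ELSP write carries the
 * schedule-in emulation, so the ELSP dwords are snapshotted in that case.
 */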
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
		struct execlist_ctx_descriptor_format *desc,
		bool emulate_schedule_in)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_workload *workload = NULL;

	workload = intel_vgpu_create_workload(vgpu, ring_id, desc);
	if (IS_ERR(workload))
		return PTR_ERR(workload);

	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (emulate_schedule_in)
		workload->elsp_dwords = s->execlist[ring_id].elsp_dwords;

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
			emulate_schedule_in);

	intel_vgpu_queue_workload(workload);
	return 0;
}

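/*
 * Entry point for a guest ELSP write: validate the two descriptors (the
 * first must be valid, and every valid descriptor must have its
 * privilege-access bit set, i.e. GGTT-based submissions are rejected), then
 * submit a workload for each valid descriptor. The first descriptor also
 * carries the schedule-in emulation.
 */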
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_ctx_descriptor_format *desc[2];
	int i, ret;

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);

	if (!desc[0]->valid) {
		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
		goto inv_desc;
	}

	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		if (!desc[i]->privilege_access) {
			gvt_vgpu_err("unexpected GGTT elsp submission\n");
			goto inv_desc;
		}
	}

	/* submit workload */
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		ret = submit_context(vgpu, ring_id, desc[i], i == 0);
		if (ret) {
			gvt_vgpu_err("failed to submit desc %d\n", i);
			return ret;
		}
	}

	return 0;

inv_desc:
	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
	return -EINVAL;
}

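/*
 * Reset one engine's virtual execlist state: clear both slots and program
 * the virtual context status pointer with read pointer 0 and write pointer
 * 0x7, the value this file treats as an empty context status buffer.
 */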
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->ring_id = ring_id;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = 0;
	ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}

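/* Free the per-engine ring scan buffers for the engines being cleaned. */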
static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask)
{
	unsigned int tmp;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	struct intel_vgpu_submission *s = &vgpu->submission;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
		kfree(s->ring_scan_buffer[engine->id]);
		s->ring_scan_buffer[engine->id] = NULL;
		s->ring_scan_buffer_size[engine->id] = 0;
	}
}

static void reset_execlist(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	unsigned int tmp;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
		init_vgpu_execlist(vgpu, engine->id);
}

static int init_execlist(struct intel_vgpu *vgpu,
			 unsigned long engine_mask)
{
	reset_execlist(vgpu, engine_mask);
	return 0;
}

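/* vGPU submission ops used when the guest drives the engines via execlists. */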
const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
	.name = "execlist",
	.init = init_execlist,
	.reset = reset_execlist,
	.clean = clean_execlist,
};