xref: /linux/drivers/gpu/drm/i915/gt/intel_timeline.c (revision 815e260a18a3af4dab59025ee99a7156c0e8b5e0)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016-2018 Intel Corporation
 */

#include <drm/drm_cache.h>
#include <drm/drm_print.h>

#include "gem/i915_gem_internal.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define TIMELINE_SEQNO_BYTES 8

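/*
 * Allocate a page of internal memory to back a HW status page (HWSP) and
 * wrap it in a vma in the GT's global GTT; the vma is not pinned here.
 */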
static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

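/*
 * i915_active callbacks: while the timeline has outstanding requests,
 * keep its HWSP vma pinned and hold an extra timeline reference.
 */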
static void __timeline_retire(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	i915_vma_unpin(tl->hwsp_ggtt);
	intel_timeline_put(tl);
}

static int __timeline_active(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	__i915_vma_pin(tl->hwsp_ggtt);
	intel_timeline_get(tl);
	return 0;
}

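/*
 * Map the HWSP object into the kernel address space and clear this
 * timeline's seqno slot so it does not report a stale value.
 */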
I915_SELFTEST_EXPORT int
intel_timeline_pin_map(struct intel_timeline *timeline)
{
	struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
	u32 ofs = offset_in_page(timeline->hwsp_offset);
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	timeline->hwsp_map = vaddr;
	timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
	drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);

	return 0;
}

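/*
 * One-time setup of a timeline: take a reference on (or allocate) the
 * backing HWSP vma, note the seqno offset within it, and initialise the
 * locks, lists and i915_active tracker. The HWSP is neither mapped nor
 * pinned here; that is deferred to intel_timeline_pin().
 */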
static int intel_timeline_init(struct intel_timeline *timeline,
			       struct intel_gt *gt,
			       struct i915_vma *hwsp,
			       unsigned int offset)
{
	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	if (hwsp) {
		timeline->hwsp_offset = offset;
		timeline->hwsp_ggtt = i915_vma_get(hwsp);
	} else {
		timeline->has_initial_breadcrumb = true;
		hwsp = hwsp_alloc(gt);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);
		timeline->hwsp_ggtt = hwsp;
	}

	timeline->hwsp_map = NULL;
	timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;

	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);
	i915_active_init(&timeline->active, __timeline_active,
			 __timeline_retire, 0);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);
}

static void intel_timeline_fini(struct rcu_head *rcu)
{
	struct intel_timeline *timeline =
		container_of(rcu, struct intel_timeline, rcu);

	if (timeline->hwsp_map)
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
	i915_active_fini(&timeline->active);

	/*
	 * A small race exists between intel_gt_retire_requests_timeout and
	 * intel_timeline_exit which could result in the syncmap not getting
	 * freed. Rather than work too hard to seal this race, simply clean up
	 * the syncmap on fini.
	 */
	i915_syncmap_free(&timeline->sync);

	kfree(timeline);
}

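/*
 * Allocate and initialise a timeline, optionally placing its seqno at
 * @offset within a caller-provided global HWSP vma.
 */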
struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
			struct i915_vma *global_hwsp,
			unsigned int offset)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp, offset);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
				  unsigned int offset)
{
	struct i915_vma *hwsp = engine->status_page.vma;
	struct intel_timeline *tl;

	tl = __intel_timeline_create(engine->gt, hwsp, offset);
	if (IS_ERR(tl))
		return tl;

	/* Borrow a nearby lock; we only create these timelines during init */
	mutex_lock(&hwsp->vm->mutex);
	list_add_tail(&tl->engine_link, &engine->status_page.timelines);
	mutex_unlock(&hwsp->vm->mutex);

	return tl;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}

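/*
 * The first pin maps the HWSP, binds it into the GGTT and resolves the
 * final GGTT address of the seqno slot; later pins merely bump the pin
 * count.
 */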
int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	if (!tl->hwsp_map) {
		err = intel_timeline_pin_map(tl);
		if (err)
			return err;
	}

	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	i915_active_acquire(&tl->active);
	if (atomic_fetch_inc(&tl->pin_count)) {
		i915_active_release(&tl->active);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

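/*
 * Rewrite the seqno slot in the HWSP with the timeline's current seqno,
 * e.g. after the HWSP contents may have been lost across suspend.
 */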
void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
	u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;
	/* Must be pinned to be writable, and no requests in flight. */
	GEM_BUG_ON(!atomic_read(&tl->pin_count));

	memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
	WRITE_ONCE(*hwsp_seqno, tl->seqno);
	drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_requests() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count)) {
		/*
		 * The HWSP is volatile, and may have been lost while inactive,
		 * e.g. across suspend/resume. Be paranoid, and ensure that
		 * the HWSP value matches our seqno so we don't proclaim
		 * the next request as already complete.
		 */
		intel_timeline_reset_seqno(tl);
		list_add_tail(&tl->link, &timelines->active_list);
	}
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

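/*
 * Reserve the next seqno (or pair of seqnos, when an initial breadcrumb
 * is also emitted) on this timeline; the timeline must be pinned.
 */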
static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

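/*
 * Handle seqno wraparound by sliding the timeline's seqno to the next
 * TIMELINE_SEQNO_BYTES slot within the HWSP page, skipping offsets that
 * would set bit 5 (see the MI_FLUSH_DW workaround below), and resetting
 * the new slot to the timeline's current seqno.
 */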
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   u32 *seqno)
{
	u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);

	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
		next_ofs = offset_in_page(next_ofs + BIT(5));

	tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
	tl->hwsp_seqno = tl->hwsp_map + next_ofs;
	intel_timeline_reset_seqno(tl);

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->has_initial_breadcrumb))
		return __intel_timeline_get_seqno(tl, seqno);

	return 0;
}

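/*
 * Report the GGTT address of @from's breadcrumb so that @to may wait on
 * it, using i915_active to keep the HWSP alive until @to is retired.
 * Returns 1 when @from has already completed and no wait is required.
 */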
int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline *tl;
	int err;

	rcu_read_lock();
	tl = rcu_dereference(from->timeline);
	if (i915_request_signaled(from) ||
	    !i915_active_acquire_if_busy(&tl->active))
		tl = NULL;

	if (tl) {
		/* hwsp_offset may wraparound, so use from->hwsp_seqno */
		*hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
			offset_in_page(from->hwsp_seqno);
	}

	/* ensure we wait on the right request, if not, we completed */
	if (tl && __i915_request_is_complete(from)) {
		i915_active_release(&tl->active);
		tl = NULL;
	}
	rcu_read_unlock();

	if (!tl)
		return 1;

	/* Can't do semaphore waits on kernel context */
	if (!tl->has_initial_breadcrumb) {
		err = -EINVAL;
		goto out;
	}

	err = i915_active_add_request(&tl->active, to);

out:
	i915_active_release(&tl->active);
	return err;
}

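/*
 * Drop a pin; the final unpin releases the i915_active reference and the
 * GGTT binding taken in intel_timeline_pin().
 */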
void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	i915_active_release(&tl->active);
	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	call_rcu(&timeline->rcu, intel_timeline_fini);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
}

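/*
 * Print every timeline on the active_list to @m, with a summary of its
 * outstanding requests. Each element is temporarily pinned (active_count
 * plus a kref) so the spinlock can be dropped while printing.
 */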
void intel_gt_show_timelines(struct intel_gt *gt,
			     struct drm_printer *m,
			     void (*show_request)(struct drm_printer *m,
						  const struct i915_request *rq,
						  const char *prefix,
						  int indent))
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl, *tn;
	LIST_HEAD(free);

	spin_lock(&timelines->lock);
	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
		unsigned long count, ready, inflight;
		struct i915_request *rq, *rn;
		struct dma_fence *fence;

		if (!mutex_trylock(&tl->mutex)) {
			drm_printf(m, "Timeline %llx: busy; skipping\n",
				   tl->fence_context);
			continue;
		}

		intel_timeline_get(tl);
		GEM_BUG_ON(!atomic_read(&tl->active_count));
		atomic_inc(&tl->active_count); /* pin the list element */
		spin_unlock(&timelines->lock);

		count = 0;
		ready = 0;
		inflight = 0;
		list_for_each_entry_safe(rq, rn, &tl->requests, link) {
			if (i915_request_completed(rq))
				continue;

			count++;
			if (i915_request_is_ready(rq))
				ready++;
			if (i915_request_is_active(rq))
				inflight++;
		}

		drm_printf(m, "Timeline %llx: { ", tl->fence_context);
		drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
			   count, ready, inflight);
		drm_printf(m, ", seqno: { current: %d, last: %d }",
			   *tl->hwsp_seqno, tl->seqno);
		fence = i915_active_fence_get(&tl->last_request);
		if (fence) {
			drm_printf(m, ", engine: %s",
				   to_request(fence)->engine->name);
			dma_fence_put(fence);
		}
		drm_printf(m, " }\n");

		if (show_request) {
			list_for_each_entry_safe(rq, rn, &tl->requests, link)
				show_request(m, rq, "", 2);
		}

		mutex_unlock(&tl->mutex);
		spin_lock(&timelines->lock);

		/* Resume list iteration after reacquiring spinlock */
		list_safe_reset_next(tl, tn, link);
		if (atomic_dec_and_test(&tl->active_count))
			list_del(&tl->link);

		/* Defer the final release to after the spinlock */
		if (refcount_dec_and_test(&tl->kref.refcount)) {
			GEM_BUG_ON(atomic_read(&tl->active_count));
			list_add(&tl->link, &free);
		}
	}
	spin_unlock(&timelines->lock);

	list_for_each_entry_safe(tl, tn, &free, link)
		__intel_timeline_free(&tl->kref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif