xref: /linux/drivers/gpu/drm/i915/gt/intel_timeline.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016-2018 Intel Corporation
 */

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define TIMELINE_SEQNO_BYTES 8

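/*
 * Allocate a page of internal memory to back a timeline's HWSP
 * (hardware status page) and wrap it in a GGTT vma. The object is
 * marked LLC cache coherent; on failure the object reference is
 * dropped and the error is propagated via the returned vma.
 */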
static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

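/*
 * i915_active callbacks: while the timeline has requests in flight,
 * __timeline_active() keeps the HWSP vma pinned and holds a reference
 * on the timeline; __timeline_retire() drops both once the last
 * request has been retired.
 */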
static void __timeline_retire(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	i915_vma_unpin(tl->hwsp_ggtt);
	intel_timeline_put(tl);
}

static int __timeline_active(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	__i915_vma_pin(tl->hwsp_ggtt);
	intel_timeline_get(tl);
	return 0;
}

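/*
 * Map the object backing the HWSP and record both the kernel mapping
 * and the location of this timeline's seqno slot; the slot is zeroed
 * and its cacheline flushed back to memory.
 */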
I915_SELFTEST_EXPORT int
intel_timeline_pin_map(struct intel_timeline *timeline)
{
	struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
	u32 ofs = offset_in_page(timeline->hwsp_offset);
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	timeline->hwsp_map = vaddr;
	timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
	drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);

	return 0;
}

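/*
 * Common timeline setup. If @hwsp is supplied, the timeline borrows a
 * TIMELINE_SEQNO_BYTES slot at @offset inside that status page;
 * otherwise a private HWSP page is allocated and the timeline gains an
 * initial breadcrumb. The HWSP is not mapped here: hwsp_seqno
 * temporarily holds the offset until intel_timeline_pin_map() replaces
 * it with the real pointer.
 */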
static int intel_timeline_init(struct intel_timeline *timeline,
			       struct intel_gt *gt,
			       struct i915_vma *hwsp,
			       unsigned int offset)
{
	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	if (hwsp) {
		timeline->hwsp_offset = offset;
		timeline->hwsp_ggtt = i915_vma_get(hwsp);
	} else {
		timeline->has_initial_breadcrumb = true;
		hwsp = hwsp_alloc(gt);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);
		timeline->hwsp_ggtt = hwsp;
	}

	timeline->hwsp_map = NULL;
	timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;

	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);
	i915_active_init(&timeline->active, __timeline_active,
			 __timeline_retire, 0);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);
}

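/*
 * RCU-deferred destruction: unmap and release the HWSP, finish the
 * active tracker and free any remaining syncmap entries before the
 * timeline itself is freed.
 */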
static void intel_timeline_fini(struct rcu_head *rcu)
{
	struct intel_timeline *timeline =
		container_of(rcu, struct intel_timeline, rcu);

	if (timeline->hwsp_map)
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
	i915_active_fini(&timeline->active);

	/*
	 * A small race exists between intel_gt_retire_requests_timeout and
	 * intel_timeline_exit which could result in the syncmap not getting
	 * freed. Rather than work too hard to seal this race, simply clean up
	 * the syncmap on fini.
	 */
	i915_syncmap_free(&timeline->sync);

	kfree(timeline);
}

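/*
 * Rough sketch of the expected flow for a standalone timeline (error
 * handling and the surrounding request machinery elided):
 *
 *	tl = __intel_timeline_create(gt, NULL, 0);
 *	err = intel_timeline_pin(tl, ww);	   (map + pin the HWSP)
 *	err = intel_timeline_get_seqno(tl, rq, &seqno);
 *	...
 *	intel_timeline_unpin(tl);
 *	intel_timeline_put(tl);			   (frees via __intel_timeline_free)
 */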
struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
			struct i915_vma *global_hwsp,
			unsigned int offset)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp, offset);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

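/*
 * Create a timeline that shares an engine's status page instead of
 * owning a private HWSP, and link it onto the engine's list of
 * status-page timelines.
 */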
struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
				  unsigned int offset)
{
	struct i915_vma *hwsp = engine->status_page.vma;
	struct intel_timeline *tl;

	tl = __intel_timeline_create(engine->gt, hwsp, offset);
	if (IS_ERR(tl))
		return tl;

	/* Borrow a nearby lock; we only create these timelines during init */
	mutex_lock(&hwsp->vm->mutex);
	list_add_tail(&tl->engine_link, &engine->status_page.timelines);
	mutex_unlock(&hwsp->vm->mutex);

	return tl;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}

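/*
 * The first pin of a timeline maps the HWSP, pins it into the GGTT and
 * acquires the timeline's i915_active; subsequent pins only bump
 * pin_count. If a concurrent pinner wins the race (pin_count is
 * already non-zero after the atomic_fetch_inc()), the extra vma pin
 * and active reference taken here are released again.
 */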
int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	if (!tl->hwsp_map) {
		err = intel_timeline_pin_map(tl);
		if (err)
			return err;
	}

	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	i915_active_acquire(&tl->active);
	if (atomic_fetch_inc(&tl->pin_count)) {
		i915_active_release(&tl->active);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

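/*
 * Rewrite the HWSP seqno slot to match the timeline's current software
 * seqno (clearing the rest of the TIMELINE_SEQNO_BYTES slot), so that
 * stale HWSP contents cannot make the next request appear complete.
 */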
void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
	u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;

	/* Must be pinned to be writable, and no requests in flight. */
	GEM_BUG_ON(!atomic_read(&tl->pin_count));

	memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
	WRITE_ONCE(*hwsp_seqno, tl->seqno);
	drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_requests() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count)) {
		/*
		 * The HWSP is volatile, and may have been lost while inactive,
		 * e.g. across suspend/resume. Be paranoid, and ensure that
		 * the HWSP value matches our seqno so we don't proclaim
		 * the next request as already complete.
		 */
		intel_timeline_reset_seqno(tl);
		list_add_tail(&tl->link, &timelines->active_list);
	}
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

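/*
 * Seqno wraparound slow path: move this timeline onto the next
 * TIMELINE_SEQNO_BYTES slot within its HWSP page, skipping offsets
 * with bit 5 set per the MI_FLUSH_DW workaround below, and reseed the
 * new slot before handing out the wrapped seqno.
 */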
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   u32 *seqno)
{
	u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);

	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
		next_ofs = offset_in_page(next_ofs + BIT(5));

	tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
	tl->hwsp_seqno = tl->hwsp_map + next_ofs;
	intel_timeline_reset_seqno(tl);

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;
}

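/*
 * Advance the timeline and return the seqno to use for @rq. On
 * wraparound, timelines with an initial breadcrumb switch to a fresh
 * HWSP slot (see __intel_timeline_get_seqno() above) to keep hardware
 * semaphores keyed to the old slot working across the wrap.
 */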
int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->has_initial_breadcrumb))
		return __intel_timeline_get_seqno(tl, seqno);

	return 0;
}

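/*
 * Look up the GGTT address of @from's breadcrumb so that @to can wait
 * on it with a semaphore. Returns 1 if @from has already completed (no
 * wait is needed), -EINVAL if @from's timeline has no initial
 * breadcrumb (e.g. the kernel context), and 0 on success with @to
 * added to the timeline's active tracker to keep the HWSP alive.
 */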
int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline *tl;
	int err;

	rcu_read_lock();
	tl = rcu_dereference(from->timeline);
	if (i915_request_signaled(from) ||
	    !i915_active_acquire_if_busy(&tl->active))
		tl = NULL;

	if (tl) {
		/* hwsp_offset may wraparound, so use from->hwsp_seqno */
		*hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
			offset_in_page(from->hwsp_seqno);
	}

	/* ensure we wait on the right request, if not, we completed */
	if (tl && __i915_request_is_complete(from)) {
		i915_active_release(&tl->active);
		tl = NULL;
	}
	rcu_read_unlock();

	if (!tl)
		return 1;

	/* Can't do semaphore waits on kernel context */
	if (!tl->has_initial_breadcrumb) {
		err = -EINVAL;
		goto out;
	}

	err = i915_active_add_request(&tl->active, to);

out:
	i915_active_release(&tl->active);
	return err;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	i915_active_release(&tl->active);
	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	call_rcu(&timeline->rcu, intel_timeline_fini);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
}

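/*
 * Debug dump of every timeline on @gt's active_list. Busy timelines
 * (mutex contended) are skipped; for the rest, the list element is
 * pinned via active_count so timelines->lock can be dropped while
 * printing, and any final frees are deferred until after the lock is
 * released.
 */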
void intel_gt_show_timelines(struct intel_gt *gt,
			     struct drm_printer *m,
			     void (*show_request)(struct drm_printer *m,
						  const struct i915_request *rq,
						  const char *prefix,
						  int indent))
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl, *tn;
	LIST_HEAD(free);

	spin_lock(&timelines->lock);
	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
		unsigned long count, ready, inflight;
		struct i915_request *rq, *rn;
		struct dma_fence *fence;

		if (!mutex_trylock(&tl->mutex)) {
			drm_printf(m, "Timeline %llx: busy; skipping\n",
				   tl->fence_context);
			continue;
		}

		intel_timeline_get(tl);
		GEM_BUG_ON(!atomic_read(&tl->active_count));
		atomic_inc(&tl->active_count); /* pin the list element */
		spin_unlock(&timelines->lock);

		count = 0;
		ready = 0;
		inflight = 0;
		list_for_each_entry_safe(rq, rn, &tl->requests, link) {
			if (i915_request_completed(rq))
				continue;

			count++;
			if (i915_request_is_ready(rq))
				ready++;
			if (i915_request_is_active(rq))
				inflight++;
		}

		drm_printf(m, "Timeline %llx: { ", tl->fence_context);
		drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
			   count, ready, inflight);
		drm_printf(m, ", seqno: { current: %d, last: %d }",
			   *tl->hwsp_seqno, tl->seqno);
		fence = i915_active_fence_get(&tl->last_request);
		if (fence) {
			drm_printf(m, ", engine: %s",
				   to_request(fence)->engine->name);
			dma_fence_put(fence);
		}
		drm_printf(m, " }\n");

		if (show_request) {
			list_for_each_entry_safe(rq, rn, &tl->requests, link)
				show_request(m, rq, "", 2);
		}

		mutex_unlock(&tl->mutex);
		spin_lock(&timelines->lock);

		/* Resume list iteration after reacquiring spinlock */
		list_safe_reset_next(tl, tn, link);
		if (atomic_dec_and_test(&tl->active_count))
			list_del(&tl->link);

		/* Defer the final release to after the spinlock */
		if (refcount_dec_and_test(&tl->kref.refcount)) {
			GEM_BUG_ON(atomic_read(&tl->active_count));
			list_add(&tl->link, &free);
		}
	}
	spin_unlock(&timelines->lock);

	list_for_each_entry_safe(tl, tn, &free, link)
		__intel_timeline_free(&tl->kref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif