xref: /linux/drivers/gpu/drm/v3d/v3d_gem.c (revision 55223394d56bab42ebac71ba52e0fd8bfdc6fc07)
// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2014-2018 Broadcom */

#include <drm/drmP.h>
#include <drm/drm_syncobj.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/sched/signal.h>

#include "uapi/drm/v3d_drm.h"
#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
	/* Set OVRTMUOUT, which means that the texture sampler uniform
	 * configuration's tmu output type field is used, instead of
	 * using the hardware default behavior based on the texture
	 * type.  If you want the default behavior, you can still put
	 * "2" in the indirect texture state's output_type field.
	 */
	if (v3d->ver < 40)
		V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

	/* Whenever we flush the L2T cache, we always want to flush
	 * the whole thing.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}

/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
	v3d_init_core(v3d, 0);
}

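/* Asks the GMP to stop accepting new AXI transactions and waits for
 * outstanding reads/writes and any configuration update to complete,
 * so that the core can be powered down safely.
 */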
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);

	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
		      (V3D_GMP_STATUS_RD_COUNT_MASK |
		       V3D_GMP_STATUS_WR_COUNT_MASK |
		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
	}
}

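/* Requests a safe shutdown of the GCA (only present before V3D 4.1)
 * and waits for the hardware to acknowledge it.
 */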
static void
v3d_idle_gca(struct v3d_dev *v3d)
{
	if (v3d->ver >= 41)
		return;

	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
	}
}

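/* Resets V3D by pulsing the SW_INIT line in the GR bridge.  Which
 * SW_INIT register is used depends on the bridge revision.
 */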
static void
v3d_reset_by_bridge(struct v3d_dev *v3d)
{
	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
		 * of the unit, so reset it to its power-on value here.
		 */
		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
	} else {
		WARN_ON_ONCE(V3D_GET_FIELD(version,
					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
	}
}

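/* Resets the V3D core, preferring the reset controller if one was
 * found and falling back to the GR bridge otherwise, then
 * reinitializes the invariant HW state.
 */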
static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
	if (v3d->reset)
		reset_control_reset(v3d->reset);
	else
		v3d_reset_by_bridge(v3d);

	v3d_init_hw_state(v3d);
}

void
v3d_reset(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;

	DRM_ERROR("Resetting GPU.\n");
	trace_v3d_reset_begin(dev);

	/* XXX: only needed for safe powerdown, not reset. */
	if (false)
		v3d_idle_axi(v3d, 0);

	v3d_idle_gca(v3d);
	v3d_reset_v3d(v3d);

	v3d_mmu_set_page_table(v3d);
	v3d_irq_reset(v3d);

	trace_v3d_reset_end(dev);
}

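/* Flushes the L3 cache through the GCA.  The GCA is only present
 * before V3D 4.1, so this is a no-op on newer cores.
 */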
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
	if (v3d->ver < 41) {
		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

		if (v3d->ver < 33) {
			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
		}
	}
}

/* Invalidates the (read-only) L2C cache.  This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
	if (v3d->ver > 32)
		return;

	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
		       V3D_L2CACTL_L2CCLR |
		       V3D_L2CACTL_L2CENA);
}

/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
	 * need to wait for completion before dispatching the job --
	 * L2T accesses will be stalled until the flush has completed.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
}

/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
	/* Invalidate the caches from the outside in.  That way if
	 * another CL's concurrent use of nearby memory were to pull
	 * an invalidated cacheline back in, we wouldn't leave stale
	 * data in the inner cache.
	 */
	v3d_flush_l3(v3d);
	v3d_invalidate_l2c(v3d, 0);
	v3d_flush_l2t(v3d, 0);
	v3d_invalidate_slices(v3d, 0);
}

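/* Attaches the job's done fence to each BO's reservation object as an
 * exclusive fence, so later users of the BOs wait for the job to
 * complete.
 */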
static void
v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
			 struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		reservation_object_add_excl_fence(bos[i]->base.base.resv,
						  fence);
	}
}

static void
v3d_unlock_bo_reservations(struct v3d_bo **bos,
			   int bo_count,
			   struct ww_acquire_ctx *acquire_ctx)
{
	drm_gem_unlock_reservations((struct drm_gem_object **)bos, bo_count,
				    acquire_ctx);
}

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_bo **bos,
			 int bo_count,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations((struct drm_gem_object **)bos,
					bo_count, acquire_ctx);
	if (ret)
		return ret;

	/* Reserve space for our shared (read-only) fence references,
	 * before we commit the CL to the hardware.
	 */
	for (i = 0; i < bo_count; i++) {
		ret = reservation_object_reserve_shared(bos[i]->base.base.resv,
							1);
		if (ret) {
			v3d_unlock_bo_reservations(bos, bo_count,
						   acquire_ctx);
			return ret;
		}
	}

	return 0;
}

/**
 * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @args: IOCTL args from userspace, containing the BO handle list
 * @exec: V3D job being set up
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_exec_cleanup() time.
 */
static int
v3d_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct drm_v3d_submit_cl *args,
		  struct v3d_exec_info *exec)
{
	u32 *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	exec->bo = kvmalloc_array(exec->bo_count,
				  sizeof(struct drm_gem_cma_object *),
				  GFP_KERNEL | __GFP_ZERO);
	if (!exec->bo) {
		DRM_DEBUG("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
	if (!handles) {
		ret = -ENOMEM;
		DRM_DEBUG("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(u32))) {
		ret = -EFAULT;
		DRM_DEBUG("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -ENOENT;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_get(bo);
		exec->bo[i] = to_v3d_bo(bo);
	}
	spin_unlock(&file_priv->table_lock);

fail:
	kvfree(handles);
	return ret;
}

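/* Final unref of a CL job: drops its fence and BO references and
 * releases the runtime PM reference taken at submit time.
 */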
static void
v3d_exec_cleanup(struct kref *ref)
{
	struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
						  refcount);
	struct v3d_dev *v3d = exec->v3d;
	unsigned int i;
	struct v3d_bo *bo, *save;

	dma_fence_put(exec->bin.in_fence);
	dma_fence_put(exec->render.in_fence);

	dma_fence_put(exec->bin.irq_fence);
	dma_fence_put(exec->render.irq_fence);

	dma_fence_put(exec->bin_done_fence);
	dma_fence_put(exec->render_done_fence);

	for (i = 0; i < exec->bo_count; i++)
		drm_gem_object_put_unlocked(&exec->bo[i]->base.base);
	kvfree(exec->bo);

	list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
		drm_gem_object_put_unlocked(&bo->base.base);
	}

	pm_runtime_mark_last_busy(v3d->dev);
	pm_runtime_put_autosuspend(v3d->dev);

	kfree(exec);
}

void v3d_exec_put(struct v3d_exec_info *exec)
{
	kref_put(&exec->refcount, v3d_exec_cleanup);
}

static void
v3d_tfu_job_cleanup(struct kref *ref)
{
	struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
					       refcount);
	struct v3d_dev *v3d = job->v3d;
	unsigned int i;

	dma_fence_put(job->in_fence);
	dma_fence_put(job->irq_fence);

	for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
		if (job->bo[i])
			drm_gem_object_put_unlocked(&job->bo[i]->base.base);
	}

	pm_runtime_mark_last_busy(v3d->dev);
	pm_runtime_put_autosuspend(v3d->dev);

	kfree(job);
}

void v3d_tfu_job_put(struct v3d_tfu_job *job)
{
	kref_put(&job->refcount, v3d_tfu_job_cleanup);
}

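/**
 * v3d_wait_bo_ioctl() - Waits for completion of work on a BO.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Waits (up to the timeout provided by userspace) for all fences on
 * the BO's reservation object to signal.  The remaining timeout is
 * written back so that a restarted ioctl doesn't wait longer than
 * originally requested.
 */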
int
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_v3d_wait_bo *args = data;
	ktime_t start = ktime_get();
	u64 delta_ns;
	unsigned long timeout_jiffies =
		nsecs_to_jiffies_timeout(args->timeout_ns);

	if (args->pad != 0)
		return -EINVAL;

	ret = drm_gem_reservation_object_wait(file_priv, args->handle,
					      true, timeout_jiffies);

	/* Decrement the user's timeout, in case we got interrupted
	 * such that the ioctl will be restarted.
	 */
	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
	if (delta_ns < args->timeout_ns)
		args->timeout_ns -= delta_ns;
	else
		args->timeout_ns = 0;

	/* Asked to wait beyond the jiffy/scheduler precision? */
	if (ret == -ETIME && args->timeout_ns)
		ret = -EAGAIN;

	return ret;
}

/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_exec_info *exec;
	struct ww_acquire_ctx acquire_ctx;
	struct drm_syncobj *sync_out;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad != 0) {
		DRM_INFO("pad must be zero: %d\n", args->pad);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec)
		return -ENOMEM;

	ret = pm_runtime_get_sync(v3d->dev);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	kref_init(&exec->refcount);

	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
				     0, 0, &exec->bin.in_fence);
	if (ret == -EINVAL)
		goto fail;

	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
				     0, 0, &exec->render.in_fence);
	if (ret == -EINVAL)
		goto fail;

	exec->qma = args->qma;
	exec->qms = args->qms;
	exec->qts = args->qts;
	exec->bin.exec = exec;
	exec->bin.start = args->bcl_start;
	exec->bin.end = args->bcl_end;
	exec->render.exec = exec;
	exec->render.start = args->rcl_start;
	exec->render.end = args->rcl_end;
	exec->v3d = v3d;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count,
				       &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	if (exec->bin.start != exec->bin.end) {
		ret = drm_sched_job_init(&exec->bin.base,
					 &v3d_priv->sched_entity[V3D_BIN],
					 v3d_priv);
		if (ret)
			goto fail_unreserve;

		exec->bin_done_fence =
			dma_fence_get(&exec->bin.base.s_fence->finished);

		kref_get(&exec->refcount); /* put by scheduler job completion */
		drm_sched_entity_push_job(&exec->bin.base,
					  &v3d_priv->sched_entity[V3D_BIN]);
	}

	ret = drm_sched_job_init(&exec->render.base,
				 &v3d_priv->sched_entity[V3D_RENDER],
				 v3d_priv);
	if (ret)
		goto fail_unreserve;

	exec->render_done_fence =
		dma_fence_get(&exec->render.base.s_fence->finished);

	kref_get(&exec->refcount); /* put by scheduler job completion */
	drm_sched_entity_push_job(&exec->render.base,
				  &v3d_priv->sched_entity[V3D_RENDER]);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_object_fences(exec->bo, exec->bo_count,
				 exec->render_done_fence);

	v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);

	/* Update the return sync object for the job. */
	sync_out = drm_syncobj_find(file_priv, args->out_sync);
	if (sync_out) {
		drm_syncobj_replace_fence(sync_out, exec->render_done_fence);
		drm_syncobj_put(sync_out);
	}

	v3d_exec_put(exec);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
	v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
fail:
	v3d_exec_put(exec);

	return ret;
}

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_tfu_job *job;
	struct ww_acquire_ctx acquire_ctx;
	struct drm_syncobj *sync_out;
	struct dma_fence *sched_done_fence;
	int ret = 0;
	int bo_count;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	job = kcalloc(1, sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	ret = pm_runtime_get_sync(v3d->dev);
	if (ret < 0) {
		kfree(job);
		return ret;
	}

	kref_init(&job->refcount);

	ret = drm_syncobj_find_fence(file_priv, args->in_sync,
				     0, 0, &job->in_fence);
	if (ret == -EINVAL)
		goto fail;

	job->args = *args;
	job->v3d = v3d;

	spin_lock(&file_priv->table_lock);
	for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[bo_count])
			break;

		bo = idr_find(&file_priv->object_idr,
			      args->bo_handles[bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  bo_count, args->bo_handles[bo_count]);
			ret = -ENOENT;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_get(bo);
		job->bo[bo_count] = to_v3d_bo(bo);
	}
	spin_unlock(&file_priv->table_lock);

	ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	ret = drm_sched_job_init(&job->base,
				 &v3d_priv->sched_entity[V3D_TFU],
				 v3d_priv);
	if (ret)
		goto fail_unreserve;

	sched_done_fence = dma_fence_get(&job->base.s_fence->finished);

	kref_get(&job->refcount); /* put by scheduler job completion */
	drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);

	v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);

	/* Update the return sync object */
	sync_out = drm_syncobj_find(file_priv, args->out_sync);
	if (sync_out) {
		drm_syncobj_replace_fence(sync_out, sched_done_fence);
		drm_syncobj_put(sync_out);
	}
	dma_fence_put(sched_done_fence);

	v3d_tfu_job_put(job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
	v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
fail:
	v3d_tfu_job_put(job);

	return ret;
}

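/* One-time GEM setup at driver load: initializes the locks and the
 * address-space allocator, allocates the MMU page table, programs the
 * invariant HW state, and brings up the scheduler.
 */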
int
v3d_gem_init(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	u32 pt_size = 4096 * 1024;
	int ret, i;

	for (i = 0; i < V3D_MAX_QUEUES; i++)
		v3d->queue[i].fence_context = dma_fence_context_alloc(1);

	spin_lock_init(&v3d->mm_lock);
	spin_lock_init(&v3d->job_lock);
	mutex_init(&v3d->bo_lock);
	mutex_init(&v3d->reset_lock);
	mutex_init(&v3d->sched_lock);

	/* Note: We don't allocate address 0.  Various bits of HW
	 * treat 0 as special, such as the occlusion query counters
	 * where 0 means "disabled".
	 */
	drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);

	v3d->pt = dma_alloc_wc(v3d->dev, pt_size,
			       &v3d->pt_paddr,
			       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!v3d->pt) {
		drm_mm_takedown(&v3d->mm);
		dev_err(v3d->dev,
			"Failed to allocate page tables. "
			"Please ensure you have CMA enabled.\n");
		return -ENOMEM;
	}

	v3d_init_hw_state(v3d);
	v3d_mmu_set_page_table(v3d);

	ret = v3d_sched_init(v3d);
	if (ret) {
		drm_mm_takedown(&v3d->mm);
		dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt,
				  v3d->pt_paddr);
		return ret;
	}

	return 0;
}

void
v3d_gem_destroy(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);

	v3d_sched_fini(v3d);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(v3d->bin_job);
	WARN_ON(v3d->render_job);

	drm_mm_takedown(&v3d->mm);

	dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr);
}
728