xref: /linux/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c (revision cfda8617e22a8bf217a613d0b3ba3a38778443ba)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2008,2010 Intel Corporation
5  */
6 
7 #include <linux/intel-iommu.h>
8 #include <linux/dma-resv.h>
9 #include <linux/sync_file.h>
10 #include <linux/uaccess.h>
11 
12 #include <drm/drm_syncobj.h>
13 #include <drm/i915_drm.h>
14 
15 #include "display/intel_frontbuffer.h"
16 
17 #include "gem/i915_gem_ioctls.h"
18 #include "gt/intel_context.h"
19 #include "gt/intel_engine_pool.h"
20 #include "gt/intel_gt.h"
21 #include "gt/intel_gt_pm.h"
22 #include "gt/intel_ring.h"
23 
24 #include "i915_drv.h"
25 #include "i915_gem_clflush.h"
26 #include "i915_gem_context.h"
27 #include "i915_gem_ioctls.h"
28 #include "i915_trace.h"
29 
30 enum {
31 	FORCE_CPU_RELOC = 1,
32 	FORCE_GTT_RELOC,
33 	FORCE_GPU_RELOC,
34 #define DBG_FORCE_RELOC 0 /* choose one of the above! */
35 };
36 
37 #define __EXEC_OBJECT_HAS_REF		BIT(31)
38 #define __EXEC_OBJECT_HAS_PIN		BIT(30)
39 #define __EXEC_OBJECT_HAS_FENCE		BIT(29)
40 #define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
41 #define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
42 #define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
43 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
44 
45 #define __EXEC_HAS_RELOC	BIT(31)
46 #define __EXEC_VALIDATED	BIT(30)
47 #define __EXEC_INTERNAL_FLAGS	(~0u << 30)
48 #define UPDATE			PIN_OFFSET_FIXED
49 
50 #define BATCH_OFFSET_BIAS (256*1024)
51 
52 #define __I915_EXEC_ILLEGAL_FLAGS \
53 	(__I915_EXEC_UNKNOWN_FLAGS | \
54 	 I915_EXEC_CONSTANTS_MASK  | \
55 	 I915_EXEC_RESOURCE_STREAMER)
56 
57 /* Catch emission of unexpected errors for CI! */
58 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
59 #undef EINVAL
60 #define EINVAL ({ \
61 	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
62 	22; \
63 })
64 #endif
65 
66 /**
67  * DOC: User command execution
68  *
69  * Userspace submits commands to be executed on the GPU as an instruction
70  * stream within a GEM object we call a batchbuffer. These instructions may
71  * refer to other GEM objects containing auxiliary state such as kernels,
72  * samplers, render targets and even secondary batchbuffers. Userspace does
73  * not know where in the GPU memory these objects reside and so before the
74  * batchbuffer is passed to the GPU for execution, those addresses in the
75  * batchbuffer and auxiliary objects are updated. This is known as relocation,
76  * or patching. To try and avoid having to relocate each object on the next
77  * execution, userspace is told the location of those objects in this pass,
78  * but this remains just a hint as the kernel may choose a new location for
79  * any object in the future.
80  *
81  * At the level of talking to the hardware, submitting a batchbuffer for the
82  * GPU to execute amounts to adding content to a buffer from which the HW
83  * command streamer is reading.
84  *
85  * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
86  *    Execlists, this command is not placed on the same buffer as the
87  *    remaining items.
88  *
89  * 2. Add a command to invalidate caches to the buffer.
90  *
91  * 3. Add a batchbuffer start command to the buffer; the start command is
92  *    essentially a token together with the GPU address of the batchbuffer
93  *    to be executed.
94  *
95  * 4. Add a pipeline flush to the buffer.
96  *
97  * 5. Add a memory write command to the buffer to record when the GPU
98  *    is done executing the batchbuffer. The memory write writes the
99  *    global sequence number of the request, ``i915_request::global_seqno``;
100  *    the i915 driver uses the current value in the register to determine
101  *    if the GPU has completed the batchbuffer.
102  *
103  * 6. Add a user interrupt command to the buffer. This command instructs
104  *    the GPU to issue an interrupt when the command, pipeline flush and
105  *    memory write are completed.
106  *
107  * 7. Inform the hardware of the additional commands added to the buffer
108  *    (by updating the tail pointer).
109  *
110  * Processing an execbuf ioctl is conceptually split up into a few phases.
111  *
112  * 1. Validation - Ensure all the pointers, handles and flags are valid.
113  * 2. Reservation - Assign GPU address space for every object
114  * 3. Relocation - Update any addresses to point to the final locations
115  * 4. Serialisation - Order the request with respect to its dependencies
116  * 5. Construction - Construct a request to execute the batchbuffer
117  * 6. Submission (at some point in the future execution)
118  *
119  * Reserving resources for the execbuf is the most complicated phase. We
120  * neither want to have to migrate the object in the address space, nor do
121  * we want to have to update any relocations pointing to this object. Ideally,
122  * we want to leave the object where it is and for all the existing relocations
123  * to match. If the object is given a new address, or if userspace thinks the
124  * object is elsewhere, we have to parse all the relocation entries and update
125  * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
126  * all the target addresses in all of its objects match the value in the
127  * relocation entries and that they all match the presumed offsets given by the
128  * list of execbuffer objects. Using this knowledge, we know that if we haven't
129  * moved any buffers, all the relocation entries are valid and we can skip
130  * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
131  * hang.) The requirements for using I915_EXEC_NO_RELOC are:
132  *
133  *      The addresses written in the objects must match the corresponding
134  *      reloc.presumed_offset which in turn must match the corresponding
135  *      execobject.offset.
136  *
137  *      Any render targets written to in the batch must be flagged with
138  *      EXEC_OBJECT_WRITE.
139  *
140  *      To avoid stalling, execobject.offset should match the current
141  *      address of that object within the active context.
142  *
143  * The reservation is done in multiple phases. First we try to keep any
144  * object already bound in its current location - so long as it meets the
145  * constraints imposed by the new execbuffer. Any object left unbound after the
146  * first pass is then fitted into any available idle space. If an object does
147  * not fit, all objects are removed from the reservation and the process rerun
148  * after sorting the objects into a priority order (more difficult to fit
149  * objects are tried first). Failing that, the entire VM is cleared and we try
150  * to fit the execbuf one last time before concluding that it simply will not
151  * fit.
152  *
153  * A small complication to all of this is that we allow userspace not only to
154  * specify an alignment and a size for the object in the address space, but
155  * we also allow userspace to specify the exact offset. These objects are
156  * simpler to place (the location is known a priori); all we have to do is make
157  * sure the space is available.
158  *
159  * Once all the objects are in place, patching up the buried pointers to point
160  * to the final locations is a fairly simple job of walking over the relocation
161  * entry arrays, looking up the right address and rewriting the value into
162  * the object. Simple! ... The relocation entries are stored in user memory
163  * and so to access them we have to copy them into a local buffer. That copy
164  * has to avoid taking any pagefaults as they may lead back to a GEM object
165  * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
166  * the relocation into multiple passes. First we try to do everything within an
167  * atomic context (avoid the pagefaults) which requires that we never wait. If
168  * we detect that we may wait, or if we need to fault, then we have to fallback
169  * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
170  * bells yet?) Dropping the mutex means that we lose all the state we have
171  * built up so far for the execbuf and we must reset any global data. However,
172  * we do leave the objects pinned in their final locations - which is a
173  * potential issue for concurrent execbufs. Once we have left the mutex, we can
174  * allocate and copy all the relocation entries into a large array at our
175  * leisure, reacquire the mutex, reclaim all the objects and other state and
176  * then proceed to update any incorrect addresses with the objects.
177  *
178  * As we process the relocation entries, we maintain a record of whether the
179  * object is being written to. Using NO_RELOC, we expect userspace to provide
180  * this information instead. We also check whether we can skip the relocation
181  * by comparing the expected value inside the relocation entry with the target's
182  * final address. If they differ, we have to map the current object and rewrite
183  * the 4 or 8 byte pointer within.
184  *
185  * Serialising an execbuf is quite simple according to the rules of the GEM
186  * ABI. Execution within each context is ordered by the order of submission.
187  * Writes to any GEM object are in order of submission and are exclusive. Reads
188  * from a GEM object are unordered with respect to other reads, but ordered by
189  * writes. A write submitted after a read cannot occur before the read, and
190  * similarly any read submitted after a write cannot occur before the write.
191  * Writes are ordered between engines such that only one write occurs at any
192  * time (completing any reads beforehand) - using semaphores where available
193  * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
194  * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
195  * reads before starting, and any read (either using set-domain or pread) must
196  * flush all GPU writes before starting. (Note we only employ a barrier before,
197  * we currently rely on userspace not concurrently starting a new execution
198  * whilst reading or writing to an object. This may be an advantage or not
199  * depending on how much you trust userspace not to shoot themselves in the
200  * foot.) Serialisation may just result in the request being inserted into
201  * a DAG awaiting its turn, but most simple is to wait on the CPU until
202  * all dependencies are resolved.
203  *
204  * After all of that, it is just a matter of closing the request and handing it to
205  * the hardware (well, leaving it in a queue to be executed). However, we also
206  * offer the ability for batchbuffers to be run with elevated privileges so
207  * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
208  * Before any batch is given extra privileges we first must check that it
209  * contains no nefarious instructions: we check that each instruction is from
210  * our whitelist and all registers are also from an allowed list. We first
211  * copy the user's batchbuffer to a shadow (so that the user doesn't have
212  * access to it, either by the CPU or GPU as we scan it) and then parse each
213  * instruction. If everything is ok, we set a flag telling the hardware to run
214  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
215  */
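/*
 * As a rough, illustrative userspace-side sketch of the NO_RELOC contract
 * described above (the handles, offsets, lengths, context id and fd used
 * below are placeholders, and drmIoctl() comes from libdrm):
 *
 *	#include <xf86drm.h>
 *	#include <drm/i915_drm.h>
 *
 *	struct drm_i915_gem_relocation_entry reloc = {
 *		.target_handle = 0,	// index into objects[], as HANDLE_LUT is set
 *		.offset = 128,		// byte offset of the pointer inside the batch
 *		.delta = 0,
 *		.presumed_offset = target_gtt_offset, // must match objects[0].offset
 *		.read_domains = I915_GEM_DOMAIN_RENDER,
 *		.write_domain = I915_GEM_DOMAIN_RENDER,
 *	};
 *	struct drm_i915_gem_exec_object2 objects[] = {
 *		{ .handle = target_handle,
 *		  .offset = target_gtt_offset,
 *		  .flags = EXEC_OBJECT_WRITE },	// render target written by the batch
 *		{ .handle = batch_handle,	// batch last, as BATCH_FIRST is not set
 *		  .relocation_count = 1,
 *		  .relocs_ptr = (uintptr_t)&reloc,
 *		  .offset = batch_gtt_offset },
 *	};
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)objects,
 *		.buffer_count = 2,
 *		.batch_len = batch_len,
 *		.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC | I915_EXEC_HANDLE_LUT,
 *		.rsvd1 = ctx_id,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 *
 * If any presumed_offset turns out to be stale, the relocation entries are
 * processed as described above and the new addresses are written back into
 * each execobject.offset for userspace to reuse on its next submission.
 */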
216 
217 struct i915_execbuffer {
218 	struct drm_i915_private *i915; /** i915 backpointer */
219 	struct drm_file *file; /** per-file lookup tables and limits */
220 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
221 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
222 	struct i915_vma **vma;
223 	unsigned int *flags;
224 
225 	struct intel_engine_cs *engine; /** engine to queue the request to */
226 	struct intel_context *context; /* logical state for the request */
227 	struct i915_gem_context *gem_context; /** caller's context */
228 
229 	struct i915_request *request; /** our request to build */
230 	struct i915_vma *batch; /** identity of the batch obj/vma */
231 
232 	/** actual size of execobj[] as we may extend it for the cmdparser */
233 	unsigned int buffer_count;
234 
235 	/** list of vma not yet bound during reservation phase */
236 	struct list_head unbound;
237 
238 	/** list of vma that have execobj.relocation_count */
239 	struct list_head relocs;
240 
241 	/**
242 	 * Track the most recently used object for relocations, as we
243 	 * frequently have to perform multiple relocations within the same
244 	 * obj/page
245 	 */
246 	struct reloc_cache {
247 		struct drm_mm_node node; /** temporary GTT binding */
248 		unsigned long vaddr; /** Current kmap address */
249 		unsigned long page; /** Currently mapped page index */
250 		unsigned int gen; /** Cached value of INTEL_GEN */
251 		bool use_64bit_reloc : 1;
252 		bool has_llc : 1;
253 		bool has_fence : 1;
254 		bool needs_unfenced : 1;
255 
256 		struct intel_context *ce;
257 		struct i915_request *rq;
258 		u32 *rq_cmd;
259 		unsigned int rq_size;
260 	} reloc_cache;
261 
262 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
263 	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
264 
265 	u32 batch_start_offset; /** Location within object of batch */
266 	u32 batch_len; /** Length of batch within object */
267 	u32 batch_flags; /** Flags composed for emit_bb_start() */
268 
269 	/**
270 	 * Indicate either the size of the hashtable used to resolve
271 	 * relocation handles, or if negative that we are using a direct
272 	 * index into the execobj[].
273 	 */
274 	int lut_size;
275 	struct hlist_head *buckets; /** ht for relocation handles */
276 };
277 
278 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
279 
280 /*
281  * Used to convert any address to canonical form.
282  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
283  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
284  * addresses to be in a canonical form:
285  * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
286  * canonical form [63:48] == [47]."
287  */
288 #define GEN8_HIGH_ADDRESS_BIT 47
289 static inline u64 gen8_canonical_addr(u64 address)
290 {
291 	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
292 }
293 
294 static inline u64 gen8_noncanonical_addr(u64 address)
295 {
296 	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
297 }
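/*
 * A worked example of the two helpers above: an address with bit 47 set,
 * say 0x0000_8000_0000_0000, canonicalises to 0xffff_8000_0000_0000, and
 * gen8_noncanonical_addr() strips the replicated upper bits again before
 * the address is handed to drm_mm.
 */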
298 
299 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
300 {
301 	return intel_engine_requires_cmd_parser(eb->engine) ||
302 		(intel_engine_using_cmd_parser(eb->engine) &&
303 		 eb->args->batch_len);
304 }
305 
306 static int eb_create(struct i915_execbuffer *eb)
307 {
308 	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
309 		unsigned int size = 1 + ilog2(eb->buffer_count);
310 
311 		/*
312 		 * Without a 1:1 association between relocation handles and
313 		 * the execobject[] index, we instead create a hashtable.
314 		 * We size it dynamically based on available memory, starting
315 		 * first with a 1:1 associative hash and scaling back until
316 		 * the allocation succeeds.
317 		 *
318 		 * Later on we use a positive lut_size to indicate we are
319 		 * using this hashtable, and a negative value to indicate a
320 		 * direct lookup.
321 		 */
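		/*
		 * For example, with buffer_count == 100, ilog2() gives 6 and
		 * the first attempt is for 1 << 7 == 128 buckets, halving on
		 * each allocation failure down to a minimum of 2 buckets.
		 */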
322 		do {
323 			gfp_t flags;
324 
325 			/* While we can still reduce the allocation size, don't
326 			 * raise a warning and allow the allocation to fail.
327 			 * On the last pass though, we want to try as hard
328 			 * as possible to perform the allocation and warn
329 			 * if it fails.
330 			 */
331 			flags = GFP_KERNEL;
332 			if (size > 1)
333 				flags |= __GFP_NORETRY | __GFP_NOWARN;
334 
335 			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
336 					      flags);
337 			if (eb->buckets)
338 				break;
339 		} while (--size);
340 
341 		if (unlikely(!size))
342 			return -ENOMEM;
343 
344 		eb->lut_size = size;
345 	} else {
346 		eb->lut_size = -eb->buffer_count;
347 	}
348 
349 	return 0;
350 }
351 
352 static bool
353 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
354 		 const struct i915_vma *vma,
355 		 unsigned int flags)
356 {
357 	if (vma->node.size < entry->pad_to_size)
358 		return true;
359 
360 	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
361 		return true;
362 
363 	if (flags & EXEC_OBJECT_PINNED &&
364 	    vma->node.start != entry->offset)
365 		return true;
366 
367 	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
368 	    vma->node.start < BATCH_OFFSET_BIAS)
369 		return true;
370 
371 	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
372 	    (vma->node.start + vma->node.size - 1) >> 32)
373 		return true;
374 
375 	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
376 	    !i915_vma_is_map_and_fenceable(vma))
377 		return true;
378 
379 	return false;
380 }
381 
382 static inline bool
383 eb_pin_vma(struct i915_execbuffer *eb,
384 	   const struct drm_i915_gem_exec_object2 *entry,
385 	   struct i915_vma *vma)
386 {
387 	unsigned int exec_flags = *vma->exec_flags;
388 	u64 pin_flags;
389 
390 	if (vma->node.size)
391 		pin_flags = vma->node.start;
392 	else
393 		pin_flags = entry->offset & PIN_OFFSET_MASK;
394 
395 	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
396 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
397 		pin_flags |= PIN_GLOBAL;
398 
399 	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
400 		return false;
401 
402 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
403 		if (unlikely(i915_vma_pin_fence(vma))) {
404 			i915_vma_unpin(vma);
405 			return false;
406 		}
407 
408 		if (vma->fence)
409 			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
410 	}
411 
412 	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
413 	return !eb_vma_misplaced(entry, vma, exec_flags);
414 }
415 
416 static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
417 {
418 	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
419 
420 	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
421 		__i915_vma_unpin_fence(vma);
422 
423 	__i915_vma_unpin(vma);
424 }
425 
426 static inline void
427 eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
428 {
429 	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
430 		return;
431 
432 	__eb_unreserve_vma(vma, *flags);
433 	*flags &= ~__EXEC_OBJECT_RESERVED;
434 }
435 
436 static int
437 eb_validate_vma(struct i915_execbuffer *eb,
438 		struct drm_i915_gem_exec_object2 *entry,
439 		struct i915_vma *vma)
440 {
441 	if (unlikely(entry->flags & eb->invalid_flags))
442 		return -EINVAL;
443 
444 	if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
445 		return -EINVAL;
446 
447 	/*
448 	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
449 	 * any non-page-aligned or non-canonical addresses.
450 	 */
451 	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
452 		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
453 		return -EINVAL;
454 
455 	/* pad_to_size was once a reserved field, so sanitize it */
456 	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
457 		if (unlikely(offset_in_page(entry->pad_to_size)))
458 			return -EINVAL;
459 	} else {
460 		entry->pad_to_size = 0;
461 	}
462 
463 	if (unlikely(vma->exec_flags)) {
464 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
465 			  entry->handle, (int)(entry - eb->exec));
466 		return -EINVAL;
467 	}
468 
469 	/*
470 	 * From the drm_mm perspective the address space is contiguous,
471 	 * so from this point on we always use the non-canonical
472 	 * form internally.
473 	 */
474 	entry->offset = gen8_noncanonical_addr(entry->offset);
475 
476 	if (!eb->reloc_cache.has_fence) {
477 		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
478 	} else {
479 		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
480 		     eb->reloc_cache.needs_unfenced) &&
481 		    i915_gem_object_is_tiled(vma->obj))
482 			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
483 	}
484 
485 	if (!(entry->flags & EXEC_OBJECT_PINNED))
486 		entry->flags |= eb->context_flags;
487 
488 	return 0;
489 }
490 
491 static int
492 eb_add_vma(struct i915_execbuffer *eb,
493 	   unsigned int i, unsigned batch_idx,
494 	   struct i915_vma *vma)
495 {
496 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
497 	int err;
498 
499 	GEM_BUG_ON(i915_vma_is_closed(vma));
500 
501 	if (!(eb->args->flags & __EXEC_VALIDATED)) {
502 		err = eb_validate_vma(eb, entry, vma);
503 		if (unlikely(err))
504 			return err;
505 	}
506 
507 	if (eb->lut_size > 0) {
508 		vma->exec_handle = entry->handle;
509 		hlist_add_head(&vma->exec_node,
510 			       &eb->buckets[hash_32(entry->handle,
511 						    eb->lut_size)]);
512 	}
513 
514 	if (entry->relocation_count)
515 		list_add_tail(&vma->reloc_link, &eb->relocs);
516 
517 	/*
518 	 * Stash a pointer from the vma to execobj, so we can query its flags,
519 	 * size, alignment etc as provided by the user. Also we stash a pointer
520 	 * to the vma inside the execobj so that we can use a direct lookup
521 	 * to find the right target VMA when doing relocations.
522 	 */
523 	eb->vma[i] = vma;
524 	eb->flags[i] = entry->flags;
525 	vma->exec_flags = &eb->flags[i];
526 
527 	/*
528 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
529 	 * to negative relocation deltas. Usually that works out ok since the
530 	 * relocate address is still positive, except when the batch is placed
531 	 * very low in the GTT. Ensure this doesn't happen.
532 	 *
533 	 * Note that actual hangs have only been observed on gen7, but for
534 	 * paranoia do it everywhere.
535 	 */
536 	if (i == batch_idx) {
537 		if (entry->relocation_count &&
538 		    !(eb->flags[i] & EXEC_OBJECT_PINNED))
539 			eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
540 		if (eb->reloc_cache.has_fence)
541 			eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
542 
543 		eb->batch = vma;
544 	}
545 
546 	err = 0;
547 	if (eb_pin_vma(eb, entry, vma)) {
548 		if (entry->offset != vma->node.start) {
549 			entry->offset = vma->node.start | UPDATE;
550 			eb->args->flags |= __EXEC_HAS_RELOC;
551 		}
552 	} else {
553 		eb_unreserve_vma(vma, vma->exec_flags);
554 
555 		list_add_tail(&vma->exec_link, &eb->unbound);
556 		if (drm_mm_node_allocated(&vma->node))
557 			err = i915_vma_unbind(vma);
558 		if (unlikely(err))
559 			vma->exec_flags = NULL;
560 	}
561 	return err;
562 }
563 
564 static inline int use_cpu_reloc(const struct reloc_cache *cache,
565 				const struct drm_i915_gem_object *obj)
566 {
567 	if (!i915_gem_object_has_struct_page(obj))
568 		return false;
569 
570 	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
571 		return true;
572 
573 	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
574 		return false;
575 
576 	return (cache->has_llc ||
577 		obj->cache_dirty ||
578 		obj->cache_level != I915_CACHE_NONE);
579 }
580 
581 static int eb_reserve_vma(const struct i915_execbuffer *eb,
582 			  struct i915_vma *vma)
583 {
584 	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
585 	unsigned int exec_flags = *vma->exec_flags;
586 	u64 pin_flags;
587 	int err;
588 
589 	pin_flags = PIN_USER | PIN_NONBLOCK;
590 	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
591 		pin_flags |= PIN_GLOBAL;
592 
593 	/*
594 	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
595 	 * limit address to the first 4GBs for unflagged objects.
596 	 */
597 	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
598 		pin_flags |= PIN_ZONE_4G;
599 
600 	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
601 		pin_flags |= PIN_MAPPABLE;
602 
603 	if (exec_flags & EXEC_OBJECT_PINNED) {
604 		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
605 		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
606 	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
607 		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
608 	}
609 
610 	err = i915_vma_pin(vma,
611 			   entry->pad_to_size, entry->alignment,
612 			   pin_flags);
613 	if (err)
614 		return err;
615 
616 	if (entry->offset != vma->node.start) {
617 		entry->offset = vma->node.start | UPDATE;
618 		eb->args->flags |= __EXEC_HAS_RELOC;
619 	}
620 
621 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
622 		err = i915_vma_pin_fence(vma);
623 		if (unlikely(err)) {
624 			i915_vma_unpin(vma);
625 			return err;
626 		}
627 
628 		if (vma->fence)
629 			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
630 	}
631 
632 	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
633 	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
634 
635 	return 0;
636 }
637 
638 static int eb_reserve(struct i915_execbuffer *eb)
639 {
640 	const unsigned int count = eb->buffer_count;
641 	struct list_head last;
642 	struct i915_vma *vma;
643 	unsigned int i, pass;
644 	int err;
645 
646 	/*
647 	 * Attempt to pin all of the buffers into the GTT.
648 	 * This is done in 3 phases:
649 	 *
650 	 * 1a. Unbind all objects that do not match the GTT constraints for
651 	 *     the execbuffer (fenceable, mappable, alignment etc).
652 	 * 1b. Increment pin count for already bound objects.
653 	 * 2.  Bind new objects.
654 	 * 3.  Decrement pin count.
655 	 *
656 	 * This avoids unnecessary unbinding of later objects in order to make
657 	 * room for the earlier objects *unless* we need to defragment.
658 	 */
659 
660 	pass = 0;
661 	err = 0;
662 	do {
663 		list_for_each_entry(vma, &eb->unbound, exec_link) {
664 			err = eb_reserve_vma(eb, vma);
665 			if (err)
666 				break;
667 		}
668 		if (err != -ENOSPC)
669 			return err;
670 
671 		/* Resort *all* the objects into priority order */
672 		INIT_LIST_HEAD(&eb->unbound);
673 		INIT_LIST_HEAD(&last);
674 		for (i = 0; i < count; i++) {
675 			unsigned int flags = eb->flags[i];
676 			struct i915_vma *vma = eb->vma[i];
677 
678 			if (flags & EXEC_OBJECT_PINNED &&
679 			    flags & __EXEC_OBJECT_HAS_PIN)
680 				continue;
681 
682 			eb_unreserve_vma(vma, &eb->flags[i]);
683 
684 			if (flags & EXEC_OBJECT_PINNED)
685 				/* Pinned objects must have their slot */
686 				list_add(&vma->exec_link, &eb->unbound);
687 			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
688 				/* Mappable objects require the lowest 256MiB (aperture) */
689 				list_add_tail(&vma->exec_link, &eb->unbound);
690 			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
691 				/* Prioritise 4GiB region for restricted bo */
692 				list_add(&vma->exec_link, &last);
693 			else
694 				list_add_tail(&vma->exec_link, &last);
695 		}
696 		list_splice_tail(&last, &eb->unbound);
697 
698 		switch (pass++) {
699 		case 0:
700 			break;
701 
702 		case 1:
703 			/* Too fragmented, unbind everything and retry */
704 			mutex_lock(&eb->context->vm->mutex);
705 			err = i915_gem_evict_vm(eb->context->vm);
706 			mutex_unlock(&eb->context->vm->mutex);
707 			if (err)
708 				return err;
709 			break;
710 
711 		default:
712 			return -ENOSPC;
713 		}
714 	} while (1);
715 }
716 
717 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
718 {
719 	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
720 		return 0;
721 	else
722 		return eb->buffer_count - 1;
723 }
724 
725 static int eb_select_context(struct i915_execbuffer *eb)
726 {
727 	struct i915_gem_context *ctx;
728 
729 	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
730 	if (unlikely(!ctx))
731 		return -ENOENT;
732 
733 	eb->gem_context = ctx;
734 	if (rcu_access_pointer(ctx->vm))
735 		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
736 
737 	eb->context_flags = 0;
738 	if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
739 		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
740 
741 	return 0;
742 }
743 
744 static int eb_lookup_vmas(struct i915_execbuffer *eb)
745 {
746 	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
747 	struct drm_i915_gem_object *obj;
748 	unsigned int i, batch;
749 	int err;
750 
751 	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
752 		return -EIO;
753 
754 	INIT_LIST_HEAD(&eb->relocs);
755 	INIT_LIST_HEAD(&eb->unbound);
756 
757 	batch = eb_batch_index(eb);
758 
759 	mutex_lock(&eb->gem_context->mutex);
760 	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
761 		err = -ENOENT;
762 		goto err_ctx;
763 	}
764 
765 	for (i = 0; i < eb->buffer_count; i++) {
766 		u32 handle = eb->exec[i].handle;
767 		struct i915_lut_handle *lut;
768 		struct i915_vma *vma;
769 
770 		vma = radix_tree_lookup(handles_vma, handle);
771 		if (likely(vma))
772 			goto add_vma;
773 
774 		obj = i915_gem_object_lookup(eb->file, handle);
775 		if (unlikely(!obj)) {
776 			err = -ENOENT;
777 			goto err_vma;
778 		}
779 
780 		vma = i915_vma_instance(obj, eb->context->vm, NULL);
781 		if (IS_ERR(vma)) {
782 			err = PTR_ERR(vma);
783 			goto err_obj;
784 		}
785 
786 		lut = i915_lut_handle_alloc();
787 		if (unlikely(!lut)) {
788 			err = -ENOMEM;
789 			goto err_obj;
790 		}
791 
792 		err = radix_tree_insert(handles_vma, handle, vma);
793 		if (unlikely(err)) {
794 			i915_lut_handle_free(lut);
795 			goto err_obj;
796 		}
797 
798 		/* transfer ref to lut */
799 		if (!atomic_fetch_inc(&vma->open_count))
800 			i915_vma_reopen(vma);
801 		lut->handle = handle;
802 		lut->ctx = eb->gem_context;
803 
804 		i915_gem_object_lock(obj);
805 		list_add(&lut->obj_link, &obj->lut_list);
806 		i915_gem_object_unlock(obj);
807 
808 add_vma:
809 		err = eb_add_vma(eb, i, batch, vma);
810 		if (unlikely(err))
811 			goto err_vma;
812 
813 		GEM_BUG_ON(vma != eb->vma[i]);
814 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
815 		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
816 			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
817 	}
818 
819 	mutex_unlock(&eb->gem_context->mutex);
820 
821 	eb->args->flags |= __EXEC_VALIDATED;
822 	return eb_reserve(eb);
823 
824 err_obj:
825 	i915_gem_object_put(obj);
826 err_vma:
827 	eb->vma[i] = NULL;
828 err_ctx:
829 	mutex_unlock(&eb->gem_context->mutex);
830 	return err;
831 }
832 
833 static struct i915_vma *
834 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
835 {
836 	if (eb->lut_size < 0) {
837 		if (handle >= -eb->lut_size)
838 			return NULL;
839 		return eb->vma[handle];
840 	} else {
841 		struct hlist_head *head;
842 		struct i915_vma *vma;
843 
844 		head = &eb->buckets[hash_32(handle, eb->lut_size)];
845 		hlist_for_each_entry(vma, head, exec_node) {
846 			if (vma->exec_handle == handle)
847 				return vma;
848 		}
849 		return NULL;
850 	}
851 }
852 
853 static void eb_release_vmas(const struct i915_execbuffer *eb)
854 {
855 	const unsigned int count = eb->buffer_count;
856 	unsigned int i;
857 
858 	for (i = 0; i < count; i++) {
859 		struct i915_vma *vma = eb->vma[i];
860 		unsigned int flags = eb->flags[i];
861 
862 		if (!vma)
863 			break;
864 
865 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
866 		vma->exec_flags = NULL;
867 		eb->vma[i] = NULL;
868 
869 		if (flags & __EXEC_OBJECT_HAS_PIN)
870 			__eb_unreserve_vma(vma, flags);
871 
872 		if (flags & __EXEC_OBJECT_HAS_REF)
873 			i915_vma_put(vma);
874 	}
875 }
876 
877 static void eb_reset_vmas(const struct i915_execbuffer *eb)
878 {
879 	eb_release_vmas(eb);
880 	if (eb->lut_size > 0)
881 		memset(eb->buckets, 0,
882 		       sizeof(struct hlist_head) << eb->lut_size);
883 }
884 
885 static void eb_destroy(const struct i915_execbuffer *eb)
886 {
887 	GEM_BUG_ON(eb->reloc_cache.rq);
888 
889 	if (eb->reloc_cache.ce)
890 		intel_context_put(eb->reloc_cache.ce);
891 
892 	if (eb->lut_size > 0)
893 		kfree(eb->buckets);
894 }
895 
896 static inline u64
897 relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
898 		  const struct i915_vma *target)
899 {
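	/*
	 * Note the (int) cast: reloc->delta is a u32 in the uapi but may
	 * encode a negative offset (see the SNA note in eb_add_vma()), so it
	 * is sign-extended before being added to the target address.
	 */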
900 	return gen8_canonical_addr((int)reloc->delta + target->node.start);
901 }
902 
903 static void reloc_cache_init(struct reloc_cache *cache,
904 			     struct drm_i915_private *i915)
905 {
906 	cache->page = -1;
907 	cache->vaddr = 0;
908 	/* Must be a variable in the struct to allow GCC to unroll. */
909 	cache->gen = INTEL_GEN(i915);
910 	cache->has_llc = HAS_LLC(i915);
911 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
912 	cache->has_fence = cache->gen < 4;
913 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
914 	cache->node.flags = 0;
915 	cache->ce = NULL;
916 	cache->rq = NULL;
917 	cache->rq_size = 0;
918 }
919 
920 static inline void *unmask_page(unsigned long p)
921 {
922 	return (void *)(uintptr_t)(p & PAGE_MASK);
923 }
924 
925 static inline unsigned int unmask_flags(unsigned long p)
926 {
927 	return p & ~PAGE_MASK;
928 }
929 
930 #define KMAP 0x4 /* after CLFLUSH_FLAGS */
931 
932 static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
933 {
934 	struct drm_i915_private *i915 =
935 		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
936 	return &i915->ggtt;
937 }
938 
939 static void reloc_gpu_flush(struct reloc_cache *cache)
940 {
941 	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
942 	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
943 
944 	__i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size);
945 	i915_gem_object_unpin_map(cache->rq->batch->obj);
946 
947 	intel_gt_chipset_flush(cache->rq->engine->gt);
948 
949 	i915_request_add(cache->rq);
950 	cache->rq = NULL;
951 }
952 
953 static void reloc_cache_reset(struct reloc_cache *cache)
954 {
955 	void *vaddr;
956 
957 	if (cache->rq)
958 		reloc_gpu_flush(cache);
959 
960 	if (!cache->vaddr)
961 		return;
962 
963 	vaddr = unmask_page(cache->vaddr);
964 	if (cache->vaddr & KMAP) {
965 		if (cache->vaddr & CLFLUSH_AFTER)
966 			mb();
967 
968 		kunmap_atomic(vaddr);
969 		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
970 	} else {
971 		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
972 
973 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
974 		io_mapping_unmap_atomic((void __iomem *)vaddr);
975 
976 		if (drm_mm_node_allocated(&cache->node)) {
977 			ggtt->vm.clear_range(&ggtt->vm,
978 					     cache->node.start,
979 					     cache->node.size);
980 			mutex_lock(&ggtt->vm.mutex);
981 			drm_mm_remove_node(&cache->node);
982 			mutex_unlock(&ggtt->vm.mutex);
983 		} else {
984 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
985 		}
986 	}
987 
988 	cache->vaddr = 0;
989 	cache->page = -1;
990 }
991 
992 static void *reloc_kmap(struct drm_i915_gem_object *obj,
993 			struct reloc_cache *cache,
994 			unsigned long page)
995 {
996 	void *vaddr;
997 
998 	if (cache->vaddr) {
999 		kunmap_atomic(unmask_page(cache->vaddr));
1000 	} else {
1001 		unsigned int flushes;
1002 		int err;
1003 
1004 		err = i915_gem_object_prepare_write(obj, &flushes);
1005 		if (err)
1006 			return ERR_PTR(err);
1007 
1008 		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
1009 		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
1010 
1011 		cache->vaddr = flushes | KMAP;
1012 		cache->node.mm = (void *)obj;
1013 		if (flushes)
1014 			mb();
1015 	}
1016 
1017 	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
1018 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
1019 	cache->page = page;
1020 
1021 	return vaddr;
1022 }
1023 
1024 static void *reloc_iomap(struct drm_i915_gem_object *obj,
1025 			 struct reloc_cache *cache,
1026 			 unsigned long page)
1027 {
1028 	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1029 	unsigned long offset;
1030 	void *vaddr;
1031 
1032 	if (cache->vaddr) {
1033 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1034 		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
1035 	} else {
1036 		struct i915_vma *vma;
1037 		int err;
1038 
1039 		if (i915_gem_object_is_tiled(obj))
1040 			return ERR_PTR(-EINVAL);
1041 
1042 		if (use_cpu_reloc(cache, obj))
1043 			return NULL;
1044 
1045 		i915_gem_object_lock(obj);
1046 		err = i915_gem_object_set_to_gtt_domain(obj, true);
1047 		i915_gem_object_unlock(obj);
1048 		if (err)
1049 			return ERR_PTR(err);
1050 
1051 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1052 					       PIN_MAPPABLE |
1053 					       PIN_NONBLOCK /* NOWARN */ |
1054 					       PIN_NOEVICT);
1055 		if (IS_ERR(vma)) {
1056 			memset(&cache->node, 0, sizeof(cache->node));
1057 			mutex_lock(&ggtt->vm.mutex);
1058 			err = drm_mm_insert_node_in_range
1059 				(&ggtt->vm.mm, &cache->node,
1060 				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
1061 				 0, ggtt->mappable_end,
1062 				 DRM_MM_INSERT_LOW);
1063 			mutex_unlock(&ggtt->vm.mutex);
1064 			if (err) /* no inactive aperture space, use cpu reloc */
1065 				return NULL;
1066 		} else {
1067 			cache->node.start = vma->node.start;
1068 			cache->node.mm = (void *)vma;
1069 		}
1070 	}
1071 
1072 	offset = cache->node.start;
1073 	if (drm_mm_node_allocated(&cache->node)) {
1074 		ggtt->vm.insert_page(&ggtt->vm,
1075 				     i915_gem_object_get_dma_address(obj, page),
1076 				     offset, I915_CACHE_NONE, 0);
1077 	} else {
1078 		offset += page << PAGE_SHIFT;
1079 	}
1080 
1081 	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1082 							 offset);
1083 	cache->page = page;
1084 	cache->vaddr = (unsigned long)vaddr;
1085 
1086 	return vaddr;
1087 }
1088 
1089 static void *reloc_vaddr(struct drm_i915_gem_object *obj,
1090 			 struct reloc_cache *cache,
1091 			 unsigned long page)
1092 {
1093 	void *vaddr;
1094 
1095 	if (cache->page == page) {
1096 		vaddr = unmask_page(cache->vaddr);
1097 	} else {
1098 		vaddr = NULL;
1099 		if ((cache->vaddr & KMAP) == 0)
1100 			vaddr = reloc_iomap(obj, cache, page);
1101 		if (!vaddr)
1102 			vaddr = reloc_kmap(obj, cache, page);
1103 	}
1104 
1105 	return vaddr;
1106 }
1107 
1108 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1109 {
1110 	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1111 		if (flushes & CLFLUSH_BEFORE) {
1112 			clflushopt(addr);
1113 			mb();
1114 		}
1115 
1116 		*addr = value;
1117 
1118 		/*
1119 		 * Writes to the same cacheline are serialised by the CPU
1120 		 * (including clflush). On the write path, we only require
1121 		 * that it hits memory in an orderly fashion and place
1122 		 * mb barriers at the start and end of the relocation phase
1123 		 * to ensure ordering of clflush with respect to the system.
1124 		 */
1125 		if (flushes & CLFLUSH_AFTER)
1126 			clflushopt(addr);
1127 	} else
1128 		*addr = value;
1129 }
1130 
1131 static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
1132 {
1133 	struct drm_i915_gem_object *obj = vma->obj;
1134 	int err;
1135 
1136 	i915_vma_lock(vma);
1137 
1138 	if (obj->cache_dirty & ~obj->cache_coherent)
1139 		i915_gem_clflush_object(obj, 0);
1140 	obj->write_domain = 0;
1141 
1142 	err = i915_request_await_object(rq, vma->obj, true);
1143 	if (err == 0)
1144 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1145 
1146 	i915_vma_unlock(vma);
1147 
1148 	return err;
1149 }
1150 
1151 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
1152 			     struct i915_vma *vma,
1153 			     unsigned int len)
1154 {
1155 	struct reloc_cache *cache = &eb->reloc_cache;
1156 	struct intel_engine_pool_node *pool;
1157 	struct i915_request *rq;
1158 	struct i915_vma *batch;
1159 	u32 *cmd;
1160 	int err;
1161 
1162 	pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
1163 	if (IS_ERR(pool))
1164 		return PTR_ERR(pool);
1165 
1166 	cmd = i915_gem_object_pin_map(pool->obj,
1167 				      cache->has_llc ?
1168 				      I915_MAP_FORCE_WB :
1169 				      I915_MAP_FORCE_WC);
1170 	if (IS_ERR(cmd)) {
1171 		err = PTR_ERR(cmd);
1172 		goto out_pool;
1173 	}
1174 
1175 	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
1176 	if (IS_ERR(batch)) {
1177 		err = PTR_ERR(batch);
1178 		goto err_unmap;
1179 	}
1180 
1181 	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
1182 	if (err)
1183 		goto err_unmap;
1184 
1185 	rq = intel_context_create_request(cache->ce);
1186 	if (IS_ERR(rq)) {
1187 		err = PTR_ERR(rq);
1188 		goto err_unpin;
1189 	}
1190 
1191 	err = intel_engine_pool_mark_active(pool, rq);
1192 	if (err)
1193 		goto err_request;
1194 
1195 	err = reloc_move_to_gpu(rq, vma);
1196 	if (err)
1197 		goto err_request;
1198 
1199 	err = eb->engine->emit_bb_start(rq,
1200 					batch->node.start, PAGE_SIZE,
1201 					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
1202 	if (err)
1203 		goto skip_request;
1204 
1205 	i915_vma_lock(batch);
1206 	err = i915_request_await_object(rq, batch->obj, false);
1207 	if (err == 0)
1208 		err = i915_vma_move_to_active(batch, rq, 0);
1209 	i915_vma_unlock(batch);
1210 	if (err)
1211 		goto skip_request;
1212 
1213 	rq->batch = batch;
1214 	i915_vma_unpin(batch);
1215 
1216 	cache->rq = rq;
1217 	cache->rq_cmd = cmd;
1218 	cache->rq_size = 0;
1219 
1220 	/* Return with batch mapping (cmd) still pinned */
1221 	goto out_pool;
1222 
1223 skip_request:
1224 	i915_request_skip(rq, err);
1225 err_request:
1226 	i915_request_add(rq);
1227 err_unpin:
1228 	i915_vma_unpin(batch);
1229 err_unmap:
1230 	i915_gem_object_unpin_map(pool->obj);
1231 out_pool:
1232 	intel_engine_pool_put(pool);
1233 	return err;
1234 }
1235 
1236 static u32 *reloc_gpu(struct i915_execbuffer *eb,
1237 		      struct i915_vma *vma,
1238 		      unsigned int len)
1239 {
1240 	struct reloc_cache *cache = &eb->reloc_cache;
1241 	u32 *cmd;
1242 
1243 	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
1244 		reloc_gpu_flush(cache);
1245 
1246 	if (unlikely(!cache->rq)) {
1247 		int err;
1248 
1249 		/* If we need to copy for the cmdparser, we will stall anyway */
1250 		if (eb_use_cmdparser(eb))
1251 			return ERR_PTR(-EWOULDBLOCK);
1252 
1253 		if (!intel_engine_can_store_dword(eb->engine))
1254 			return ERR_PTR(-ENODEV);
1255 
1256 		if (!cache->ce) {
1257 			struct intel_context *ce;
1258 
1259 			/*
1260 			 * The CS pre-parser can pre-fetch commands across
1261 			 * memory sync points and starting gen12 it is able to
1262 			 * pre-fetch across BB_START and BB_END boundaries
1263 			 * (within the same context). We therefore use a
1264 			 * separate context on gen12+ to guarantee that the reloc
1265 			 * writes land before the parser gets to the target
1266 			 * memory location.
1267 			 */
1268 			if (cache->gen >= 12)
1269 				ce = intel_context_create(eb->context->gem_context,
1270 							  eb->engine);
1271 			else
1272 				ce = intel_context_get(eb->context);
1273 			if (IS_ERR(ce))
1274 				return ERR_CAST(ce);
1275 
1276 			cache->ce = ce;
1277 		}
1278 
1279 		err = __reloc_gpu_alloc(eb, vma, len);
1280 		if (unlikely(err))
1281 			return ERR_PTR(err);
1282 	}
1283 
1284 	cmd = cache->rq_cmd + cache->rq_size;
1285 	cache->rq_size += len;
1286 
1287 	return cmd;
1288 }
1289 
1290 static u64
1291 relocate_entry(struct i915_vma *vma,
1292 	       const struct drm_i915_gem_relocation_entry *reloc,
1293 	       struct i915_execbuffer *eb,
1294 	       const struct i915_vma *target)
1295 {
1296 	u64 offset = reloc->offset;
1297 	u64 target_offset = relocation_target(reloc, target);
1298 	bool wide = eb->reloc_cache.use_64bit_reloc;
1299 	void *vaddr;
1300 
1301 	if (!eb->reloc_cache.vaddr &&
1302 	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
1303 	     !dma_resv_test_signaled_rcu(vma->resv, true))) {
1304 		const unsigned int gen = eb->reloc_cache.gen;
1305 		unsigned int len;
1306 		u32 *batch;
1307 		u64 addr;
1308 
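		/*
		 * Dword count of the MI_STORE_DWORD_IMM sequence emitted
		 * below: a 64-bit relocation at a non-qword-aligned offset is
		 * written as two 4-dword stores, otherwise a single store of
		 * 5 (gen8+), 4 (gen4-7) or 3 (gen2/3) dwords is used.
		 */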
1309 		if (wide)
1310 			len = offset & 7 ? 8 : 5;
1311 		else if (gen >= 4)
1312 			len = 4;
1313 		else
1314 			len = 3;
1315 
1316 		batch = reloc_gpu(eb, vma, len);
1317 		if (IS_ERR(batch))
1318 			goto repeat;
1319 
1320 		addr = gen8_canonical_addr(vma->node.start + offset);
1321 		if (wide) {
1322 			if (offset & 7) {
1323 				*batch++ = MI_STORE_DWORD_IMM_GEN4;
1324 				*batch++ = lower_32_bits(addr);
1325 				*batch++ = upper_32_bits(addr);
1326 				*batch++ = lower_32_bits(target_offset);
1327 
1328 				addr = gen8_canonical_addr(addr + 4);
1329 
1330 				*batch++ = MI_STORE_DWORD_IMM_GEN4;
1331 				*batch++ = lower_32_bits(addr);
1332 				*batch++ = upper_32_bits(addr);
1333 				*batch++ = upper_32_bits(target_offset);
1334 			} else {
1335 				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
1336 				*batch++ = lower_32_bits(addr);
1337 				*batch++ = upper_32_bits(addr);
1338 				*batch++ = lower_32_bits(target_offset);
1339 				*batch++ = upper_32_bits(target_offset);
1340 			}
1341 		} else if (gen >= 6) {
1342 			*batch++ = MI_STORE_DWORD_IMM_GEN4;
1343 			*batch++ = 0;
1344 			*batch++ = addr;
1345 			*batch++ = target_offset;
1346 		} else if (gen >= 4) {
1347 			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1348 			*batch++ = 0;
1349 			*batch++ = addr;
1350 			*batch++ = target_offset;
1351 		} else {
1352 			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
1353 			*batch++ = addr;
1354 			*batch++ = target_offset;
1355 		}
1356 
1357 		goto out;
1358 	}
1359 
1360 repeat:
1361 	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
1362 	if (IS_ERR(vaddr))
1363 		return PTR_ERR(vaddr);
1364 
1365 	clflush_write32(vaddr + offset_in_page(offset),
1366 			lower_32_bits(target_offset),
1367 			eb->reloc_cache.vaddr);
1368 
1369 	if (wide) {
1370 		offset += sizeof(u32);
1371 		target_offset >>= 32;
1372 		wide = false;
1373 		goto repeat;
1374 	}
1375 
1376 out:
1377 	return target->node.start | UPDATE;
1378 }
1379 
1380 static u64
1381 eb_relocate_entry(struct i915_execbuffer *eb,
1382 		  struct i915_vma *vma,
1383 		  const struct drm_i915_gem_relocation_entry *reloc)
1384 {
1385 	struct i915_vma *target;
1386 	int err;
1387 
1388 	/* we already hold a reference to all valid objects */
1389 	target = eb_get_vma(eb, reloc->target_handle);
1390 	if (unlikely(!target))
1391 		return -ENOENT;
1392 
1393 	/* Validate that the target is in a valid r/w GPU domain */
1394 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
1395 		DRM_DEBUG("reloc with multiple write domains: "
1396 			  "target %d offset %d "
1397 			  "read %08x write %08x",
1398 			  reloc->target_handle,
1399 			  (int) reloc->offset,
1400 			  reloc->read_domains,
1401 			  reloc->write_domain);
1402 		return -EINVAL;
1403 	}
1404 	if (unlikely((reloc->write_domain | reloc->read_domains)
1405 		     & ~I915_GEM_GPU_DOMAINS)) {
1406 		DRM_DEBUG("reloc with read/write non-GPU domains: "
1407 			  "target %d offset %d "
1408 			  "read %08x write %08x",
1409 			  reloc->target_handle,
1410 			  (int) reloc->offset,
1411 			  reloc->read_domains,
1412 			  reloc->write_domain);
1413 		return -EINVAL;
1414 	}
1415 
1416 	if (reloc->write_domain) {
1417 		*target->exec_flags |= EXEC_OBJECT_WRITE;
1418 
1419 		/*
1420 		 * Sandybridge PPGTT errata: We need a global gtt mapping
1421 		 * for MI and pipe_control writes because the gpu doesn't
1422 		 * properly redirect them through the ppgtt for non_secure
1423 		 * batchbuffers.
1424 		 */
1425 		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
1426 		    IS_GEN(eb->i915, 6)) {
1427 			err = i915_vma_bind(target, target->obj->cache_level,
1428 					    PIN_GLOBAL, NULL);
1429 			if (WARN_ONCE(err,
1430 				      "Unexpected failure to bind target VMA!"))
1431 				return err;
1432 		}
1433 	}
1434 
1435 	/*
1436 	 * If the relocation already has the right value in it, no
1437 	 * more work needs to be done.
1438 	 */
1439 	if (!DBG_FORCE_RELOC &&
1440 	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
1441 		return 0;
1442 
1443 	/* Check that the relocation address is valid... */
1444 	if (unlikely(reloc->offset >
1445 		     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
1446 		DRM_DEBUG("Relocation beyond object bounds: "
1447 			  "target %d offset %d size %d.\n",
1448 			  reloc->target_handle,
1449 			  (int)reloc->offset,
1450 			  (int)vma->size);
1451 		return -EINVAL;
1452 	}
1453 	if (unlikely(reloc->offset & 3)) {
1454 		DRM_DEBUG("Relocation not 4-byte aligned: "
1455 			  "target %d offset %d.\n",
1456 			  reloc->target_handle,
1457 			  (int)reloc->offset);
1458 		return -EINVAL;
1459 	}
1460 
1461 	/*
1462 	 * If we write into the object, we need to force the synchronisation
1463 	 * barrier, either with an asynchronous clflush or if we executed the
1464 	 * patching using the GPU (though that should be serialised by the
1465 	 * timeline). To be completely sure, and since we are already stalling
1466 	 * whenever we are required to do relocations, disable the user's opt
1467 	 * out of our synchronisation.
1468 	 */
1469 	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
1470 
1471 	/* and update the user's relocation entry */
1472 	return relocate_entry(vma, reloc, eb, target);
1473 }
1474 
1475 static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
1476 {
1477 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1478 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
1479 	struct drm_i915_gem_relocation_entry __user *urelocs;
1480 	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1481 	unsigned int remain;
1482 
1483 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
1484 	remain = entry->relocation_count;
1485 	if (unlikely(remain > N_RELOC(ULONG_MAX)))
1486 		return -EINVAL;
1487 
1488 	/*
1489 	 * We must check that the entire relocation array is safe
1490 	 * to read. However, if the array is not writable the user loses
1491 	 * the updated relocation values.
1492 	 */
1493 	if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
1494 		return -EFAULT;
1495 
1496 	do {
1497 		struct drm_i915_gem_relocation_entry *r = stack;
1498 		unsigned int count =
1499 			min_t(unsigned int, remain, ARRAY_SIZE(stack));
1500 		unsigned int copied;
1501 
1502 		/*
1503 		 * This is the fast path and we cannot handle a pagefault
1504 		 * whilst holding the struct mutex lest the user pass in the
1505 		 * relocations contained within a mmaped bo. In such a case
1506 		 * the page fault handler would call i915_gem_fault() and
1507 		 * we would try to acquire the struct mutex again. Obviously
1508 		 * this is bad and so lockdep complains vehemently.
1509 		 */
1510 		pagefault_disable();
1511 		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1512 		pagefault_enable();
1513 		if (unlikely(copied)) {
1514 			remain = -EFAULT;
1515 			goto out;
1516 		}
1517 
1518 		remain -= count;
1519 		do {
1520 			u64 offset = eb_relocate_entry(eb, vma, r);
1521 
1522 			if (likely(offset == 0)) {
1523 			} else if ((s64)offset < 0) {
1524 				remain = (int)offset;
1525 				goto out;
1526 			} else {
1527 				/*
1528 				 * Note that reporting an error now
1529 				 * leaves everything in an inconsistent
1530 				 * state as we have *already* changed
1531 				 * the relocation value inside the
1532 				 * object. As we have not changed the
1533 				 * reloc.presumed_offset and will not
1534 				 * change the execobject.offset, on the next
1535 				 * call we may not rewrite the value
1536 				 * inside the object, leaving it
1537 				 * dangling and causing a GPU hang - unless
1538 				 * userspace dynamically rebuilds the
1539 				 * relocations on each execbuf rather than
1540 				 * presuming a static tree.
1541 				 *
1542 				 * We did previously check if the relocations
1543 				 * were writable (access_ok), an error now
1544 				 * would be a strange race with mprotect,
1545 				 * having already demonstrated that we
1546 				 * can read from this userspace address.
1547 				 */
1548 				offset = gen8_canonical_addr(offset & ~UPDATE);
1549 				if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) {
1550 					remain = -EFAULT;
1551 					goto out;
1552 				}
1553 			}
1554 		} while (r++, --count);
1555 		urelocs += ARRAY_SIZE(stack);
1556 	} while (remain);
1557 out:
1558 	reloc_cache_reset(&eb->reloc_cache);
1559 	return remain;
1560 }
1561 
1562 static int
1563 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
1564 {
1565 	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1566 	struct drm_i915_gem_relocation_entry *relocs =
1567 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1568 	unsigned int i;
1569 	int err;
1570 
1571 	for (i = 0; i < entry->relocation_count; i++) {
1572 		u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
1573 
1574 		if ((s64)offset < 0) {
1575 			err = (int)offset;
1576 			goto err;
1577 		}
1578 	}
1579 	err = 0;
1580 err:
1581 	reloc_cache_reset(&eb->reloc_cache);
1582 	return err;
1583 }
1584 
1585 static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1586 {
1587 	const char __user *addr, *end;
1588 	unsigned long size;
1589 	char __maybe_unused c;
1590 
1591 	size = entry->relocation_count;
1592 	if (size == 0)
1593 		return 0;
1594 
1595 	if (size > N_RELOC(ULONG_MAX))
1596 		return -EINVAL;
1597 
1598 	addr = u64_to_user_ptr(entry->relocs_ptr);
1599 	size *= sizeof(struct drm_i915_gem_relocation_entry);
1600 	if (!access_ok(addr, size))
1601 		return -EFAULT;
1602 
1603 	end = addr + size;
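	/* Read one byte per page to validate (and prefault) the whole array. */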
1604 	for (; addr < end; addr += PAGE_SIZE) {
1605 		int err = __get_user(c, addr);
1606 		if (err)
1607 			return err;
1608 	}
1609 	return __get_user(c, end - 1);
1610 }
1611 
1612 static int eb_copy_relocations(const struct i915_execbuffer *eb)
1613 {
1614 	struct drm_i915_gem_relocation_entry *relocs;
1615 	const unsigned int count = eb->buffer_count;
1616 	unsigned int i;
1617 	int err;
1618 
1619 	for (i = 0; i < count; i++) {
1620 		const unsigned int nreloc = eb->exec[i].relocation_count;
1621 		struct drm_i915_gem_relocation_entry __user *urelocs;
1622 		unsigned long size;
1623 		unsigned long copied;
1624 
1625 		if (nreloc == 0)
1626 			continue;
1627 
1628 		err = check_relocations(&eb->exec[i]);
1629 		if (err)
1630 			goto err;
1631 
1632 		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
1633 		size = nreloc * sizeof(*relocs);
1634 
1635 		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
1636 		if (!relocs) {
1637 			err = -ENOMEM;
1638 			goto err;
1639 		}
1640 
1641 		/* copy_from_user is limited to < 4GiB */
1642 		copied = 0;
1643 		do {
1644 			unsigned int len =
1645 				min_t(u64, BIT_ULL(31), size - copied);
1646 
1647 			if (__copy_from_user((char *)relocs + copied,
1648 					     (char __user *)urelocs + copied,
1649 					     len))
1650 				goto end;
1651 
1652 			copied += len;
1653 		} while (copied < size);
1654 
1655 		/*
1656 		 * As we do not update the known relocation offsets after
1657 		 * relocating (due to the complexities in lock handling),
1658 		 * we need to mark them as invalid now so that we force the
1659 		 * relocation processing next time. Just in case the target
1660 		 * object is evicted and then rebound into its old
1661 		 * presumed_offset before the next execbuffer - if that
1662 		 * happened we would make the mistake of assuming that the
1663 		 * relocations were valid.
1664 		 */
1665 		if (!user_access_begin(urelocs, size))
1666 			goto end;
1667 
1668 		for (copied = 0; copied < nreloc; copied++)
1669 			unsafe_put_user(-1,
1670 					&urelocs[copied].presumed_offset,
1671 					end_user);
1672 		user_access_end();
1673 
1674 		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1675 	}
1676 
1677 	return 0;
1678 
1679 end_user:
1680 	user_access_end();
1681 end:
1682 	kvfree(relocs);
1683 	err = -EFAULT;
1684 err:
1685 	while (i--) {
1686 		relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1687 		if (eb->exec[i].relocation_count)
1688 			kvfree(relocs);
1689 	}
1690 	return err;
1691 }
1692 
1693 static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1694 {
1695 	const unsigned int count = eb->buffer_count;
1696 	unsigned int i;
1697 
1698 	if (unlikely(i915_modparams.prefault_disable))
1699 		return 0;
1700 
1701 	for (i = 0; i < count; i++) {
1702 		int err;
1703 
1704 		err = check_relocations(&eb->exec[i]);
1705 		if (err)
1706 			return err;
1707 	}
1708 
1709 	return 0;
1710 }
1711 
1712 static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
1713 {
1714 	struct drm_device *dev = &eb->i915->drm;
1715 	bool have_copy = false;
1716 	struct i915_vma *vma;
1717 	int err = 0;
1718 
1719 repeat:
1720 	if (signal_pending(current)) {
1721 		err = -ERESTARTSYS;
1722 		goto out;
1723 	}
1724 
1725 	/* We may process another execbuffer during the unlock... */
1726 	eb_reset_vmas(eb);
1727 	mutex_unlock(&dev->struct_mutex);
1728 
1729 	/*
1730 	 * We take 3 passes through the slowpath.
1731 	 *
1732 	 * 1 - we try to just prefault all the user relocation entries and
1733 	 * then attempt to reuse the atomic pagefault disabled fast path again.
1734 	 *
1735 	 * 2 - we copy the user entries to a local buffer here outside of the
1736 	 * lock and allow ourselves to wait upon any rendering before
1737 	 * performing the relocations
1738 	 *
1739 	 * 3 - we already have a local copy of the relocation entries, but
1740 	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1741 	 */
1742 	if (!err) {
1743 		err = eb_prefault_relocations(eb);
1744 	} else if (!have_copy) {
1745 		err = eb_copy_relocations(eb);
1746 		have_copy = err == 0;
1747 	} else {
1748 		cond_resched();
1749 		err = 0;
1750 	}
1751 	if (err) {
1752 		mutex_lock(&dev->struct_mutex);
1753 		goto out;
1754 	}
1755 
1756 	/* A frequent cause of EAGAIN is currently unavailable client pages */
1757 	flush_workqueue(eb->i915->mm.userptr_wq);
1758 
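	/*
	 * We must return with struct_mutex held (the caller drops it), so
	 * if the interruptible lock is interrupted, fall back to the plain
	 * mutex_lock() before bailing out.
	 */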
1759 	err = i915_mutex_lock_interruptible(dev);
1760 	if (err) {
1761 		mutex_lock(&dev->struct_mutex);
1762 		goto out;
1763 	}
1764 
1765 	/* reacquire the objects */
1766 	err = eb_lookup_vmas(eb);
1767 	if (err)
1768 		goto err;
1769 
1770 	GEM_BUG_ON(!eb->batch);
1771 
1772 	list_for_each_entry(vma, &eb->relocs, reloc_link) {
1773 		if (!have_copy) {
1774 			pagefault_disable();
1775 			err = eb_relocate_vma(eb, vma);
1776 			pagefault_enable();
1777 			if (err)
1778 				goto repeat;
1779 		} else {
1780 			err = eb_relocate_vma_slow(eb, vma);
1781 			if (err)
1782 				goto err;
1783 		}
1784 	}
1785 
1786 	/*
1787 	 * Leave the user relocations as they are; this is the painfully slow
1788 	 * path, and we want to avoid the complication of dropping the lock
1789 	 * whilst having buffers reserved in the aperture and so causing spurious
1790 	 * ENOSPC for random operations.
1791 	 */
1792 
1793 err:
1794 	if (err == -EAGAIN)
1795 		goto repeat;
1796 
1797 out:
1798 	if (have_copy) {
1799 		const unsigned int count = eb->buffer_count;
1800 		unsigned int i;
1801 
1802 		for (i = 0; i < count; i++) {
1803 			const struct drm_i915_gem_exec_object2 *entry =
1804 				&eb->exec[i];
1805 			struct drm_i915_gem_relocation_entry *relocs;
1806 
1807 			if (!entry->relocation_count)
1808 				continue;
1809 
1810 			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1811 			kvfree(relocs);
1812 		}
1813 	}
1814 
1815 	return err;
1816 }
1817 
1818 static int eb_relocate(struct i915_execbuffer *eb)
1819 {
1820 	if (eb_lookup_vmas(eb))
1821 		goto slow;
1822 
1823 	/* The objects are in their final locations, apply the relocations. */
1824 	if (eb->args->flags & __EXEC_HAS_RELOC) {
1825 		struct i915_vma *vma;
1826 
1827 		list_for_each_entry(vma, &eb->relocs, reloc_link) {
1828 			if (eb_relocate_vma(eb, vma))
1829 				goto slow;
1830 		}
1831 	}
1832 
1833 	return 0;
1834 
1835 slow:
1836 	return eb_relocate_slow(eb);
1837 }
1838 
1839 static int eb_move_to_gpu(struct i915_execbuffer *eb)
1840 {
1841 	const unsigned int count = eb->buffer_count;
1842 	struct ww_acquire_ctx acquire;
1843 	unsigned int i;
1844 	int err = 0;
1845 
1846 	ww_acquire_init(&acquire, &reservation_ww_class);
1847 
1848 	for (i = 0; i < count; i++) {
1849 		struct i915_vma *vma = eb->vma[i];
1850 
1851 		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
1852 		if (!err)
1853 			continue;
1854 
1855 		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
1856 
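		/*
		 * ww_mutex backoff: on -EDEADLK drop every lock we already
		 * hold, rotating the contended vma down into slot 0 as we
		 * go, then take the contended lock with the _slow variant
		 * before the loop retries the remainder in order.
		 */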
1857 		if (err == -EDEADLK) {
1858 			GEM_BUG_ON(i == 0);
1859 			do {
1860 				int j = i - 1;
1861 
1862 				ww_mutex_unlock(&eb->vma[j]->resv->lock);
1863 
1864 				swap(eb->flags[i], eb->flags[j]);
1865 				swap(eb->vma[i],  eb->vma[j]);
1866 				eb->vma[i]->exec_flags = &eb->flags[i];
1867 			} while (--i);
1868 			GEM_BUG_ON(vma != eb->vma[0]);
1869 			vma->exec_flags = &eb->flags[0];
1870 
1871 			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
1872 							       &acquire);
1873 		}
1874 		if (err)
1875 			break;
1876 	}
1877 	ww_acquire_done(&acquire);
1878 
1879 	while (i--) {
1880 		unsigned int flags = eb->flags[i];
1881 		struct i915_vma *vma = eb->vma[i];
1882 		struct drm_i915_gem_object *obj = vma->obj;
1883 
1884 		assert_vma_held(vma);
1885 
1886 		if (flags & EXEC_OBJECT_CAPTURE) {
1887 			struct i915_capture_list *capture;
1888 
1889 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1890 			if (capture) {
1891 				capture->next = eb->request->capture_list;
1892 				capture->vma = vma;
1893 				eb->request->capture_list = capture;
1894 			}
1895 		}
1896 
1897 		/*
1898 		 * If the GPU is not _reading_ through the CPU cache, we need
1899 		 * to make sure that any writes (both previous GPU writes from
1900 		 * before a change in snooping levels and normal CPU writes)
1901 		 * caught in that cache are flushed to main memory.
1902 		 *
1903 		 * We want to say
1904 		 *   obj->cache_dirty &&
1905 		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
1906 		 * but gcc's optimiser doesn't handle that as well and emits
1907 		 * two jumps instead of one. Maybe one day...
1908 		 */
1909 		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
1910 			if (i915_gem_clflush_object(obj, 0))
1911 				flags &= ~EXEC_OBJECT_ASYNC;
1912 		}
1913 
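		/*
		 * Unless userspace opted out with EXEC_OBJECT_ASYNC, order
		 * this request after any existing work on the object, then
		 * track the vma as active on the request.
		 */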
1914 		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
1915 			err = i915_request_await_object
1916 				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
1917 		}
1918 
1919 		if (err == 0)
1920 			err = i915_vma_move_to_active(vma, eb->request, flags);
1921 
1922 		i915_vma_unlock(vma);
1923 
1924 		__eb_unreserve_vma(vma, flags);
1925 		vma->exec_flags = NULL;
1926 
1927 		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
1928 			i915_vma_put(vma);
1929 	}
1930 	ww_acquire_fini(&acquire);
1931 
1932 	if (unlikely(err))
1933 		goto err_skip;
1934 
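	/*
	 * All of the vma have been unreserved (and unreferenced) above, so
	 * clear eb->exec to tell the caller's error unwind not to release
	 * them again.
	 */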
1935 	eb->exec = NULL;
1936 
1937 	/* Unconditionally flush any chipset caches (for streaming writes). */
1938 	intel_gt_chipset_flush(eb->engine->gt);
1939 	return 0;
1940 
1941 err_skip:
1942 	i915_request_skip(eb->request, err);
1943 	return err;
1944 }
1945 
1946 static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1947 {
1948 	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
1949 		return false;
1950 
1951 	/* Kernel clipping was a DRI1 misfeature */
1952 	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
1953 		if (exec->num_cliprects || exec->cliprects_ptr)
1954 			return false;
1955 	}
1956 
1957 	if (exec->DR4 == 0xffffffff) {
1958 		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1959 		exec->DR4 = 0;
1960 	}
1961 	if (exec->DR1 || exec->DR4)
1962 		return false;
1963 
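	/* The batch must start and end on a qword (8 byte) boundary */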
1964 	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1965 		return false;
1966 
1967 	return true;
1968 }
1969 
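/*
 * I915_EXEC_GEN7_SOL_RESET: emit one MI_LOAD_REGISTER_IMM that zeroes the
 * four GEN7_SO_WRITE_OFFSET registers so the streamout (SOL) write offsets
 * restart from 0 for this batch. Only valid on the gen7 render ring.
 */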
1970 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
1971 {
1972 	u32 *cs;
1973 	int i;
1974 
1975 	if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) {
1976 		DRM_DEBUG("sol reset is gen7/rcs only\n");
1977 		return -EINVAL;
1978 	}
1979 
1980 	cs = intel_ring_begin(rq, 4 * 2 + 2);
1981 	if (IS_ERR(cs))
1982 		return PTR_ERR(cs);
1983 
1984 	*cs++ = MI_LOAD_REGISTER_IMM(4);
1985 	for (i = 0; i < 4; i++) {
1986 		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1987 		*cs++ = 0;
1988 	}
1989 	*cs++ = MI_NOOP;
1990 	intel_ring_advance(rq, cs);
1991 
1992 	return 0;
1993 }
1994 
1995 static struct i915_vma *
1996 shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
1997 {
1998 	struct drm_i915_private *dev_priv = eb->i915;
1999 	struct i915_vma * const vma = *eb->vma;
2000 	struct i915_address_space *vm;
2001 	u64 flags;
2002 
2003 	/*
2004 	 * PPGTT backed shadow buffers must be mapped RO, to prevent
2005 	 * post-scan tampering
2006 	 */
2007 	if (CMDPARSER_USES_GGTT(dev_priv)) {
2008 		flags = PIN_GLOBAL;
2009 		vm = &dev_priv->ggtt.vm;
2010 	} else if (vma->vm->has_read_only) {
2011 		flags = PIN_USER;
2012 		vm = vma->vm;
2013 		i915_gem_object_set_readonly(obj);
2014 	} else {
2015 		DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
2016 		return ERR_PTR(-EINVAL);
2017 	}
2018 
2019 	return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
2020 }
2021 
2022 static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
2023 {
2024 	struct intel_engine_pool_node *pool;
2025 	struct i915_vma *vma;
2026 	u64 batch_start;
2027 	u64 shadow_batch_start;
2028 	int err;
2029 
2030 	pool = intel_engine_get_pool(eb->engine, eb->batch_len);
2031 	if (IS_ERR(pool))
2032 		return ERR_CAST(pool);
2033 
2034 	vma = shadow_batch_pin(eb, pool->obj);
2035 	if (IS_ERR(vma))
2036 		goto err;
2037 
2038 	batch_start = gen8_canonical_addr(eb->batch->node.start) +
2039 		      eb->batch_start_offset;
2040 
2041 	shadow_batch_start = gen8_canonical_addr(vma->node.start);
2042 
2043 	err = intel_engine_cmd_parser(eb->gem_context,
2044 				      eb->engine,
2045 				      eb->batch->obj,
2046 				      batch_start,
2047 				      eb->batch_start_offset,
2048 				      eb->batch_len,
2049 				      pool->obj,
2050 				      shadow_batch_start);
2051 
2052 	if (err) {
2053 		i915_vma_unpin(vma);
2054 
2055 		/*
2056 		 * Unsafe GGTT-backed buffers can still be submitted safely
2057 		 * as non-secure.
2058 		 * For PPGTT backing, however, we have no choice but to
2059 		 * forcibly reject unsafe buffers.
2060 		 */
2061 		if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
2062 			/* Execute original buffer non-secure */
2063 			vma = NULL;
2064 		else
2065 			vma = ERR_PTR(err);
2066 		goto err;
2067 	}
2068 
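	/*
	 * Append the shadow batch to the exec arrays, using the extra slot
	 * reserved by the execbuffer2 ioctl, so the normal cleanup unpins
	 * and releases it like any other object.
	 */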
2069 	eb->vma[eb->buffer_count] = i915_vma_get(vma);
2070 	eb->flags[eb->buffer_count] =
2071 		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
2072 	vma->exec_flags = &eb->flags[eb->buffer_count];
2073 	eb->buffer_count++;
2074 
2075 	eb->batch_start_offset = 0;
2076 	eb->batch = vma;
2077 
2078 	if (CMDPARSER_USES_GGTT(eb->i915))
2079 		eb->batch_flags |= I915_DISPATCH_SECURE;
2080 
2081 	/* eb->batch_len unchanged */
2082 
2083 	vma->private = pool;
2084 	return vma;
2085 
2086 err:
2087 	intel_engine_pool_put(pool);
2088 	return vma;
2089 }
2090 
2091 static void
2092 add_to_client(struct i915_request *rq, struct drm_file *file)
2093 {
2094 	struct drm_i915_file_private *file_priv = file->driver_priv;
2095 
2096 	rq->file_priv = file_priv;
2097 
2098 	spin_lock(&file_priv->mm.lock);
2099 	list_add_tail(&rq->client_link, &file_priv->mm.request_list);
2100 	spin_unlock(&file_priv->mm.lock);
2101 }
2102 
2103 static int eb_submit(struct i915_execbuffer *eb)
2104 {
2105 	int err;
2106 
2107 	err = eb_move_to_gpu(eb);
2108 	if (err)
2109 		return err;
2110 
2111 	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
2112 		err = i915_reset_gen7_sol_offsets(eb->request);
2113 		if (err)
2114 			return err;
2115 	}
2116 
2117 	/*
2118 	 * After we have completed waiting for other engines (using HW
2119 	 * semaphores), we can signal that this request/batch is ready to run.
2120 	 * This allows us to determine if the batch is still waiting on the GPU
2121 	 * or actually running by checking the breadcrumb.
2122 	 */
2123 	if (eb->engine->emit_init_breadcrumb) {
2124 		err = eb->engine->emit_init_breadcrumb(eb->request);
2125 		if (err)
2126 			return err;
2127 	}
2128 
2129 	err = eb->engine->emit_bb_start(eb->request,
2130 					eb->batch->node.start +
2131 					eb->batch_start_offset,
2132 					eb->batch_len,
2133 					eb->batch_flags);
2134 	if (err)
2135 		return err;
2136 
2137 	if (i915_gem_context_nopreempt(eb->gem_context))
2138 		eb->request->flags |= I915_REQUEST_NOPREEMPT;
2139 
2140 	return 0;
2141 }
2142 
2143 static int num_vcs_engines(const struct drm_i915_private *i915)
2144 {
2145 	return hweight64(INTEL_INFO(i915)->engine_mask &
2146 			 GENMASK_ULL(VCS0 + I915_MAX_VCS - 1, VCS0));
2147 }
2148 
2149 /*
2150  * Find one BSD ring to dispatch the corresponding BSD command.
2151  * The engine index is returned.
2152  */
2153 static unsigned int
2154 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
2155 			 struct drm_file *file)
2156 {
2157 	struct drm_i915_file_private *file_priv = file->driver_priv;
2158 
2159 	/* Check whether the file_priv has already selected one ring. */
2160 	if ((int)file_priv->bsd_engine < 0)
2161 		file_priv->bsd_engine =
2162 			get_random_int() % num_vcs_engines(dev_priv);
2163 
2164 	return file_priv->bsd_engine;
2165 }
2166 
2167 static const enum intel_engine_id user_ring_map[] = {
2168 	[I915_EXEC_DEFAULT]	= RCS0,
2169 	[I915_EXEC_RENDER]	= RCS0,
2170 	[I915_EXEC_BLT]		= BCS0,
2171 	[I915_EXEC_BSD]		= VCS0,
2172 	[I915_EXEC_VEBOX]	= VECS0
2173 };
2174 
2175 static struct i915_request *eb_throttle(struct intel_context *ce)
2176 {
2177 	struct intel_ring *ring = ce->ring;
2178 	struct intel_timeline *tl = ce->timeline;
2179 	struct i915_request *rq;
2180 
2181 	/*
2182 	 * Completely unscientific finger-in-the-air estimates for a suitable
2183 	 * maximum user request size (to avoid blocking) and for the backoff.
2184 	 */
2185 	if (intel_ring_update_space(ring) >= PAGE_SIZE)
2186 		return NULL;
2187 
2188 	/*
2189 	 * Find a request such that, after waiting upon it, at least half of
2190 	 * the ring will be available. The hysteresis allows us to compete for
2191 	 * the shared ring and should mean that we sleep less often prior to
2192 	 * claiming our resources, but not so long that the ring completely
2193 	 * drains before we can submit our next request.
2194 	 */
2195 	list_for_each_entry(rq, &tl->requests, link) {
2196 		if (rq->ring != ring)
2197 			continue;
2198 
2199 		if (__intel_ring_space(rq->postfix,
2200 				       ring->emit, ring->size) > ring->size / 2)
2201 			break;
2202 	}
2203 	if (&rq->link == &tl->requests)
2204 		return NULL; /* weird, we will check again later for real */
2205 
2206 	return i915_request_get(rq);
2207 }
2208 
2209 static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
2210 {
2211 	struct intel_timeline *tl;
2212 	struct i915_request *rq;
2213 	int err;
2214 
2215 	/*
2216 	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2217 	 * EIO if the GPU is already wedged.
2218 	 */
2219 	err = intel_gt_terminally_wedged(ce->engine->gt);
2220 	if (err)
2221 		return err;
2222 
2223 	/*
2224 	 * Pinning the contexts may generate requests in order to acquire
2225 	 * GGTT space, so do this first before we reserve a seqno for
2226 	 * ourselves.
2227 	 */
2228 	err = intel_context_pin(ce);
2229 	if (err)
2230 		return err;
2231 
2232 	/*
2233 	 * Take a local wakeref for preparing to dispatch the execbuf as
2234 	 * we expect to access the hardware fairly frequently in the
2235 	 * process, and require the engine to be kept awake between accesses.
2236 	 * Upon dispatch, we acquire another prolonged wakeref that we hold
2237 	 * until the timeline is idle, which in turn releases the wakeref
2238 	 * taken on the engine, and the parent device.
2239 	 */
2240 	tl = intel_context_timeline_lock(ce);
2241 	if (IS_ERR(tl)) {
2242 		err = PTR_ERR(tl);
2243 		goto err_unpin;
2244 	}
2245 
2246 	intel_context_enter(ce);
2247 	rq = eb_throttle(ce);
2248 
2249 	intel_context_timeline_unlock(tl);
2250 
2251 	if (rq) {
2252 		if (i915_request_wait(rq,
2253 				      I915_WAIT_INTERRUPTIBLE,
2254 				      MAX_SCHEDULE_TIMEOUT) < 0) {
2255 			i915_request_put(rq);
2256 			err = -EINTR;
2257 			goto err_exit;
2258 		}
2259 
2260 		i915_request_put(rq);
2261 	}
2262 
2263 	eb->engine = ce->engine;
2264 	eb->context = ce;
2265 	return 0;
2266 
2267 err_exit:
2268 	mutex_lock(&tl->mutex);
2269 	intel_context_exit(ce);
2270 	intel_context_timeline_unlock(tl);
2271 err_unpin:
2272 	intel_context_unpin(ce);
2273 	return err;
2274 }
2275 
2276 static void eb_unpin_engine(struct i915_execbuffer *eb)
2277 {
2278 	struct intel_context *ce = eb->context;
2279 	struct intel_timeline *tl = ce->timeline;
2280 
2281 	mutex_lock(&tl->mutex);
2282 	intel_context_exit(ce);
2283 	mutex_unlock(&tl->mutex);
2284 
2285 	intel_context_unpin(ce);
2286 }
2287 
2288 static unsigned int
2289 eb_select_legacy_ring(struct i915_execbuffer *eb,
2290 		      struct drm_file *file,
2291 		      struct drm_i915_gem_execbuffer2 *args)
2292 {
2293 	struct drm_i915_private *i915 = eb->i915;
2294 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
2295 
2296 	if (user_ring_id != I915_EXEC_BSD &&
2297 	    (args->flags & I915_EXEC_BSD_MASK)) {
2298 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
2299 			  "bsd dispatch flags: %d\n", (int)(args->flags));
2300 		return -1;
2301 	}
2302 
2303 	if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
2304 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
2305 
2306 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
2307 			bsd_idx = gen8_dispatch_bsd_engine(i915, file);
2308 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
2309 			   bsd_idx <= I915_EXEC_BSD_RING2) {
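			/* Map the ABI encoding (RING1/RING2) onto a 0-based VCS index */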
2310 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
2311 			bsd_idx--;
2312 		} else {
2313 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
2314 				  bsd_idx);
2315 			return -1;
2316 		}
2317 
2318 		return _VCS(bsd_idx);
2319 	}
2320 
2321 	if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
2322 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
2323 		return -1;
2324 	}
2325 
2326 	return user_ring_map[user_ring_id];
2327 }
2328 
2329 static int
2330 eb_pin_engine(struct i915_execbuffer *eb,
2331 	      struct drm_file *file,
2332 	      struct drm_i915_gem_execbuffer2 *args)
2333 {
2334 	struct intel_context *ce;
2335 	unsigned int idx;
2336 	int err;
2337 
2338 	if (i915_gem_context_user_engines(eb->gem_context))
2339 		idx = args->flags & I915_EXEC_RING_MASK;
2340 	else
2341 		idx = eb_select_legacy_ring(eb, file, args);
2342 
2343 	ce = i915_gem_context_get_engine(eb->gem_context, idx);
2344 	if (IS_ERR(ce))
2345 		return PTR_ERR(ce);
2346 
2347 	err = __eb_pin_engine(eb, ce);
2348 	intel_context_put(ce);
2349 
2350 	return err;
2351 }
2352 
2353 static void
2354 __free_fence_array(struct drm_syncobj **fences, unsigned int n)
2355 {
2356 	while (n--)
2357 		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
2358 	kvfree(fences);
2359 }
2360 
2361 static struct drm_syncobj **
2362 get_fence_array(struct drm_i915_gem_execbuffer2 *args,
2363 		struct drm_file *file)
2364 {
2365 	const unsigned long nfences = args->num_cliprects;
2366 	struct drm_i915_gem_exec_fence __user *user;
2367 	struct drm_syncobj **fences;
2368 	unsigned long n;
2369 	int err;
2370 
2371 	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
2372 		return NULL;
2373 
2374 	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
2375 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2376 	if (nfences > min_t(unsigned long,
2377 			    ULONG_MAX / sizeof(*user),
2378 			    SIZE_MAX / sizeof(*fences)))
2379 		return ERR_PTR(-EINVAL);
2380 
2381 	user = u64_to_user_ptr(args->cliprects_ptr);
2382 	if (!access_ok(user, nfences * sizeof(*user)))
2383 		return ERR_PTR(-EFAULT);
2384 
2385 	fences = kvmalloc_array(nfences, sizeof(*fences),
2386 				__GFP_NOWARN | GFP_KERNEL);
2387 	if (!fences)
2388 		return ERR_PTR(-ENOMEM);
2389 
2390 	for (n = 0; n < nfences; n++) {
2391 		struct drm_i915_gem_exec_fence fence;
2392 		struct drm_syncobj *syncobj;
2393 
2394 		if (__copy_from_user(&fence, user++, sizeof(fence))) {
2395 			err = -EFAULT;
2396 			goto err;
2397 		}
2398 
2399 		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
2400 			err = -EINVAL;
2401 			goto err;
2402 		}
2403 
2404 		syncobj = drm_syncobj_find(file, fence.handle);
2405 		if (!syncobj) {
2406 			DRM_DEBUG("Invalid syncobj handle provided\n");
2407 			err = -ENOENT;
2408 			goto err;
2409 		}
2410 
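		/*
		 * Stash the I915_EXEC_FENCE_* flags in the low two bits of
		 * the (kmalloc-aligned) syncobj pointer; they are unpacked
		 * again in await_fence_array() and signal_fence_array().
		 */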
2411 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2412 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2413 
2414 		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
2415 	}
2416 
2417 	return fences;
2418 
2419 err:
2420 	__free_fence_array(fences, n);
2421 	return ERR_PTR(err);
2422 }
2423 
2424 static void
2425 put_fence_array(struct drm_i915_gem_execbuffer2 *args,
2426 		struct drm_syncobj **fences)
2427 {
2428 	if (fences)
2429 		__free_fence_array(fences, args->num_cliprects);
2430 }
2431 
2432 static int
2433 await_fence_array(struct i915_execbuffer *eb,
2434 		  struct drm_syncobj **fences)
2435 {
2436 	const unsigned int nfences = eb->args->num_cliprects;
2437 	unsigned int n;
2438 	int err;
2439 
2440 	for (n = 0; n < nfences; n++) {
2441 		struct drm_syncobj *syncobj;
2442 		struct dma_fence *fence;
2443 		unsigned int flags;
2444 
2445 		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2446 		if (!(flags & I915_EXEC_FENCE_WAIT))
2447 			continue;
2448 
2449 		fence = drm_syncobj_fence_get(syncobj);
2450 		if (!fence)
2451 			return -EINVAL;
2452 
2453 		err = i915_request_await_dma_fence(eb->request, fence);
2454 		dma_fence_put(fence);
2455 		if (err < 0)
2456 			return err;
2457 	}
2458 
2459 	return 0;
2460 }
2461 
2462 static void
2463 signal_fence_array(struct i915_execbuffer *eb,
2464 		   struct drm_syncobj **fences)
2465 {
2466 	const unsigned int nfences = eb->args->num_cliprects;
2467 	struct dma_fence * const fence = &eb->request->fence;
2468 	unsigned int n;
2469 
2470 	for (n = 0; n < nfences; n++) {
2471 		struct drm_syncobj *syncobj;
2472 		unsigned int flags;
2473 
2474 		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2475 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
2476 			continue;
2477 
2478 		drm_syncobj_replace_fence(syncobj, fence);
2479 	}
2480 }
2481 
2482 static int
2483 i915_gem_do_execbuffer(struct drm_device *dev,
2484 		       struct drm_file *file,
2485 		       struct drm_i915_gem_execbuffer2 *args,
2486 		       struct drm_i915_gem_exec_object2 *exec,
2487 		       struct drm_syncobj **fences)
2488 {
2489 	struct drm_i915_private *i915 = to_i915(dev);
2490 	struct i915_execbuffer eb;
2491 	struct dma_fence *in_fence = NULL;
2492 	struct dma_fence *exec_fence = NULL;
2493 	struct sync_file *out_fence = NULL;
2494 	int out_fence_fd = -1;
2495 	int err;
2496 
2497 	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
2498 	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
2499 		     ~__EXEC_OBJECT_UNKNOWN_FLAGS);
2500 
2501 	eb.i915 = i915;
2502 	eb.file = file;
2503 	eb.args = args;
2504 	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
2505 		args->flags |= __EXEC_HAS_RELOC;
2506 
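	/*
	 * exec was allocated with eb_element_size() per element (plus one
	 * spare slot for the cmdparser shadow batch), so the vma pointer
	 * array and the per-object flags array live in the tail of the
	 * same allocation; carve them out here.
	 */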
2507 	eb.exec = exec;
2508 	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
2509 	eb.vma[0] = NULL;
2510 	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
2511 
2512 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
2513 	reloc_cache_init(&eb.reloc_cache, eb.i915);
2514 
2515 	eb.buffer_count = args->buffer_count;
2516 	eb.batch_start_offset = args->batch_start_offset;
2517 	eb.batch_len = args->batch_len;
2518 
2519 	eb.batch_flags = 0;
2520 	if (args->flags & I915_EXEC_SECURE) {
2521 		if (INTEL_GEN(i915) >= 11)
2522 			return -ENODEV;
2523 
2524 		/* Return -EPERM to trigger fallback code on old binaries. */
2525 		if (!HAS_SECURE_BATCHES(i915))
2526 			return -EPERM;
2527 
2528 		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
2529 			return -EPERM;
2530 
2531 		eb.batch_flags |= I915_DISPATCH_SECURE;
2532 	}
2533 	if (args->flags & I915_EXEC_IS_PINNED)
2534 		eb.batch_flags |= I915_DISPATCH_PINNED;
2535 
2536 	if (args->flags & I915_EXEC_FENCE_IN) {
2537 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
2538 		if (!in_fence)
2539 			return -EINVAL;
2540 	}
2541 
2542 	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
2543 		if (in_fence) {
2544 			err = -EINVAL;
2545 			goto err_in_fence;
2546 		}
2547 
2548 		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
2549 		if (!exec_fence) {
2550 			err = -EINVAL;
2551 			goto err_in_fence;
2552 		}
2553 	}
2554 
2555 	if (args->flags & I915_EXEC_FENCE_OUT) {
2556 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
2557 		if (out_fence_fd < 0) {
2558 			err = out_fence_fd;
2559 			goto err_exec_fence;
2560 		}
2561 	}
2562 
2563 	err = eb_create(&eb);
2564 	if (err)
2565 		goto err_out_fence;
2566 
2567 	GEM_BUG_ON(!eb.lut_size);
2568 
2569 	err = eb_select_context(&eb);
2570 	if (unlikely(err))
2571 		goto err_destroy;
2572 
2573 	err = eb_pin_engine(&eb, file, args);
2574 	if (unlikely(err))
2575 		goto err_context;
2576 
2577 	err = i915_mutex_lock_interruptible(dev);
2578 	if (err)
2579 		goto err_engine;
2580 
2581 	err = eb_relocate(&eb);
2582 	if (err) {
2583 		/*
2584 		 * If the user expects the execobject.offset and
2585 		 * reloc.presumed_offset to be an exact match,
2586 		 * as for using NO_RELOC, then we cannot update
2587 		 * the execobject.offset until we have completed
2588 		 * relocation.
2589 		 */
2590 		args->flags &= ~__EXEC_HAS_RELOC;
2591 		goto err_vma;
2592 	}
2593 
2594 	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
2595 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
2596 		err = -EINVAL;
2597 		goto err_vma;
2598 	}
2599 	if (eb.batch_start_offset > eb.batch->size ||
2600 	    eb.batch_len > eb.batch->size - eb.batch_start_offset) {
2601 		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
2602 		err = -EINVAL;
2603 		goto err_vma;
2604 	}
2605 
2606 	if (eb.batch_len == 0)
2607 		eb.batch_len = eb.batch->size - eb.batch_start_offset;
2608 
2609 	if (eb_use_cmdparser(&eb)) {
2610 		struct i915_vma *vma;
2611 
2612 		vma = eb_parse(&eb);
2613 		if (IS_ERR(vma)) {
2614 			err = PTR_ERR(vma);
2615 			goto err_vma;
2616 		}
2617 	}
2618 
2619 	/*
2620 	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2621 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
2622 	 * hsw should have this fixed, but bdw mucks it up again.
	 */
2623 	if (eb.batch_flags & I915_DISPATCH_SECURE) {
2624 		struct i915_vma *vma;
2625 
2626 		/*
2627 		 * So on first glance it looks freaky that we pin the batch here
2628 		 * outside of the reservation loop. But:
2629 		 * - The batch is already pinned into the relevant ppgtt, so we
2630 		 *   already have the backing storage fully allocated.
2631 		 * - No other BO uses the global gtt (well contexts, but meh),
2632 		 *   so we don't really have issues with multiple objects not
2633 		 *   fitting due to fragmentation.
2634 		 * So this is actually safe.
2635 		 */
2636 		vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
2637 		if (IS_ERR(vma)) {
2638 			err = PTR_ERR(vma);
2639 			goto err_vma;
2640 		}
2641 
2642 		eb.batch = vma;
2643 	}
2644 
2645 	/* All GPU relocation batches must be submitted prior to the user rq */
2646 	GEM_BUG_ON(eb.reloc_cache.rq);
2647 
2648 	/* Allocate a request for this batch buffer nice and early. */
2649 	eb.request = i915_request_create(eb.context);
2650 	if (IS_ERR(eb.request)) {
2651 		err = PTR_ERR(eb.request);
2652 		goto err_batch_unpin;
2653 	}
2654 
2655 	if (in_fence) {
2656 		err = i915_request_await_dma_fence(eb.request, in_fence);
2657 		if (err < 0)
2658 			goto err_request;
2659 	}
2660 
2661 	if (exec_fence) {
2662 		err = i915_request_await_execution(eb.request, exec_fence,
2663 						   eb.engine->bond_execute);
2664 		if (err < 0)
2665 			goto err_request;
2666 	}
2667 
2668 	if (fences) {
2669 		err = await_fence_array(&eb, fences);
2670 		if (err)
2671 			goto err_request;
2672 	}
2673 
2674 	if (out_fence_fd != -1) {
2675 		out_fence = sync_file_create(&eb.request->fence);
2676 		if (!out_fence) {
2677 			err = -ENOMEM;
2678 			goto err_request;
2679 		}
2680 	}
2681 
2682 	/*
2683 	 * Whilst this request exists, batch_obj will be on the
2684 	 * active_list, and so will hold the active reference. Only when this
2685 	 * request is retired will the batch_obj be moved onto the
2686 	 * inactive_list and lose its active reference. Hence we do not need
2687 	 * to explicitly hold another reference here.
2688 	 */
2689 	eb.request->batch = eb.batch;
2690 	if (eb.batch->private)
2691 		intel_engine_pool_mark_active(eb.batch->private, eb.request);
2692 
2693 	trace_i915_request_queue(eb.request, eb.batch_flags);
2694 	err = eb_submit(&eb);
2695 err_request:
2696 	add_to_client(eb.request, file);
2697 	i915_request_get(eb.request);
2698 	i915_request_add(eb.request);
2699 
2700 	if (fences)
2701 		signal_fence_array(&eb, fences);
2702 
2703 	if (out_fence) {
2704 		if (err == 0) {
2705 			fd_install(out_fence_fd, out_fence->file);
2706 			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
2707 			args->rsvd2 |= (u64)out_fence_fd << 32;
2708 			out_fence_fd = -1;
2709 		} else {
2710 			fput(out_fence->file);
2711 		}
2712 	}
2713 	i915_request_put(eb.request);
2714 
2715 err_batch_unpin:
2716 	if (eb.batch_flags & I915_DISPATCH_SECURE)
2717 		i915_vma_unpin(eb.batch);
2718 	if (eb.batch->private)
2719 		intel_engine_pool_put(eb.batch->private);
2720 err_vma:
2721 	if (eb.exec)
2722 		eb_release_vmas(&eb);
2723 	mutex_unlock(&dev->struct_mutex);
2724 err_engine:
2725 	eb_unpin_engine(&eb);
2726 err_context:
2727 	i915_gem_context_put(eb.gem_context);
2728 err_destroy:
2729 	eb_destroy(&eb);
2730 err_out_fence:
2731 	if (out_fence_fd != -1)
2732 		put_unused_fd(out_fence_fd);
2733 err_exec_fence:
2734 	dma_fence_put(exec_fence);
2735 err_in_fence:
2736 	dma_fence_put(in_fence);
2737 	return err;
2738 }
2739 
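/*
 * Per-object footprint of the combined allocation: the exec object itself
 * plus room for a vma pointer and a flags word, carved out of the tail in
 * i915_gem_do_execbuffer().
 */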
2740 static size_t eb_element_size(void)
2741 {
2742 	return (sizeof(struct drm_i915_gem_exec_object2) +
2743 		sizeof(struct i915_vma *) +
2744 		sizeof(unsigned int));
2745 }
2746 
2747 static bool check_buffer_count(size_t count)
2748 {
2749 	const size_t sz = eb_element_size();
2750 
2751 	/*
2752 	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
2753 	 * array size (see eb_create()). Otherwise, we can accept an array as
2754 	 * large as can be addressed (though use large arrays at your peril)!
2755 	 */
2756 
2757 	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
2758 }
2759 
2760 /*
2761  * Legacy execbuffer just creates an exec2 list from the original exec object
2762  * list array and passes it to the real function.
2763  */
2764 int
2765 i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
2766 			  struct drm_file *file)
2767 {
2768 	struct drm_i915_gem_execbuffer *args = data;
2769 	struct drm_i915_gem_execbuffer2 exec2;
2770 	struct drm_i915_gem_exec_object *exec_list = NULL;
2771 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
2772 	const size_t count = args->buffer_count;
2773 	unsigned int i;
2774 	int err;
2775 
2776 	if (!check_buffer_count(count)) {
2777 		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2778 		return -EINVAL;
2779 	}
2780 
2781 	exec2.buffers_ptr = args->buffers_ptr;
2782 	exec2.buffer_count = args->buffer_count;
2783 	exec2.batch_start_offset = args->batch_start_offset;
2784 	exec2.batch_len = args->batch_len;
2785 	exec2.DR1 = args->DR1;
2786 	exec2.DR4 = args->DR4;
2787 	exec2.num_cliprects = args->num_cliprects;
2788 	exec2.cliprects_ptr = args->cliprects_ptr;
2789 	exec2.flags = I915_EXEC_RENDER;
2790 	i915_execbuffer2_set_context_id(exec2, 0);
2791 
2792 	if (!i915_gem_check_execbuffer(&exec2))
2793 		return -EINVAL;
2794 
2795 	/* Copy in the exec list from userland */
2796 	exec_list = kvmalloc_array(count, sizeof(*exec_list),
2797 				   __GFP_NOWARN | GFP_KERNEL);
2798 	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2799 				    __GFP_NOWARN | GFP_KERNEL);
2800 	if (exec_list == NULL || exec2_list == NULL) {
2801 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
2802 			  args->buffer_count);
2803 		kvfree(exec_list);
2804 		kvfree(exec2_list);
2805 		return -ENOMEM;
2806 	}
2807 	err = copy_from_user(exec_list,
2808 			     u64_to_user_ptr(args->buffers_ptr),
2809 			     sizeof(*exec_list) * count);
2810 	if (err) {
2811 		DRM_DEBUG("copy %d exec entries failed %d\n",
2812 			  args->buffer_count, err);
2813 		kvfree(exec_list);
2814 		kvfree(exec2_list);
2815 		return -EFAULT;
2816 	}
2817 
2818 	for (i = 0; i < args->buffer_count; i++) {
2819 		exec2_list[i].handle = exec_list[i].handle;
2820 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
2821 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
2822 		exec2_list[i].alignment = exec_list[i].alignment;
2823 		exec2_list[i].offset = exec_list[i].offset;
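		/*
		 * The legacy exec object carries no flags field; on gen2/3
		 * conservatively assume a fence register may be needed.
		 */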
2824 		if (INTEL_GEN(to_i915(dev)) < 4)
2825 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
2826 		else
2827 			exec2_list[i].flags = 0;
2828 	}
2829 
2830 	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
2831 	if (exec2.flags & __EXEC_HAS_RELOC) {
2832 		struct drm_i915_gem_exec_object __user *user_exec_list =
2833 			u64_to_user_ptr(args->buffers_ptr);
2834 
2835 		/* Copy the new buffer offsets back to the user's exec list. */
2836 		for (i = 0; i < args->buffer_count; i++) {
2837 			if (!(exec2_list[i].offset & UPDATE))
2838 				continue;
2839 
2840 			exec2_list[i].offset =
2841 				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2842 			exec2_list[i].offset &= PIN_OFFSET_MASK;
2843 			if (__copy_to_user(&user_exec_list[i].offset,
2844 					   &exec2_list[i].offset,
2845 					   sizeof(user_exec_list[i].offset)))
2846 				break;
2847 		}
2848 	}
2849 
2850 	kvfree(exec_list);
2851 	kvfree(exec2_list);
2852 	return err;
2853 }
2854 
2855 int
2856 i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
2857 			   struct drm_file *file)
2858 {
2859 	struct drm_i915_gem_execbuffer2 *args = data;
2860 	struct drm_i915_gem_exec_object2 *exec2_list;
2861 	struct drm_syncobj **fences = NULL;
2862 	const size_t count = args->buffer_count;
2863 	int err;
2864 
2865 	if (!check_buffer_count(count)) {
2866 		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2867 		return -EINVAL;
2868 	}
2869 
2870 	if (!i915_gem_check_execbuffer(args))
2871 		return -EINVAL;
2872 
2873 	/* Allocate an extra slot for use by the command parser */
2874 	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2875 				    __GFP_NOWARN | GFP_KERNEL);
2876 	if (exec2_list == NULL) {
2877 		DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
2878 			  count);
2879 		return -ENOMEM;
2880 	}
2881 	if (copy_from_user(exec2_list,
2882 			   u64_to_user_ptr(args->buffers_ptr),
2883 			   sizeof(*exec2_list) * count)) {
2884 		DRM_DEBUG("copy %zd exec entries failed\n", count);
2885 		kvfree(exec2_list);
2886 		return -EFAULT;
2887 	}
2888 
2889 	if (args->flags & I915_EXEC_FENCE_ARRAY) {
2890 		fences = get_fence_array(args, file);
2891 		if (IS_ERR(fences)) {
2892 			kvfree(exec2_list);
2893 			return PTR_ERR(fences);
2894 		}
2895 	}
2896 
2897 	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
2898 
2899 	/*
2900 	 * Now that we have begun execution of the batchbuffer, we ignore
2901 	 * any new error after this point. Also given that we have already
2902 	 * updated the associated relocations, we try to write out the current
2903 	 * object locations irrespective of any error.
2904 	 */
2905 	if (args->flags & __EXEC_HAS_RELOC) {
2906 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
2907 			u64_to_user_ptr(args->buffers_ptr);
2908 		unsigned int i;
2909 
2910 		/* Copy the new buffer offsets back to the user's exec list. */
2911 		/*
2912 		 * Note: count * sizeof(*user_exec_list) does not overflow,
2913 		 * because we checked 'count' in check_buffer_count().
2914 		 *
2915 		 * And this range was already effectively checked earlier
2916 		 * when we did the "copy_from_user()" above.
2917 		 */
2918 		if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
2919 			goto end;
2920 
2921 		for (i = 0; i < args->buffer_count; i++) {
2922 			if (!(exec2_list[i].offset & UPDATE))
2923 				continue;
2924 
2925 			exec2_list[i].offset =
2926 				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2927 			unsafe_put_user(exec2_list[i].offset,
2928 					&user_exec_list[i].offset,
2929 					end_user);
2930 		}
2931 end_user:
2932 		user_access_end();
2933 end:;
2934 	}
2935 
2936 	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
2937 	put_fence_array(args, fences);
2938 	kvfree(exec2_list);
2939 	return err;
2940 }
2941