xref: /linux/drivers/gpu/drm/radeon/radeon_cs.c (revision 9cfc5c90ad38c8fc11bfd39de42a107da00871ba)
1 /*
2  * Copyright 2008 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Jerome Glisse <glisse@freedesktop.org>
26  */
27 #include <linux/list_sort.h>
28 #include <drm/drmP.h>
29 #include <drm/radeon_drm.h>
30 #include "radeon_reg.h"
31 #include "radeon.h"
32 #include "radeon_trace.h"
33 
/* Highest bucket index. Priorities above this are clamped to it when an
 * item is added; the value 32 itself is reserved for the kernel driver.
 */
#define RADEON_CS_MAX_PRIORITY		32u
/* One bucket for every priority from 0 to RADEON_CS_MAX_PRIORITY inclusive. */
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	/* bucket[i] heads the list of items that were added with priority i */
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};
44 
45 static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
46 {
47 	unsigned i;
48 
49 	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
50 		INIT_LIST_HEAD(&b->bucket[i]);
51 }
52 
53 static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
54 				  struct list_head *item, unsigned priority)
55 {
56 	/* Since buffers which appear sooner in the relocation list are
57 	 * likely to be used more often than buffers which appear later
58 	 * in the list, the sort mustn't change the ordering of buffers
59 	 * with the same priority, i.e. it must be stable.
60 	 */
61 	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
62 }
63 
64 static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
65 				       struct list_head *out_list)
66 {
67 	unsigned i;
68 
69 	/* Connect the sorted buckets in the output list. */
70 	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
71 		list_splice(&b->bucket[i], out_list);
72 	}
73 }
74 
75 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
76 {
77 	struct drm_device *ddev = p->rdev->ddev;
78 	struct radeon_cs_chunk *chunk;
79 	struct radeon_cs_buckets buckets;
80 	unsigned i;
81 	bool need_mmap_lock = false;
82 	int r;
83 
84 	if (p->chunk_relocs == NULL) {
85 		return 0;
86 	}
87 	chunk = p->chunk_relocs;
88 	p->dma_reloc_idx = 0;
89 	/* FIXME: we assume that each relocs use 4 dwords */
90 	p->nrelocs = chunk->length_dw / 4;
91 	p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
92 	if (p->relocs == NULL) {
93 		return -ENOMEM;
94 	}
95 
96 	radeon_cs_buckets_init(&buckets);
97 
98 	for (i = 0; i < p->nrelocs; i++) {
99 		struct drm_radeon_cs_reloc *r;
100 		struct drm_gem_object *gobj;
101 		unsigned priority;
102 
103 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
104 		gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
105 		if (gobj == NULL) {
106 			DRM_ERROR("gem object lookup failed 0x%x\n",
107 				  r->handle);
108 			return -ENOENT;
109 		}
110 		p->relocs[i].robj = gem_to_radeon_bo(gobj);
111 
112 		/* The userspace buffer priorities are from 0 to 15. A higher
113 		 * number means the buffer is more important.
114 		 * Also, the buffers used for write have a higher priority than
115 		 * the buffers used for read only, which doubles the range
116 		 * to 0 to 31. 32 is reserved for the kernel driver.
117 		 */
118 		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
119 			   + !!r->write_domain;
120 
121 		/* the first reloc of an UVD job is the msg and that must be in
122 		   VRAM, also but everything into VRAM on AGP cards and older
123 		   IGP chips to avoid image corruptions */
124 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
125 		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
126 		     p->rdev->family == CHIP_RS780 ||
127 		     p->rdev->family == CHIP_RS880)) {
128 
129 			/* TODO: is this still needed for NI+ ? */
130 			p->relocs[i].prefered_domains =
131 				RADEON_GEM_DOMAIN_VRAM;
132 
133 			p->relocs[i].allowed_domains =
134 				RADEON_GEM_DOMAIN_VRAM;
135 
136 			/* prioritize this over any other relocation */
137 			priority = RADEON_CS_MAX_PRIORITY;
138 		} else {
139 			uint32_t domain = r->write_domain ?
140 				r->write_domain : r->read_domains;
141 
142 			if (domain & RADEON_GEM_DOMAIN_CPU) {
143 				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
144 					  "for command submission\n");
145 				return -EINVAL;
146 			}
147 
148 			p->relocs[i].prefered_domains = domain;
149 			if (domain == RADEON_GEM_DOMAIN_VRAM)
150 				domain |= RADEON_GEM_DOMAIN_GTT;
151 			p->relocs[i].allowed_domains = domain;
152 		}
153 
154 		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
155 			uint32_t domain = p->relocs[i].prefered_domains;
156 			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
157 				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
158 					  "allowed for userptr BOs\n");
159 				return -EINVAL;
160 			}
161 			need_mmap_lock = true;
162 			domain = RADEON_GEM_DOMAIN_GTT;
163 			p->relocs[i].prefered_domains = domain;
164 			p->relocs[i].allowed_domains = domain;
165 		}
166 
167 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
168 		p->relocs[i].tv.shared = !r->write_domain;
169 
170 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
171 				      priority);
172 	}
173 
174 	radeon_cs_buckets_get_list(&buckets, &p->validated);
175 
176 	if (p->cs_flags & RADEON_CS_USE_VM)
177 		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
178 					      &p->validated);
179 	if (need_mmap_lock)
180 		down_read(&current->mm->mmap_sem);
181 
182 	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
183 
184 	if (need_mmap_lock)
185 		up_read(&current->mm->mmap_sem);
186 
187 	return r;
188 }
189 
190 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
191 {
192 	p->priority = priority;
193 
194 	switch (ring) {
195 	default:
196 		DRM_ERROR("unknown ring id: %d\n", ring);
197 		return -EINVAL;
198 	case RADEON_CS_RING_GFX:
199 		p->ring = RADEON_RING_TYPE_GFX_INDEX;
200 		break;
201 	case RADEON_CS_RING_COMPUTE:
202 		if (p->rdev->family >= CHIP_TAHITI) {
203 			if (p->priority > 0)
204 				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
205 			else
206 				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
207 		} else
208 			p->ring = RADEON_RING_TYPE_GFX_INDEX;
209 		break;
210 	case RADEON_CS_RING_DMA:
211 		if (p->rdev->family >= CHIP_CAYMAN) {
212 			if (p->priority > 0)
213 				p->ring = R600_RING_TYPE_DMA_INDEX;
214 			else
215 				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
216 		} else if (p->rdev->family >= CHIP_RV770) {
217 			p->ring = R600_RING_TYPE_DMA_INDEX;
218 		} else {
219 			return -EINVAL;
220 		}
221 		break;
222 	case RADEON_CS_RING_UVD:
223 		p->ring = R600_RING_TYPE_UVD_INDEX;
224 		break;
225 	case RADEON_CS_RING_VCE:
226 		/* TODO: only use the low priority ring for now */
227 		p->ring = TN_RING_TYPE_VCE1_INDEX;
228 		break;
229 	}
230 	return 0;
231 }
232 
233 static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
234 {
235 	struct radeon_bo_list *reloc;
236 	int r;
237 
238 	list_for_each_entry(reloc, &p->validated, tv.head) {
239 		struct reservation_object *resv;
240 
241 		resv = reloc->robj->tbo.resv;
242 		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
243 				     reloc->tv.shared);
244 		if (r)
245 			return r;
246 	}
247 	return 0;
248 }
249 
250 /* XXX: note that this is called from the legacy UMS CS ioctl as well */
251 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
252 {
253 	struct drm_radeon_cs *cs = data;
254 	uint64_t *chunk_array_ptr;
255 	unsigned size, i;
256 	u32 ring = RADEON_CS_RING_GFX;
257 	s32 priority = 0;
258 
259 	INIT_LIST_HEAD(&p->validated);
260 
261 	if (!cs->num_chunks) {
262 		return 0;
263 	}
264 
265 	/* get chunks */
266 	p->idx = 0;
267 	p->ib.sa_bo = NULL;
268 	p->const_ib.sa_bo = NULL;
269 	p->chunk_ib = NULL;
270 	p->chunk_relocs = NULL;
271 	p->chunk_flags = NULL;
272 	p->chunk_const_ib = NULL;
273 	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
274 	if (p->chunks_array == NULL) {
275 		return -ENOMEM;
276 	}
277 	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
278 	if (copy_from_user(p->chunks_array, chunk_array_ptr,
279 			       sizeof(uint64_t)*cs->num_chunks)) {
280 		return -EFAULT;
281 	}
282 	p->cs_flags = 0;
283 	p->nchunks = cs->num_chunks;
284 	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
285 	if (p->chunks == NULL) {
286 		return -ENOMEM;
287 	}
288 	for (i = 0; i < p->nchunks; i++) {
289 		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
290 		struct drm_radeon_cs_chunk user_chunk;
291 		uint32_t __user *cdata;
292 
293 		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
294 		if (copy_from_user(&user_chunk, chunk_ptr,
295 				       sizeof(struct drm_radeon_cs_chunk))) {
296 			return -EFAULT;
297 		}
298 		p->chunks[i].length_dw = user_chunk.length_dw;
299 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
300 			p->chunk_relocs = &p->chunks[i];
301 		}
302 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
303 			p->chunk_ib = &p->chunks[i];
304 			/* zero length IB isn't useful */
305 			if (p->chunks[i].length_dw == 0)
306 				return -EINVAL;
307 		}
308 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
309 			p->chunk_const_ib = &p->chunks[i];
310 			/* zero length CONST IB isn't useful */
311 			if (p->chunks[i].length_dw == 0)
312 				return -EINVAL;
313 		}
314 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
315 			p->chunk_flags = &p->chunks[i];
316 			/* zero length flags aren't useful */
317 			if (p->chunks[i].length_dw == 0)
318 				return -EINVAL;
319 		}
320 
321 		size = p->chunks[i].length_dw;
322 		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
323 		p->chunks[i].user_ptr = cdata;
324 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
325 			continue;
326 
327 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
328 			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
329 				continue;
330 		}
331 
332 		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
333 		size *= sizeof(uint32_t);
334 		if (p->chunks[i].kdata == NULL) {
335 			return -ENOMEM;
336 		}
337 		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
338 			return -EFAULT;
339 		}
340 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
341 			p->cs_flags = p->chunks[i].kdata[0];
342 			if (p->chunks[i].length_dw > 1)
343 				ring = p->chunks[i].kdata[1];
344 			if (p->chunks[i].length_dw > 2)
345 				priority = (s32)p->chunks[i].kdata[2];
346 		}
347 	}
348 
349 	/* these are KMS only */
350 	if (p->rdev) {
351 		if ((p->cs_flags & RADEON_CS_USE_VM) &&
352 		    !p->rdev->vm_manager.enabled) {
353 			DRM_ERROR("VM not active on asic!\n");
354 			return -EINVAL;
355 		}
356 
357 		if (radeon_cs_get_ring(p, ring, priority))
358 			return -EINVAL;
359 
360 		/* we only support VM on some SI+ rings */
361 		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
362 			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
363 				DRM_ERROR("Ring %d requires VM!\n", p->ring);
364 				return -EINVAL;
365 			}
366 		} else {
367 			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
368 				DRM_ERROR("VM not supported on ring %d!\n",
369 					  p->ring);
370 				return -EINVAL;
371 			}
372 		}
373 	}
374 
375 	return 0;
376 }
377 
378 static int cmp_size_smaller_first(void *priv, struct list_head *a,
379 				  struct list_head *b)
380 {
381 	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
382 	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
383 
384 	/* Sort A before B if A is smaller. */
385 	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
386 }
387 
388 /**
389  * cs_parser_fini() - clean parser states
390  * @parser:	parser structure holding parsing context.
391  * @error:	error number
392  *
393  * If error is set than unvalidate buffer, otherwise just free memory
394  * used by parsing context.
395  **/
396 static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
397 {
398 	unsigned i;
399 
400 	if (!error) {
401 		/* Sort the buffer list from the smallest to largest buffer,
402 		 * which affects the order of buffers in the LRU list.
403 		 * This assures that the smallest buffers are added first
404 		 * to the LRU list, so they are likely to be later evicted
405 		 * first, instead of large buffers whose eviction is more
406 		 * expensive.
407 		 *
408 		 * This slightly lowers the number of bytes moved by TTM
409 		 * per frame under memory pressure.
410 		 */
411 		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
412 
413 		ttm_eu_fence_buffer_objects(&parser->ticket,
414 					    &parser->validated,
415 					    &parser->ib.fence->base);
416 	} else if (backoff) {
417 		ttm_eu_backoff_reservation(&parser->ticket,
418 					   &parser->validated);
419 	}
420 
421 	if (parser->relocs != NULL) {
422 		for (i = 0; i < parser->nrelocs; i++) {
423 			struct radeon_bo *bo = parser->relocs[i].robj;
424 			if (bo == NULL)
425 				continue;
426 
427 			drm_gem_object_unreference_unlocked(&bo->gem_base);
428 		}
429 	}
430 	kfree(parser->track);
431 	drm_free_large(parser->relocs);
432 	drm_free_large(parser->vm_bos);
433 	for (i = 0; i < parser->nchunks; i++)
434 		drm_free_large(parser->chunks[i].kdata);
435 	kfree(parser->chunks);
436 	kfree(parser->chunks_array);
437 	radeon_ib_free(parser->rdev, &parser->ib);
438 	radeon_ib_free(parser->rdev, &parser->const_ib);
439 }
440 
441 static int radeon_cs_ib_chunk(struct radeon_device *rdev,
442 			      struct radeon_cs_parser *parser)
443 {
444 	int r;
445 
446 	if (parser->chunk_ib == NULL)
447 		return 0;
448 
449 	if (parser->cs_flags & RADEON_CS_USE_VM)
450 		return 0;
451 
452 	r = radeon_cs_parse(rdev, parser->ring, parser);
453 	if (r || parser->parser_error) {
454 		DRM_ERROR("Invalid command stream !\n");
455 		return r;
456 	}
457 
458 	r = radeon_cs_sync_rings(parser);
459 	if (r) {
460 		if (r != -ERESTARTSYS)
461 			DRM_ERROR("Failed to sync rings: %i\n", r);
462 		return r;
463 	}
464 
465 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
466 		radeon_uvd_note_usage(rdev);
467 	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
468 		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
469 		radeon_vce_note_usage(rdev);
470 
471 	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
472 	if (r) {
473 		DRM_ERROR("Failed to schedule IB !\n");
474 	}
475 	return r;
476 }
477 
478 static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
479 				   struct radeon_vm *vm)
480 {
481 	struct radeon_device *rdev = p->rdev;
482 	struct radeon_bo_va *bo_va;
483 	int i, r;
484 
485 	r = radeon_vm_update_page_directory(rdev, vm);
486 	if (r)
487 		return r;
488 
489 	r = radeon_vm_clear_freed(rdev, vm);
490 	if (r)
491 		return r;
492 
493 	if (vm->ib_bo_va == NULL) {
494 		DRM_ERROR("Tmp BO not in VM!\n");
495 		return -EINVAL;
496 	}
497 
498 	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
499 				&rdev->ring_tmp_bo.bo->tbo.mem);
500 	if (r)
501 		return r;
502 
503 	for (i = 0; i < p->nrelocs; i++) {
504 		struct radeon_bo *bo;
505 
506 		bo = p->relocs[i].robj;
507 		bo_va = radeon_vm_bo_find(vm, bo);
508 		if (bo_va == NULL) {
509 			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
510 			return -EINVAL;
511 		}
512 
513 		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
514 		if (r)
515 			return r;
516 
517 		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
518 	}
519 
520 	return radeon_vm_clear_invalids(rdev, vm);
521 }
522 
523 static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
524 				 struct radeon_cs_parser *parser)
525 {
526 	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
527 	struct radeon_vm *vm = &fpriv->vm;
528 	int r;
529 
530 	if (parser->chunk_ib == NULL)
531 		return 0;
532 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
533 		return 0;
534 
535 	if (parser->const_ib.length_dw) {
536 		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
537 		if (r) {
538 			return r;
539 		}
540 	}
541 
542 	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
543 	if (r) {
544 		return r;
545 	}
546 
547 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
548 		radeon_uvd_note_usage(rdev);
549 
550 	mutex_lock(&vm->mutex);
551 	r = radeon_bo_vm_update_pte(parser, vm);
552 	if (r) {
553 		goto out;
554 	}
555 
556 	r = radeon_cs_sync_rings(parser);
557 	if (r) {
558 		if (r != -ERESTARTSYS)
559 			DRM_ERROR("Failed to sync rings: %i\n", r);
560 		goto out;
561 	}
562 
563 	if ((rdev->family >= CHIP_TAHITI) &&
564 	    (parser->chunk_const_ib != NULL)) {
565 		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
566 	} else {
567 		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
568 	}
569 
570 out:
571 	mutex_unlock(&vm->mutex);
572 	return r;
573 }
574 
575 static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
576 {
577 	if (r == -EDEADLK) {
578 		r = radeon_gpu_reset(rdev);
579 		if (!r)
580 			r = -EAGAIN;
581 	}
582 	return r;
583 }
584 
585 static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
586 {
587 	struct radeon_cs_chunk *ib_chunk;
588 	struct radeon_vm *vm = NULL;
589 	int r;
590 
591 	if (parser->chunk_ib == NULL)
592 		return 0;
593 
594 	if (parser->cs_flags & RADEON_CS_USE_VM) {
595 		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
596 		vm = &fpriv->vm;
597 
598 		if ((rdev->family >= CHIP_TAHITI) &&
599 		    (parser->chunk_const_ib != NULL)) {
600 			ib_chunk = parser->chunk_const_ib;
601 			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
602 				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
603 				return -EINVAL;
604 			}
605 			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
606 					   vm, ib_chunk->length_dw * 4);
607 			if (r) {
608 				DRM_ERROR("Failed to get const ib !\n");
609 				return r;
610 			}
611 			parser->const_ib.is_const_ib = true;
612 			parser->const_ib.length_dw = ib_chunk->length_dw;
613 			if (copy_from_user(parser->const_ib.ptr,
614 					       ib_chunk->user_ptr,
615 					       ib_chunk->length_dw * 4))
616 				return -EFAULT;
617 		}
618 
619 		ib_chunk = parser->chunk_ib;
620 		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
621 			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
622 			return -EINVAL;
623 		}
624 	}
625 	ib_chunk = parser->chunk_ib;
626 
627 	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
628 			   vm, ib_chunk->length_dw * 4);
629 	if (r) {
630 		DRM_ERROR("Failed to get ib !\n");
631 		return r;
632 	}
633 	parser->ib.length_dw = ib_chunk->length_dw;
634 	if (ib_chunk->kdata)
635 		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
636 	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
637 		return -EFAULT;
638 	return 0;
639 }
640 
641 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
642 {
643 	struct radeon_device *rdev = dev->dev_private;
644 	struct radeon_cs_parser parser;
645 	int r;
646 
647 	down_read(&rdev->exclusive_lock);
648 	if (!rdev->accel_working) {
649 		up_read(&rdev->exclusive_lock);
650 		return -EBUSY;
651 	}
652 	if (rdev->in_reset) {
653 		up_read(&rdev->exclusive_lock);
654 		r = radeon_gpu_reset(rdev);
655 		if (!r)
656 			r = -EAGAIN;
657 		return r;
658 	}
659 	/* initialize parser */
660 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
661 	parser.filp = filp;
662 	parser.rdev = rdev;
663 	parser.dev = rdev->dev;
664 	parser.family = rdev->family;
665 	r = radeon_cs_parser_init(&parser, data);
666 	if (r) {
667 		DRM_ERROR("Failed to initialize parser !\n");
668 		radeon_cs_parser_fini(&parser, r, false);
669 		up_read(&rdev->exclusive_lock);
670 		r = radeon_cs_handle_lockup(rdev, r);
671 		return r;
672 	}
673 
674 	r = radeon_cs_ib_fill(rdev, &parser);
675 	if (!r) {
676 		r = radeon_cs_parser_relocs(&parser);
677 		if (r && r != -ERESTARTSYS)
678 			DRM_ERROR("Failed to parse relocation %d!\n", r);
679 	}
680 
681 	if (r) {
682 		radeon_cs_parser_fini(&parser, r, false);
683 		up_read(&rdev->exclusive_lock);
684 		r = radeon_cs_handle_lockup(rdev, r);
685 		return r;
686 	}
687 
688 	trace_radeon_cs(&parser);
689 
690 	r = radeon_cs_ib_chunk(rdev, &parser);
691 	if (r) {
692 		goto out;
693 	}
694 	r = radeon_cs_ib_vm_chunk(rdev, &parser);
695 	if (r) {
696 		goto out;
697 	}
698 out:
699 	radeon_cs_parser_fini(&parser, r, true);
700 	up_read(&rdev->exclusive_lock);
701 	r = radeon_cs_handle_lockup(rdev, r);
702 	return r;
703 }
704 
705 /**
706  * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
707  * @parser:	parser structure holding parsing context.
708  * @pkt:	where to store packet information
709  *
710  * Assume that chunk_ib_index is properly set. Will return -EINVAL
711  * if packet is bigger than remaining ib size. or if packets is unknown.
712  **/
713 int radeon_cs_packet_parse(struct radeon_cs_parser *p,
714 			   struct radeon_cs_packet *pkt,
715 			   unsigned idx)
716 {
717 	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
718 	struct radeon_device *rdev = p->rdev;
719 	uint32_t header;
720 	int ret = 0, i;
721 
722 	if (idx >= ib_chunk->length_dw) {
723 		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
724 			  idx, ib_chunk->length_dw);
725 		return -EINVAL;
726 	}
727 	header = radeon_get_ib_value(p, idx);
728 	pkt->idx = idx;
729 	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
730 	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
731 	pkt->one_reg_wr = 0;
732 	switch (pkt->type) {
733 	case RADEON_PACKET_TYPE0:
734 		if (rdev->family < CHIP_R600) {
735 			pkt->reg = R100_CP_PACKET0_GET_REG(header);
736 			pkt->one_reg_wr =
737 				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
738 		} else
739 			pkt->reg = R600_CP_PACKET0_GET_REG(header);
740 		break;
741 	case RADEON_PACKET_TYPE3:
742 		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
743 		break;
744 	case RADEON_PACKET_TYPE2:
745 		pkt->count = -1;
746 		break;
747 	default:
748 		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
749 		ret = -EINVAL;
750 		goto dump_ib;
751 	}
752 	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
753 		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
754 			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
755 		ret = -EINVAL;
756 		goto dump_ib;
757 	}
758 	return 0;
759 
760 dump_ib:
761 	for (i = 0; i < ib_chunk->length_dw; i++) {
762 		if (i == idx)
763 			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
764 		else
765 			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
766 	}
767 	return ret;
768 }
769 
770 /**
771  * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
772  * @p:		structure holding the parser context.
773  *
774  * Check if the next packet is NOP relocation packet3.
775  **/
776 bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
777 {
778 	struct radeon_cs_packet p3reloc;
779 	int r;
780 
781 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
782 	if (r)
783 		return false;
784 	if (p3reloc.type != RADEON_PACKET_TYPE3)
785 		return false;
786 	if (p3reloc.opcode != RADEON_PACKET3_NOP)
787 		return false;
788 	return true;
789 }
790 
791 /**
792  * radeon_cs_dump_packet() - dump raw packet context
793  * @p:		structure holding the parser context.
794  * @pkt:	structure holding the packet.
795  *
796  * Used mostly for debugging and error reporting.
797  **/
798 void radeon_cs_dump_packet(struct radeon_cs_parser *p,
799 			   struct radeon_cs_packet *pkt)
800 {
801 	volatile uint32_t *ib;
802 	unsigned i;
803 	unsigned idx;
804 
805 	ib = p->ib.ptr;
806 	idx = pkt->idx;
807 	for (i = 0; i <= (pkt->count + 1); i++, idx++)
808 		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
809 }
810 
811 /**
812  * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
813  * @parser:		parser structure holding parsing context.
814  * @data:		pointer to relocation data
815  * @offset_start:	starting offset
816  * @offset_mask:	offset mask (to align start offset on)
817  * @reloc:		reloc informations
818  *
819  * Check if next packet is relocation packet3, do bo validation and compute
820  * GPU offset using the provided start.
821  **/
822 int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
823 				struct radeon_bo_list **cs_reloc,
824 				int nomm)
825 {
826 	struct radeon_cs_chunk *relocs_chunk;
827 	struct radeon_cs_packet p3reloc;
828 	unsigned idx;
829 	int r;
830 
831 	if (p->chunk_relocs == NULL) {
832 		DRM_ERROR("No relocation chunk !\n");
833 		return -EINVAL;
834 	}
835 	*cs_reloc = NULL;
836 	relocs_chunk = p->chunk_relocs;
837 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
838 	if (r)
839 		return r;
840 	p->idx += p3reloc.count + 2;
841 	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
842 	    p3reloc.opcode != RADEON_PACKET3_NOP) {
843 		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
844 			  p3reloc.idx);
845 		radeon_cs_dump_packet(p, &p3reloc);
846 		return -EINVAL;
847 	}
848 	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
849 	if (idx >= relocs_chunk->length_dw) {
850 		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
851 			  idx, relocs_chunk->length_dw);
852 		radeon_cs_dump_packet(p, &p3reloc);
853 		return -EINVAL;
854 	}
855 	/* FIXME: we assume reloc size is 4 dwords */
856 	if (nomm) {
857 		*cs_reloc = p->relocs;
858 		(*cs_reloc)->gpu_offset =
859 			(u64)relocs_chunk->kdata[idx + 3] << 32;
860 		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
861 	} else
862 		*cs_reloc = &p->relocs[(idx / 4)];
863 	return 0;
864 }
865