xref: /linux/drivers/gpu/host1x/job.c (revision 17cfcb68af3bc7d5e8ae08779b1853310a2949f3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Tegra host1x Job
4  *
5  * Copyright (c) 2010-2015, NVIDIA Corporation.
6  */
7 
8 #include <linux/dma-mapping.h>
9 #include <linux/err.h>
10 #include <linux/host1x.h>
11 #include <linux/kref.h>
12 #include <linux/module.h>
13 #include <linux/scatterlist.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <trace/events/host1x.h>
17 
18 #include "channel.h"
19 #include "dev.h"
20 #include "job.h"
21 #include "syncpt.h"
22 
23 #define HOST1X_WAIT_SYNCPT_OFFSET 0x8
24 
/*
 * host1x_job_alloc() - allocate a job together with all of its per-entry
 * arrays in a single allocation.
 * @ch: channel the job will be submitted to
 * @num_cmdbufs: number of gathers (command buffers) the job may hold
 * @num_relocs: number of relocations the job may hold
 *
 * Returns the job with one reference held (see host1x_job_put()), or NULL
 * on arithmetic overflow or allocation failure. The trailing arrays
 * (relocs, unpins, gathers, addr_phys) are carved out of the same
 * kzalloc()'d region, so the final host1x_job_put() frees everything at
 * once.
 */
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs)
{
	struct host1x_job *job = NULL;
	/* every reloc and every cmdbuf pins exactly one buffer */
	unsigned int num_unpins = num_cmdbufs + num_relocs;
	u64 total;
	void *mem;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs  */
	/* NOTE(review): assumes each array's natural alignment is satisfied
	 * by the sizes of the preceding structs -- confirm for dma_addr_t on
	 * 32-bit configurations. */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->gathers = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
	job->addr_phys = num_unpins ? mem : NULL;

	/* addr_phys holds the reloc addresses first, then the gathers' */
	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);
66 
/*
 * host1x_job_get() - take an additional reference on a job.
 *
 * Returns the same job pointer so the call can be chained.
 */
struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);
73 
74 static void job_free(struct kref *ref)
75 {
76 	struct host1x_job *job = container_of(ref, struct host1x_job, ref);
77 
78 	kfree(job);
79 }
80 
/*
 * host1x_job_put() - drop a reference on a job; the job is freed via
 * job_free() when the last reference is dropped.
 */
void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);
86 
87 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
88 			   unsigned int words, unsigned int offset)
89 {
90 	struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
91 
92 	gather->words = words;
93 	gather->bo = bo;
94 	gather->offset = offset;
95 
96 	job->num_gathers++;
97 }
98 EXPORT_SYMBOL(host1x_job_add_gather);
99 
100 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
101 {
102 	unsigned int i;
103 	int err;
104 
105 	job->num_unpins = 0;
106 
107 	for (i = 0; i < job->num_relocs; i++) {
108 		struct host1x_reloc *reloc = &job->relocs[i];
109 		struct sg_table *sgt;
110 		dma_addr_t phys_addr;
111 
112 		reloc->target.bo = host1x_bo_get(reloc->target.bo);
113 		if (!reloc->target.bo) {
114 			err = -EINVAL;
115 			goto unpin;
116 		}
117 
118 		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
119 
120 		job->addr_phys[job->num_unpins] = phys_addr;
121 		job->unpins[job->num_unpins].bo = reloc->target.bo;
122 		job->unpins[job->num_unpins].sgt = sgt;
123 		job->num_unpins++;
124 	}
125 
126 	for (i = 0; i < job->num_gathers; i++) {
127 		struct host1x_job_gather *g = &job->gathers[i];
128 		size_t gather_size = 0;
129 		struct scatterlist *sg;
130 		struct sg_table *sgt;
131 		dma_addr_t phys_addr;
132 		unsigned long shift;
133 		struct iova *alloc;
134 		unsigned int j;
135 
136 		g->bo = host1x_bo_get(g->bo);
137 		if (!g->bo) {
138 			err = -EINVAL;
139 			goto unpin;
140 		}
141 
142 		phys_addr = host1x_bo_pin(g->bo, &sgt);
143 
144 		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
145 			for_each_sg(sgt->sgl, sg, sgt->nents, j)
146 				gather_size += sg->length;
147 			gather_size = iova_align(&host->iova, gather_size);
148 
149 			shift = iova_shift(&host->iova);
150 			alloc = alloc_iova(&host->iova, gather_size >> shift,
151 					   host->iova_end >> shift, true);
152 			if (!alloc) {
153 				err = -ENOMEM;
154 				goto unpin;
155 			}
156 
157 			err = iommu_map_sg(host->domain,
158 					iova_dma_addr(&host->iova, alloc),
159 					sgt->sgl, sgt->nents, IOMMU_READ);
160 			if (err == 0) {
161 				__free_iova(&host->iova, alloc);
162 				err = -EINVAL;
163 				goto unpin;
164 			}
165 
166 			job->addr_phys[job->num_unpins] =
167 				iova_dma_addr(&host->iova, alloc);
168 			job->unpins[job->num_unpins].size = gather_size;
169 		} else {
170 			job->addr_phys[job->num_unpins] = phys_addr;
171 		}
172 
173 		job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
174 
175 		job->unpins[job->num_unpins].bo = g->bo;
176 		job->unpins[job->num_unpins].sgt = sgt;
177 		job->num_unpins++;
178 	}
179 
180 	return 0;
181 
182 unpin:
183 	host1x_job_unpin(job);
184 	return err;
185 }
186 
/*
 * do_relocs() - patch every relocation that targets the given gather's
 * command buffer with the DMA address resolved in job->reloc_addr_phys.
 *
 * Returns 0 on success or -ENOMEM if the command buffer cannot be mapped.
 */
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		/* resolved target address, shifted as this reloc requires */
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
			/* with the firewall, patch the validated copy of the
			 * gather (see copy_gathers()), not the original BO */
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
						g->offset / sizeof(u32);
			goto patch_reloc;
		}

		/* map the original command buffer lazily, at most once */
		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}
230 
231 static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
232 			unsigned int offset)
233 {
234 	offset *= sizeof(u32);
235 
236 	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
237 		return false;
238 
239 	/* relocation shift value validation isn't implemented yet */
240 	if (reloc->shift)
241 		return false;
242 
243 	return true;
244 }
245 
/*
 * State carried across one firewall (command stream validation) pass.
 * validate() decodes a gather word by word; the check_*() helpers consume
 * relocations in the order they must appear in the stream.
 */
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	/* relocations not yet matched to an address-register write */
	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	/* gather currently being validated and word offset into it */
	struct host1x_bo *cmdbuf;
	unsigned int offset;

	/* decoded state of the opcode being processed */
	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};
262 
263 static int check_register(struct host1x_firewall *fw, unsigned long offset)
264 {
265 	if (!fw->job->is_addr_reg)
266 		return 0;
267 
268 	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
269 		if (!fw->num_relocs)
270 			return -EINVAL;
271 
272 		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
273 			return -EINVAL;
274 
275 		fw->num_relocs--;
276 		fw->reloc++;
277 	}
278 
279 	return 0;
280 }
281 
282 static int check_class(struct host1x_firewall *fw, u32 class)
283 {
284 	if (!fw->job->is_valid_class) {
285 		if (fw->class != class)
286 			return -EINVAL;
287 	} else {
288 		if (!fw->job->is_valid_class(fw->class))
289 			return -EINVAL;
290 	}
291 
292 	return 0;
293 }
294 
295 static int check_mask(struct host1x_firewall *fw)
296 {
297 	u32 mask = fw->mask;
298 	u32 reg = fw->reg;
299 	int ret;
300 
301 	while (mask) {
302 		if (fw->words == 0)
303 			return -EINVAL;
304 
305 		if (mask & 1) {
306 			ret = check_register(fw, reg);
307 			if (ret < 0)
308 				return ret;
309 
310 			fw->words--;
311 			fw->offset++;
312 		}
313 		mask >>= 1;
314 		reg++;
315 	}
316 
317 	return 0;
318 }
319 
320 static int check_incr(struct host1x_firewall *fw)
321 {
322 	u32 count = fw->count;
323 	u32 reg = fw->reg;
324 	int ret;
325 
326 	while (count) {
327 		if (fw->words == 0)
328 			return -EINVAL;
329 
330 		ret = check_register(fw, reg);
331 		if (ret < 0)
332 			return ret;
333 
334 		reg++;
335 		fw->words--;
336 		fw->offset++;
337 		count--;
338 	}
339 
340 	return 0;
341 }
342 
343 static int check_nonincr(struct host1x_firewall *fw)
344 {
345 	u32 count = fw->count;
346 	int ret;
347 
348 	while (count) {
349 		if (fw->words == 0)
350 			return -EINVAL;
351 
352 		ret = check_register(fw, fw->reg);
353 		if (ret < 0)
354 			return ret;
355 
356 		fw->words--;
357 		fw->offset++;
358 		count--;
359 	}
360 
361 	return 0;
362 }
363 
/*
 * validate() - run the firewall over one gather: decode each opcode word
 * from the (already copied) command stream and check that every register
 * write is permitted and every address-register write has a matching
 * relocation.
 *
 * Returns 0 if the gather passes, a negative error code otherwise.
 */
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	/* read from the stable copy made by copy_gathers(), not the BO */
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		/* opcode lives in the top nibble of each command word */
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			/* class switch with an optional masked write burst */
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 1:
			/* incrementing write: count words, consecutive regs */
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2:
			/* non-incrementing write: count words, one register */
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3:
			/* masked write: one word per set bit of the mask */
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 4:
		case 14:
			/* allowed opcodes with nothing further to check here
			 * -- presumably immediate/extended forms; confirm
			 * against the host1x opcode documentation */
			break;
		default:
			/* any other opcode is rejected outright */
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}
431 
/*
 * copy_gathers() - copy all of a job's gathers into one freshly allocated,
 * device-visible bounce buffer and run the firewall over the copy.
 *
 * Validating a private copy ensures userspace cannot modify the commands
 * after they have been checked. Returns 0 on success, -ENOMEM if the
 * bounce buffer cannot be allocated, or -EINVAL if validation fails. On
 * error, an already-allocated buffer is freed later by host1x_job_unpin()
 * (keyed off job->gather_copy_size).
 */
static inline int copy_gathers(struct host1x_job *job, struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	/* total size of all gathers, in bytes */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from a higher priority pools first,
	 * as awaiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher priority allocation failed, try the generic-blocking */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(dev, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	/* non-zero size tells host1x_job_unpin() to free the buffer */
	job->gather_copy_size = size;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		void *gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}
495 
/*
 * host1x_job_pin() - prepare a job for execution: pin all referenced
 * buffers, copy and validate the gathers when the firewall is enabled, and
 * patch relocations and gather base addresses.
 *
 * Returns 0 on success or a negative error code; on failure everything
 * that was pinned is released again via host1x_job_unpin().
 */
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
		err = copy_gathers(job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets gathers base if firewall is enabled */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			g->base = job->gather_addr_phys[i];

		/* later gathers backed by the same BO share this base and
		 * are marked handled so they are not patched twice */
		for (j = i + 1; j < job->num_gathers; j++) {
			if (job->gathers[j].bo == g->bo) {
				job->gathers[j].handled = true;
				job->gathers[j].base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	/* ensure the patched words are visible before submission */
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);
545 
/*
 * host1x_job_unpin() - undo everything pin_job()/host1x_job_pin() set up:
 * IOMMU mappings and IOVA ranges, buffer pins and references, and the
 * firewall's gather copy buffer. Safe to call on a partially pinned job.
 */
void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_job_unpin_data *unpin = &job->unpins[i];

		/* a non-zero size marks an entry pin_job() mapped through
		 * the IOMMU; tear that mapping down first */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
		    unpin->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i],
				    unpin->size);
			free_iova(&host->iova,
				iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(unpin->bo, unpin->sgt);
		host1x_bo_put(unpin->bo);
	}

	job->num_unpins = 0;

	/* gather_copy_size is only non-zero once copy_gathers() allocated */
	if (job->gather_copy_size)
		dma_free_wc(job->channel->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);
573 
/*
 * Debug routine used to dump job entries: syncpoint id/end value, starting
 * FIFO position, timeout and bookkeeping counts.
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}
586