xref: /freebsd/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c (revision fcb560670601b2a4d87bb31d7531c8dcc37ee71b)
1 /**
2  * Copyright (c) 2010-2012 Broadcom. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions, and the following disclaimer,
9  *    without modification.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The names of the above-listed copyright holders may not be used
14  *    to endorse or promote products derived from this software without
15  *    specific prior written permission.
16  *
17  * ALTERNATIVELY, this software may be distributed under the terms of the
18  * GNU General Public License ("GPL") version 2, as published by the Free
19  * Software Foundation.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <interface/compat/vchi_bsd.h>
35 
36 #include <sys/malloc.h>
37 #include <sys/rwlock.h>
38 
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 #include <vm/vm_extern.h>
42 #include <vm/vm_kern.h>
43 #include <vm/vm_map.h>
44 #include <vm/vm_object.h>
45 #include <vm/vm_page.h>
46 #include <vm/vm_pager.h>
47 #include <vm/vm_param.h>
48 #include <vm/vm_phys.h>
49 
50 #include <machine/bus.h>
51 #include <arm/broadcom/bcm2835/bcm2835_mbox.h>
52 #include <arm/broadcom/bcm2835/bcm2835_vcbus.h>
53 
54 MALLOC_DEFINE(M_VCPAGELIST, "vcpagelist", "VideoCore pagelist memory");
55 
56 #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
57 
58 #define VCHIQ_DOORBELL_IRQ IRQ_ARM_DOORBELL_0
59 #define VCHIQ_ARM_ADDRESS(x) ((void *)PHYS_TO_VCBUS(pmap_kextract((vm_offset_t)(x))))
60 
61 #include "vchiq_arm.h"
62 #include "vchiq_2835.h"
63 #include "vchiq_connected.h"
64 
65 #define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2)
66 
67 typedef struct vchiq_2835_state_struct {
68    int inited;
69    VCHIQ_ARM_STATE_T arm_state;
70 } VCHIQ_2835_ARM_STATE_T;
71 
72 static char *g_slot_mem;
73 static int g_slot_mem_size;
74 vm_paddr_t g_slot_phys;
75 /* BSD DMA */
76 bus_dma_tag_t bcm_slots_dma_tag;
77 bus_dmamap_t bcm_slots_dma_map;
78 
79 static FRAGMENTS_T *g_fragments_base;
80 static FRAGMENTS_T *g_free_fragments;
81 struct semaphore g_free_fragments_sema;
82 
83 static DEFINE_SEMAPHORE(g_free_fragments_mutex);
84 
85 typedef struct bulkinfo_struct {
86 	PAGELIST_T	*pagelist;
87 	bus_dma_tag_t	pagelist_dma_tag;
88 	bus_dmamap_t	pagelist_dma_map;
89 	void		*buf;
90 	size_t		size;
91 } BULKINFO_T;
92 
93 static int
94 create_pagelist(char __user *buf, size_t count, unsigned short type,
95                 struct proc *p, BULKINFO_T *bi);
96 
97 static void
98 free_pagelist(BULKINFO_T *bi, int actual);
99 
100 static void
101 vchiq_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
102 {
103 	bus_addr_t *addr;
104 
105 	if (err)
106 		return;
107 
108 	addr = (bus_addr_t*)arg;
109 	*addr = PHYS_TO_VCBUS(segs[0].ds_addr);
110 }
111 
112 int __init
113 vchiq_platform_init(VCHIQ_STATE_T *state)
114 {
115 	VCHIQ_SLOT_ZERO_T *vchiq_slot_zero;
116 	int frag_mem_size;
117 	int err;
118 	int i;
119 
120 	/* Allocate space for the channels in coherent memory */
121 	g_slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
122 	frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
123 
124 	err = bus_dma_tag_create(
125 	    NULL,
126 	    PAGE_SIZE, 0,	       /* alignment, boundary */
127 	    BUS_SPACE_MAXADDR_32BIT,    /* lowaddr */
128 	    BUS_SPACE_MAXADDR,	  /* highaddr */
129 	    NULL, NULL,		 /* filter, filterarg */
130 	    g_slot_mem_size + frag_mem_size, 1,		/* maxsize, nsegments */
131 	    g_slot_mem_size + frag_mem_size, 0,		/* maxsegsize, flags */
132 	    NULL, NULL,		 /* lockfunc, lockarg */
133 	    &bcm_slots_dma_tag);
134 
135 	err = bus_dmamem_alloc(bcm_slots_dma_tag, (void **)&g_slot_mem,
136 	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bcm_slots_dma_map);
137 	if (err) {
138 		vchiq_log_error(vchiq_core_log_level, "Unable to allocate channel memory");
139 		err = -ENOMEM;
140 		goto failed_alloc;
141 	}
142 
143 	err = bus_dmamap_load(bcm_slots_dma_tag, bcm_slots_dma_map, g_slot_mem,
144 	    g_slot_mem_size + frag_mem_size, vchiq_dmamap_cb,
145 	    &g_slot_phys, 0);
146 
147 	if (err) {
148 		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map");
149 		err = -ENOMEM;
150 		goto failed_load;
151 	}
152 
153 	WARN_ON(((int)g_slot_mem & (PAGE_SIZE - 1)) != 0);
154 
155 	vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size);
156 	if (!vchiq_slot_zero) {
157 		err = -EINVAL;
158 		goto failed_init_slots;
159 	}
160 
161 	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
162 		(int)g_slot_phys + g_slot_mem_size;
163 	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
164 		MAX_FRAGMENTS;
165 
166 	g_fragments_base = (FRAGMENTS_T *)(g_slot_mem + g_slot_mem_size);
167 	g_slot_mem_size += frag_mem_size;
168 
169 	g_free_fragments = g_fragments_base;
170 	for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
171 		*(FRAGMENTS_T **)&g_fragments_base[i] =
172 			&g_fragments_base[i + 1];
173 	}
174 	*(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
175 	_sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
176 
177 	if (vchiq_init_state(state, vchiq_slot_zero, 0/*slave*/) !=
178 		VCHIQ_SUCCESS) {
179 		err = -EINVAL;
180 		goto failed_vchiq_init;
181 	}
182 
183 	bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (unsigned int)g_slot_phys);
184 
185 	vchiq_log_info(vchiq_arm_log_level,
186 		"vchiq_init - done (slots %x, phys %x)",
187 		(unsigned int)vchiq_slot_zero, g_slot_phys);
188 
189    vchiq_call_connected_callbacks();
190 
191    return 0;
192 
193 failed_vchiq_init:
194 failed_init_slots:
195 failed_load:
196 	bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
197 failed_alloc:
198 	bus_dmamap_destroy(bcm_slots_dma_tag, bcm_slots_dma_map);
199 	bus_dma_tag_destroy(bcm_slots_dma_tag);
200 
201    return err;
202 }
203 
204 void __exit
205 vchiq_platform_exit(VCHIQ_STATE_T *state)
206 {
207 
208 	bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
209 	bus_dmamap_destroy(bcm_slots_dma_tag, bcm_slots_dma_map);
210 	bus_dma_tag_destroy(bcm_slots_dma_tag);
211 }
212 
213 VCHIQ_STATUS_T
214 vchiq_platform_init_state(VCHIQ_STATE_T *state)
215 {
216    VCHIQ_STATUS_T status = VCHIQ_SUCCESS;
217    state->platform_state = kzalloc(sizeof(VCHIQ_2835_ARM_STATE_T), GFP_KERNEL);
218    ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 1;
219    status = vchiq_arm_init_state(state, &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state);
220    if(status != VCHIQ_SUCCESS)
221    {
222       ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 0;
223    }
224    return status;
225 }
226 
227 VCHIQ_ARM_STATE_T*
228 vchiq_platform_get_arm_state(VCHIQ_STATE_T *state)
229 {
230    if(!((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited)
231    {
232       BUG();
233    }
234    return &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state;
235 }
236 
237 int
238 vchiq_copy_from_user(void *dst, const void *src, int size)
239 {
240 
241 	if (((vm_offset_t)(src)) < VM_MIN_KERNEL_ADDRESS) {
242 		int error = copyin(src, dst, size);
243 		return error ? VCHIQ_ERROR : VCHIQ_SUCCESS;
244 	}
245 	else
246 		bcopy(src, dst, size);
247 
248 	return 0;
249 }
250 
251 VCHIQ_STATUS_T
252 vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
253 	void *offset, int size, int dir)
254 {
255 	BULKINFO_T *bi;
256 	int ret;
257 
258 	WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID);
259 	bi = malloc(sizeof(*bi), M_VCPAGELIST, M_WAITOK | M_ZERO);
260 	if (bi == NULL)
261 		return VCHIQ_ERROR;
262 
263 	ret = create_pagelist((char __user *)offset, size,
264 			(dir == VCHIQ_BULK_RECEIVE)
265 			? PAGELIST_READ
266 			: PAGELIST_WRITE,
267 			current,
268 			bi);
269 	if (ret != 0)
270 		return VCHIQ_ERROR;
271 
272 	bulk->handle = memhandle;
273 	bulk->data = VCHIQ_ARM_ADDRESS(bi->pagelist);
274 
275 	/* Store the pagelist address in remote_data, which isn't used by the
276 	   slave. */
277 	bulk->remote_data = bi;
278 
279 	return VCHIQ_SUCCESS;
280 }
281 
282 void
283 vchiq_complete_bulk(VCHIQ_BULK_T *bulk)
284 {
285 	if (bulk && bulk->remote_data && bulk->actual)
286 		free_pagelist((BULKINFO_T *)bulk->remote_data, bulk->actual);
287 }
288 
289 void
290 vchiq_transfer_bulk(VCHIQ_BULK_T *bulk)
291 {
292 	/*
293 	 * This should only be called on the master (VideoCore) side, but
294 	 * provide an implementation to avoid the need for ifdefery.
295 	 */
296 	BUG();
297 }
298 
/*
 * Emit the one-line platform description through vchiq_dump().
 */
void
vchiq_dump_platform_state(void *dump_context)
{
	char line[80];
	int written = snprintf(line, sizeof(line),
		"  Platform: 2835 (VC master)");

	/* + 1 so that the terminating NUL is passed along as well. */
	vchiq_dump(dump_context, line, written + 1);
}
308 
309 VCHIQ_STATUS_T
310 vchiq_platform_suspend(VCHIQ_STATE_T *state)
311 {
312    return VCHIQ_ERROR;
313 }
314 
315 VCHIQ_STATUS_T
316 vchiq_platform_resume(VCHIQ_STATE_T *state)
317 {
318    return VCHIQ_SUCCESS;
319 }
320 
321 void
322 vchiq_platform_paused(VCHIQ_STATE_T *state)
323 {
324 }
325 
326 void
327 vchiq_platform_resumed(VCHIQ_STATE_T *state)
328 {
329 }
330 
331 int
332 vchiq_platform_videocore_wanted(VCHIQ_STATE_T* state)
333 {
334    return 1; // autosuspend not supported - videocore always wanted
335 }
336 
/* The suspend timer is not used on this platform: always returns 0. */
int
vchiq_platform_use_suspend_timer(void)
{
	const int use_timer = 0;

	return use_timer;
}
342 void
343 vchiq_dump_platform_use_state(VCHIQ_STATE_T *state)
344 {
345 	vchiq_log_info(vchiq_arm_log_level, "Suspend timer not in use");
346 }
347 void
348 vchiq_platform_handle_timeout(VCHIQ_STATE_T *state)
349 {
350 	(void)state;
351 }
352 /*
353  * Local functions
354  */
355 
356 /* There is a potential problem with partial cache lines (pages?)
357 ** at the ends of the block when reading. If the CPU accessed anything in
358 ** the same line (page?) then it may have pulled old data into the cache,
359 ** obscuring the new data underneath. We can solve this by transferring the
360 ** partial cache lines separately, and allowing the ARM to copy into the
361 ** cached area.
362 
363 ** N.B. This implementation plays slightly fast and loose with the Linux
364 ** driver programming rules, e.g. its use of __virt_to_bus instead of
365 ** dma_map_single, but it isn't a multi-platform driver and it benefits
366 ** from increased speed as a result.
367 */
368 
369 static int
370 create_pagelist(char __user *buf, size_t count, unsigned short type,
371 	struct proc *p, BULKINFO_T *bi)
372 {
373 	PAGELIST_T *pagelist;
374 	vm_page_t* pages;
375 	unsigned long *addrs;
376 	unsigned int num_pages, i;
377 	vm_offset_t offset;
378 	int pagelist_size;
379 	char *addr, *base_addr, *next_addr;
380 	int run, addridx, actual_pages;
381 	int err;
382 	vm_paddr_t pagelist_phys;
383 
384 	offset = (vm_offset_t)buf & (PAGE_SIZE - 1);
385 	num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE;
386 
387 	bi->pagelist = NULL;
388 	bi->buf = buf;
389 	bi->size = count;
390 
391 	/* Allocate enough storage to hold the page pointers and the page
392 	** list
393 	*/
394 	pagelist_size = sizeof(PAGELIST_T) +
395 		(num_pages * sizeof(unsigned long)) +
396 		(num_pages * sizeof(pages[0]));
397 
398 	err = bus_dma_tag_create(
399 	    NULL,
400 	    PAGE_SIZE, 0,	       /* alignment, boundary */
401 	    BUS_SPACE_MAXADDR_32BIT,    /* lowaddr */
402 	    BUS_SPACE_MAXADDR,	  /* highaddr */
403 	    NULL, NULL,		 /* filter, filterarg */
404 	    pagelist_size, 1,		/* maxsize, nsegments */
405 	    pagelist_size, 0,		/* maxsegsize, flags */
406 	    NULL, NULL,		 /* lockfunc, lockarg */
407 	    &bi->pagelist_dma_tag);
408 
409 
410 
411 	err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist,
412 	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map);
413 	if (err) {
414 		vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory");
415 		err = -ENOMEM;
416 		goto failed_alloc;
417 	}
418 
419 	err = bus_dmamap_load(bi->pagelist_dma_tag, bi->pagelist_dma_map, pagelist,
420 	    pagelist_size, vchiq_dmamap_cb,
421 	    &pagelist_phys, 0);
422 
423 	if (err) {
424 		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory");
425 		err = -ENOMEM;
426 		goto failed_load;
427 	}
428 
429 	vchiq_log_trace(vchiq_arm_log_level,
430 		"create_pagelist - %x", (unsigned int)pagelist);
431 	if (!pagelist)
432 		return -ENOMEM;
433 
434 	addrs = pagelist->addrs;
435 	pages = (vm_page_t*)(addrs + num_pages);
436 
437 	actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map,
438 	    (vm_offset_t)buf, count,
439 	    (type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages);
440 
441 	if (actual_pages != num_pages) {
442 		vm_page_unhold_pages(pages, actual_pages);
443 		free(pagelist, M_VCPAGELIST);
444 		return (-ENOMEM);
445 	}
446 
447 	pagelist->length = count;
448 	pagelist->type = type;
449 	pagelist->offset = offset;
450 
451 	/* Group the pages into runs of contiguous pages */
452 
453 	base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0]));
454 	next_addr = base_addr + PAGE_SIZE;
455 	addridx = 0;
456 	run = 0;
457 
458 	for (i = 1; i < num_pages; i++) {
459 		addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i]));
460 		if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) {
461 			next_addr += PAGE_SIZE;
462 			run++;
463 		} else {
464 			addrs[addridx] = (unsigned long)base_addr + run;
465 			addridx++;
466 			base_addr = addr;
467 			next_addr = addr + PAGE_SIZE;
468 			run = 0;
469 		}
470 	}
471 
472 	addrs[addridx] = (unsigned long)base_addr + run;
473 	addridx++;
474 
475 	/* Partial cache lines (fragments) require special measures */
476 	if ((type == PAGELIST_READ) &&
477 		((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
478 		((pagelist->offset + pagelist->length) &
479 		(CACHE_LINE_SIZE - 1)))) {
480 		FRAGMENTS_T *fragments;
481 
482 		if (down_interruptible(&g_free_fragments_sema) != 0) {
483       			free(pagelist, M_VCPAGELIST);
484 			return -EINTR;
485 		}
486 
487 		WARN_ON(g_free_fragments == NULL);
488 
489 		down(&g_free_fragments_mutex);
490 		fragments = (FRAGMENTS_T *) g_free_fragments;
491 		WARN_ON(fragments == NULL);
492 		g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
493 		up(&g_free_fragments_mutex);
494 		pagelist->type =
495 			 PAGELIST_READ_WITH_FRAGMENTS + (fragments -
496 							 g_fragments_base);
497 	}
498 
499 	/* XXX: optimize? INV operation for read WBINV for write? */
500 	cpu_dcache_wbinv_range((vm_offset_t)buf, count);
501 
502 	bi->pagelist = pagelist;
503 
504 	return 0;
505 
506 failed_load:
507 	bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
508 failed_alloc:
509 	bus_dmamap_destroy(bi->pagelist_dma_tag, bi->pagelist_dma_map);
510 	bus_dma_tag_destroy(bi->pagelist_dma_tag);
511 
512 	return err;
513 }
514 
515 static void
516 free_pagelist(BULKINFO_T *bi, int actual)
517 {
518 	vm_page_t*pages;
519 	unsigned int num_pages, i;
520 	PAGELIST_T *pagelist;
521 
522 	pagelist = bi->pagelist;
523 
524 	vchiq_log_trace(vchiq_arm_log_level,
525 		"free_pagelist - %x, %d", (unsigned int)pagelist, actual);
526 
527 	num_pages =
528 		(pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
529 		PAGE_SIZE;
530 
531 	pages = (vm_page_t*)(pagelist->addrs + num_pages);
532 
533 	/* Deal with any partial cache lines (fragments) */
534 	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
535 		FRAGMENTS_T *fragments = g_fragments_base +
536 			(pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
537 		int head_bytes, tail_bytes;
538 		head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
539 			(CACHE_LINE_SIZE - 1);
540 		tail_bytes = (pagelist->offset + actual) &
541 			(CACHE_LINE_SIZE - 1);
542 
543 		if ((actual >= 0) && (head_bytes != 0)) {
544 			if (head_bytes > actual)
545 				head_bytes = actual;
546 
547 			memcpy((char *)bi->buf,
548 				fragments->headbuf,
549 				head_bytes);
550 		}
551 
552 		if ((actual >= 0) && (head_bytes < actual) &&
553 			(tail_bytes != 0)) {
554 			memcpy((char *)bi->buf + actual - tail_bytes,
555 					 fragments->tailbuf, tail_bytes);
556 		}
557 
558 		down(&g_free_fragments_mutex);
559 		*(FRAGMENTS_T **) fragments = g_free_fragments;
560 		g_free_fragments = fragments;
561 		up(&g_free_fragments_mutex);
562 		up(&g_free_fragments_sema);
563 	}
564 
565 	for (i = 0; i < num_pages; i++) {
566 		if (pagelist->type != PAGELIST_WRITE)
567 			vm_page_dirty(pages[i]);
568 	}
569 
570 	vm_page_unhold_pages(pages, num_pages);
571 
572 	bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
573 	bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map);
574 	bus_dmamap_destroy(bi->pagelist_dma_tag, bi->pagelist_dma_map);
575 	bus_dma_tag_destroy(bi->pagelist_dma_tag);
576 
577 	free(bi, M_VCPAGELIST);
578 }
579