1 /**
2 * Copyright (c) 2010-2012 Broadcom. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions, and the following disclaimer,
9 * without modification.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The names of the above-listed copyright holders may not be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * ALTERNATIVELY, this software may be distributed under the terms of the
18 * GNU General Public License ("GPL") version 2, as published by the Free
19 * Software Foundation.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
25 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <interface/compat/vchi_bsd.h>
35
36 #include <sys/malloc.h>
37 #include <sys/rwlock.h>
38
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 #include <vm/vm_extern.h>
42 #include <vm/vm_kern.h>
43 #include <vm/vm_map.h>
44 #include <vm/vm_object.h>
45 #include <vm/vm_page.h>
46 #include <vm/vm_pager.h>
47 #include <vm/vm_param.h>
48
49 #include <machine/bus.h>
50 #include <machine/cpu.h>
51 #include <arm/broadcom/bcm2835/bcm2835_mbox.h>
52 #include <arm/broadcom/bcm2835/bcm2835_vcbus.h>
53
54 MALLOC_DEFINE(M_VCPAGELIST, "vcpagelist", "VideoCore pagelist memory");
55
56 #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
57
58 #define VCHIQ_DOORBELL_IRQ IRQ_ARM_DOORBELL_0
59 #define VCHIQ_ARM_ADDRESS(x) ((void *)PHYS_TO_VCBUS(pmap_kextract((vm_offset_t)(x))))
60
61 #include "vchiq_arm.h"
62 #include "vchiq_2835.h"
63 #include "vchiq_connected.h"
64 #include "vchiq_killable.h"
65
66 #define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2)
67
68 /*
69 * XXXMDC
70 * Do this less ad-hoc-y -- e.g.
71 * https://github.com/raspberrypi/linux/commit/c683db8860a80562a2bb5b451d77b3e471d24f36
72 */
/* Data cache line size, in bytes, assumed by the fragment handling below. */
#if defined(__aarch64__)
int g_cache_line_size = 64;
#else
int g_cache_line_size = 32;
#endif
/* Size of one fragment slot; computed in vchiq_platform_init(). */
static int g_fragment_size;

/*
 * Selects how page addresses are encoded in a pagelist: non-zero uses the
 * raw physical address, zero uses the VideoCore bus alias of it.
 */
unsigned int g_long_bulk_space = 0;
#define VM_PAGE_TO_VC_BULK_PAGE(x)				\
	(g_long_bulk_space ?					\
	    VM_PAGE_TO_PHYS(x) :				\
	    PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(x)))
85
86 typedef struct vchiq_2835_state_struct {
87 int inited;
88 VCHIQ_ARM_STATE_T arm_state;
89 } VCHIQ_2835_ARM_STATE_T;
90
91 static char *g_slot_mem;
92 static int g_slot_mem_size;
93 vm_paddr_t g_slot_phys;
94 /* BSD DMA */
95 bus_dma_tag_t bcm_slots_dma_tag;
96 bus_dmamap_t bcm_slots_dma_map;
97
98 static char *g_fragments_base;
99 static char *g_free_fragments;
100 struct semaphore g_free_fragments_sema;
101
102 static DEFINE_SEMAPHORE(g_free_fragments_mutex);
103
104 typedef struct bulkinfo_struct {
105 PAGELIST_T *pagelist;
106 bus_dma_tag_t pagelist_dma_tag;
107 bus_dmamap_t pagelist_dma_map;
108 void *buf;
109 size_t size;
110 } BULKINFO_T;
111
112 static int
113 create_pagelist(char __user *buf, size_t count, unsigned short type,
114 struct proc *p, BULKINFO_T *bi);
115
116 static void
117 free_pagelist(BULKINFO_T *bi, int actual);
118
119 static void
vchiq_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int err)120 vchiq_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
121 {
122 bus_addr_t *addr;
123
124 if (err)
125 return;
126
127 addr = (bus_addr_t*)arg;
128 *addr = PHYS_TO_VCBUS(segs[0].ds_addr);
129 }
130
131 #if defined(__aarch64__) /* See comment in free_pagelist */
132 static int
invalidate_cachelines_in_range_of_ppage(vm_page_t p,size_t offset,size_t count)133 invalidate_cachelines_in_range_of_ppage(
134 vm_page_t p,
135 size_t offset,
136 size_t count
137 )
138 {
139 if(offset + count > PAGE_SIZE){ return EINVAL; }
140 uint8_t *dst = (uint8_t*)pmap_quick_enter_page(p);
141 if (!dst){
142 return ENOMEM;
143 }
144 cpu_dcache_inv_range((void *)((vm_offset_t)dst + offset), count);
145 pmap_quick_remove_page((vm_offset_t)dst);
146 return 0;
147 }
148
149 /* XXXMDC bulk instead of loading and invalidating single pages? */
150 static void
invalidate_cachelines_in_range_of_ppage_seq(vm_page_t * p,size_t start,size_t count)151 invalidate_cachelines_in_range_of_ppage_seq(vm_page_t *p, size_t start,
152 size_t count)
153 {
154 if (start >= PAGE_SIZE)
155 goto invalid_input;
156
157 #define _NEXT_AT(x,_m) (((x)+((_m)-1)) & ~((_m)-1)) /* for power of two m */
158 size_t offset = _NEXT_AT(start,g_cache_line_size);
159 #undef _NEXT_AT
160 count = (offset < start + count) ? count - (offset - start) : 0;
161 offset = offset & (PAGE_SIZE - 1);
162 for (size_t done = 0; count > done;
163 p++, done += PAGE_SIZE - offset, offset = 0) {
164 size_t in_page = PAGE_SIZE - offset;
165 size_t todo = (count-done > in_page) ? in_page : count-done;
166 int e = invalidate_cachelines_in_range_of_ppage(*p, offset, todo);
167 if (e != 0)
168 goto problem_in_loop;
169 }
170 return;
171
172 problem_in_loop:
173 invalid_input:
174 WARN_ON(1);
175 return;
176 }
177 #endif
178
179 static int
copyout_page(vm_page_t p,size_t offset,void * kaddr,size_t size)180 copyout_page(vm_page_t p, size_t offset, void *kaddr, size_t size)
181 {
182 uint8_t *dst;
183
184 dst = (uint8_t*)pmap_quick_enter_page(p);
185 if (!dst)
186 return ENOMEM;
187
188 memcpy(dst + offset, kaddr, size);
189
190 pmap_quick_remove_page((vm_offset_t)dst);
191
192 return 0;
193 }
194
195 int __init
vchiq_platform_init(VCHIQ_STATE_T * state)196 vchiq_platform_init(VCHIQ_STATE_T *state)
197 {
198 VCHIQ_SLOT_ZERO_T *vchiq_slot_zero;
199 int frag_mem_size;
200 int err;
201 int i;
202
203 /* Allocate space for the channels in coherent memory */
204 g_slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
205 g_fragment_size = 2*g_cache_line_size;
206 frag_mem_size = PAGE_ALIGN(g_fragment_size * MAX_FRAGMENTS);
207
208 err = bus_dma_tag_create(
209 NULL,
210 PAGE_SIZE, 0, /* alignment, boundary */
211 BUS_SPACE_MAXADDR_32BIT, /* lowaddr */
212 BUS_SPACE_MAXADDR, /* highaddr */
213 NULL, NULL, /* filter, filterarg */
214 g_slot_mem_size + frag_mem_size, 1, /* maxsize, nsegments */
215 g_slot_mem_size + frag_mem_size, 0, /* maxsegsize, flags */
216 NULL, NULL, /* lockfunc, lockarg */
217 &bcm_slots_dma_tag);
218
219 err = bus_dmamem_alloc(bcm_slots_dma_tag, (void **)&g_slot_mem,
220 BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bcm_slots_dma_map);
221 if (err) {
222 vchiq_log_error(vchiq_core_log_level, "Unable to allocate channel memory");
223 err = -ENOMEM;
224 goto failed_alloc;
225 }
226
227 err = bus_dmamap_load(bcm_slots_dma_tag, bcm_slots_dma_map, g_slot_mem,
228 g_slot_mem_size + frag_mem_size, vchiq_dmamap_cb,
229 &g_slot_phys, 0);
230
231 if (err) {
232 vchiq_log_error(vchiq_core_log_level, "cannot load DMA map");
233 err = -ENOMEM;
234 goto failed_load;
235 }
236
237 WARN_ON(((size_t)g_slot_mem & (PAGE_SIZE - 1)) != 0);
238
239 vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size);
240 if (!vchiq_slot_zero) {
241 err = -EINVAL;
242 goto failed_init_slots;
243 }
244
245 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
246 (int)g_slot_phys + g_slot_mem_size;
247 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
248 MAX_FRAGMENTS;
249
250 g_fragments_base = (char *)(g_slot_mem + g_slot_mem_size);
251 g_slot_mem_size += frag_mem_size;
252
253 g_free_fragments = g_fragments_base;
254 for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
255 *(char **)&g_fragments_base[i*g_fragment_size] =
256 &g_fragments_base[(i + 1)*g_fragment_size];
257 }
258 *(char **)&g_fragments_base[i*g_fragment_size] = NULL;
259 _sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
260
261 if (vchiq_init_state(state, vchiq_slot_zero, 0/*slave*/) !=
262 VCHIQ_SUCCESS) {
263 err = -EINVAL;
264 goto failed_vchiq_init;
265 }
266
267 bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (unsigned int)g_slot_phys);
268
269 vchiq_log_info(vchiq_arm_log_level,
270 "vchiq_init - done (slots %zx, phys %zx)",
271 (size_t)vchiq_slot_zero, g_slot_phys);
272
273 vchiq_call_connected_callbacks();
274
275 return 0;
276
277 failed_vchiq_init:
278 failed_init_slots:
279 bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
280 failed_load:
281 bus_dmamem_free(bcm_slots_dma_tag, g_slot_mem, bcm_slots_dma_map);
282 failed_alloc:
283 bus_dma_tag_destroy(bcm_slots_dma_tag);
284
285 return err;
286 }
287
288 void __exit
vchiq_platform_exit(VCHIQ_STATE_T * state)289 vchiq_platform_exit(VCHIQ_STATE_T *state)
290 {
291
292 bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
293 bus_dmamem_free(bcm_slots_dma_tag, g_slot_mem, bcm_slots_dma_map);
294 bus_dma_tag_destroy(bcm_slots_dma_tag);
295 }
296
297 VCHIQ_STATUS_T
vchiq_platform_init_state(VCHIQ_STATE_T * state)298 vchiq_platform_init_state(VCHIQ_STATE_T *state)
299 {
300 VCHIQ_STATUS_T status = VCHIQ_SUCCESS;
301 state->platform_state = kzalloc(sizeof(VCHIQ_2835_ARM_STATE_T), GFP_KERNEL);
302 ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 1;
303 status = vchiq_arm_init_state(state, &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state);
304 if(status != VCHIQ_SUCCESS)
305 {
306 ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 0;
307 }
308 return status;
309 }
310
311 VCHIQ_ARM_STATE_T*
vchiq_platform_get_arm_state(VCHIQ_STATE_T * state)312 vchiq_platform_get_arm_state(VCHIQ_STATE_T *state)
313 {
314 if(!((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited)
315 {
316 BUG();
317 }
318 return &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state;
319 }
320
321 int
vchiq_copy_from_user(void * dst,const void * src,int size)322 vchiq_copy_from_user(void *dst, const void *src, int size)
323 {
324
325 if (((vm_offset_t)(src)) < VM_MIN_KERNEL_ADDRESS) {
326 int error = copyin(src, dst, size);
327 return error ? VCHIQ_ERROR : VCHIQ_SUCCESS;
328 }
329 else
330 bcopy(src, dst, size);
331
332 return 0;
333 }
334
335 VCHIQ_STATUS_T
vchiq_prepare_bulk_data(VCHIQ_BULK_T * bulk,VCHI_MEM_HANDLE_T memhandle,void * offset,int size,int dir)336 vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
337 void *offset, int size, int dir)
338 {
339 BULKINFO_T *bi;
340 int ret;
341
342 WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID);
343 bi = malloc(sizeof(*bi), M_VCPAGELIST, M_WAITOK | M_ZERO);
344
345 ret = create_pagelist((char __user *)offset, size,
346 (dir == VCHIQ_BULK_RECEIVE)
347 ? PAGELIST_READ
348 : PAGELIST_WRITE,
349 current,
350 bi);
351 if (ret != 0)
352 return VCHIQ_ERROR;
353
354 bulk->handle = memhandle;
355 bulk->data = VCHIQ_ARM_ADDRESS(bi->pagelist);
356
357 /* Store the pagelist address in remote_data, which isn't used by the
358 slave. */
359 bulk->remote_data = bi;
360
361 return VCHIQ_SUCCESS;
362 }
363
364 void
vchiq_complete_bulk(VCHIQ_BULK_T * bulk)365 vchiq_complete_bulk(VCHIQ_BULK_T *bulk)
366 {
367 if (bulk && bulk->remote_data && bulk->actual)
368 free_pagelist((BULKINFO_T *)bulk->remote_data, bulk->actual);
369 }
370
371 void
vchiq_transfer_bulk(VCHIQ_BULK_T * bulk)372 vchiq_transfer_bulk(VCHIQ_BULK_T *bulk)
373 {
374 /*
375 * This should only be called on the master (VideoCore) side, but
376 * provide an implementation to avoid the need for ifdefery.
377 */
378 BUG();
379 }
380
/*
 * Emit the one-line platform description through vchiq_dump(), including
 * the terminating NUL in the reported length.
 */
void
vchiq_dump_platform_state(void *dump_context)
{
	char line[80];
	int written = snprintf(line, sizeof(line),
	    " Platform: 2835 (VC master)");

	vchiq_dump(dump_context, line, written + 1);
}
390
391 VCHIQ_STATUS_T
vchiq_platform_suspend(VCHIQ_STATE_T * state)392 vchiq_platform_suspend(VCHIQ_STATE_T *state)
393 {
394 return VCHIQ_ERROR;
395 }
396
397 VCHIQ_STATUS_T
vchiq_platform_resume(VCHIQ_STATE_T * state)398 vchiq_platform_resume(VCHIQ_STATE_T *state)
399 {
400 return VCHIQ_SUCCESS;
401 }
402
403 void
vchiq_platform_paused(VCHIQ_STATE_T * state)404 vchiq_platform_paused(VCHIQ_STATE_T *state)
405 {
406 }
407
408 void
vchiq_platform_resumed(VCHIQ_STATE_T * state)409 vchiq_platform_resumed(VCHIQ_STATE_T *state)
410 {
411 }
412
413 int
vchiq_platform_videocore_wanted(VCHIQ_STATE_T * state)414 vchiq_platform_videocore_wanted(VCHIQ_STATE_T* state)
415 {
416 return 1; // autosuspend not supported - videocore always wanted
417 }
418
/* The suspend timer is not used on this platform: always returns 0. */
int
vchiq_platform_use_suspend_timer(void)
{
	const int use_timer = 0;

	return use_timer;
}
424 void
vchiq_dump_platform_use_state(VCHIQ_STATE_T * state)425 vchiq_dump_platform_use_state(VCHIQ_STATE_T *state)
426 {
427 vchiq_log_info(vchiq_arm_log_level, "Suspend timer not in use");
428 }
429 void
vchiq_platform_handle_timeout(VCHIQ_STATE_T * state)430 vchiq_platform_handle_timeout(VCHIQ_STATE_T *state)
431 {
432 (void)state;
433 }
434 /*
435 * Local functions
436 */
437
438 static void
pagelist_page_free(vm_page_t pp)439 pagelist_page_free(vm_page_t pp)
440 {
441 vm_page_unwire(pp, PQ_INACTIVE);
442 }
443
444 /* There is a potential problem with partial cache lines (pages?)
445 ** at the ends of the block when reading. If the CPU accessed anything in
446 ** the same line (page?) then it may have pulled old data into the cache,
447 ** obscuring the new data underneath. We can solve this by transferring the
448 ** partial cache lines separately, and allowing the ARM to copy into the
449 ** cached area.
450
451 ** N.B. This implementation plays slightly fast and loose with the Linux
452 ** driver programming rules, e.g. its use of __virt_to_bus instead of
453 ** dma_map_single, but it isn't a multi-platform driver and it benefits
454 ** from increased speed as a result.
455 */
456
457
458 static int
create_pagelist(char __user * buf,size_t count,unsigned short type,struct proc * p,BULKINFO_T * bi)459 create_pagelist(char __user *buf, size_t count, unsigned short type,
460 struct proc *p, BULKINFO_T *bi)
461 {
462 PAGELIST_T *pagelist;
463 vm_page_t* pages;
464 uint32_t *addrs;
465 unsigned int num_pages, i;
466 vm_offset_t offset;
467 int pagelist_size;
468 char *addr, *base_addr, *next_addr;
469 int run, addridx, actual_pages;
470 int err;
471 vm_paddr_t pagelist_phys;
472 vm_paddr_t pa;
473
474 offset = (vm_offset_t)buf & (PAGE_SIZE - 1);
475 num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE;
476
477 bi->pagelist = NULL;
478 bi->buf = buf;
479 bi->size = count;
480
481 /* Allocate enough storage to hold the page pointers and the page
482 ** list
483 */
484 pagelist_size = sizeof(PAGELIST_T) +
485 (num_pages * sizeof(unsigned long)) +
486 (num_pages * sizeof(pages[0]));
487
488 err = bus_dma_tag_create(
489 NULL,
490 PAGE_SIZE, 0, /* alignment, boundary */
491 BUS_SPACE_MAXADDR_32BIT, /* lowaddr */
492 BUS_SPACE_MAXADDR, /* highaddr */
493 NULL, NULL, /* filter, filterarg */
494 pagelist_size, 1, /* maxsize, nsegments */
495 pagelist_size, 0, /* maxsegsize, flags */
496 NULL, NULL, /* lockfunc, lockarg */
497 &bi->pagelist_dma_tag);
498
499 err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist,
500 BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map);
501 if (err || !pagelist) {
502 vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory");
503 err = -ENOMEM;
504 goto failed_alloc;
505 }
506
507 err = bus_dmamap_load(bi->pagelist_dma_tag, bi->pagelist_dma_map, pagelist,
508 pagelist_size, vchiq_dmamap_cb,
509 &pagelist_phys, 0);
510
511 if (err) {
512 vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory");
513 err = -ENOMEM;
514 bi->pagelist = pagelist;
515 goto failed_load;
516 }
517
518 vchiq_log_trace(vchiq_arm_log_level,
519 "create_pagelist - %zx (%zu bytes @%p)", (size_t)pagelist, count, buf);
520
521 addrs = pagelist->addrs;
522 pages = (vm_page_t*)(addrs + num_pages);
523
524 actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map,
525 (vm_offset_t)buf, count,
526 (type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages);
527
528 if (actual_pages != num_pages) {
529 if (actual_pages > 0)
530 vm_page_unhold_pages(pages, actual_pages);
531 err = -ENOMEM;
532 bi->pagelist = pagelist;
533 goto failed_hold;
534 }
535
536 pagelist->length = count;
537 pagelist->type = type;
538 pagelist->offset = offset;
539
540 /* Group the pages into runs of contiguous pages */
541
542 size_t run_ceil = g_long_bulk_space ? 0x100 : PAGE_SIZE;
543 unsigned int pg_addr_rshift = g_long_bulk_space ? 4 : 0;
544 base_addr = (void *) VM_PAGE_TO_VC_BULK_PAGE(pages[0]);
545 next_addr = base_addr + PAGE_SIZE;
546 addridx = 0;
547 run = 0;
548 #define _PG_BLOCK(base,run) \
549 ((((size_t) (base)) >> pg_addr_rshift) & ~(run_ceil-1)) + (run)
550 for (i = 1; i < num_pages; i++) {
551 addr = (void *)VM_PAGE_TO_VC_BULK_PAGE(pages[i]);
552 if ((addr == next_addr) && (run < run_ceil - 1)) {
553 next_addr += PAGE_SIZE;
554 run++;
555 } else {
556 addrs[addridx++] = (uint32_t) _PG_BLOCK(base_addr,run);
557 base_addr = addr;
558 next_addr = addr + PAGE_SIZE;
559 run = 0;
560 }
561 }
562 addrs[addridx++] = _PG_BLOCK(base_addr, run);
563 #undef _PG_BLOCK
564
565 /* Partial cache lines (fragments) require special measures */
566 if ((type == PAGELIST_READ) &&
567 ((pagelist->offset & (g_cache_line_size - 1)) ||
568 ((pagelist->offset + pagelist->length) &
569 (g_cache_line_size - 1)))) {
570 char *fragments;
571
572 if (down_interruptible(&g_free_fragments_sema) != 0) {
573 free(pagelist, M_VCPAGELIST);
574 return -EINTR;
575 }
576
577 WARN_ON(g_free_fragments == NULL);
578
579 down(&g_free_fragments_mutex);
580 fragments = g_free_fragments;
581 WARN_ON(fragments == NULL);
582 g_free_fragments = *(char **) g_free_fragments;
583 up(&g_free_fragments_mutex);
584 pagelist->type = PAGELIST_READ_WITH_FRAGMENTS
585 + (fragments - g_fragments_base)/g_fragment_size;
586 #if defined(__aarch64__)
587 bus_dmamap_sync(bcm_slots_dma_tag, bcm_slots_dma_map,
588 BUS_DMASYNC_PREREAD);
589 #endif
590 }
591
592 #if defined(__aarch64__)
593 if(type == PAGELIST_READ) {
594 cpu_dcache_wbinv_range(buf, count);
595 } else {
596 cpu_dcache_wb_range(buf, count);
597 }
598 dsb(sy);
599 #else
600 pa = pmap_extract(PCPU_GET(curpmap), (vm_offset_t)buf);
601 dcache_wbinv_poc((vm_offset_t)buf, pa, count);
602 #endif
603
604 bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map,
605 BUS_DMASYNC_PREWRITE);
606
607 bi->pagelist = pagelist;
608
609 return 0;
610
611 failed_hold:
612 bus_dmamap_unload(bi->pagelist_dma_tag,bi->pagelist_dma_map);
613 failed_load:
614 bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map);
615 failed_alloc:
616 bus_dma_tag_destroy(bi->pagelist_dma_tag);
617
618 return err;
619 }
620
621 static void
free_pagelist(BULKINFO_T * bi,int actual)622 free_pagelist(BULKINFO_T *bi, int actual)
623 {
624 vm_page_t*pages;
625 unsigned int num_pages, i;
626 PAGELIST_T *pagelist;
627
628 pagelist = bi->pagelist;
629
630 vchiq_log_trace(vchiq_arm_log_level,
631 "free_pagelist - %zx, %d (%lu bytes @%p)",
632 (size_t)pagelist, (int)actual, (unsigned long)pagelist->length,
633 bi->buf);
634
635 num_pages =
636 (pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
637 PAGE_SIZE;
638
639 pages = (vm_page_t*)(pagelist->addrs + num_pages);
640
641 #if defined(__aarch64__)
642 /*
643 * On arm64, even if the user keeps their end of the bargain
644 * -- do NOT touch the buffers sent to VC -- but reads around the
645 * pagelist after the invalidation above, the arm might preemptively
646 * load (and validate) cache lines for areas inside the page list,
647 * so we must invalidate them again.
648 *
649 * The functional test does it and without this it doesn't pass.
650 *
651 * XXXMDC might it be enough to invalidate a couple of pages at
652 * the ends of the page list?
653 */
654 if(pagelist->type >= PAGELIST_READ && actual > 0)
655 invalidate_cachelines_in_range_of_ppage_seq(pages,
656 pagelist->offset, actual);
657 #endif
658
659 /* Deal with any partial cache lines (fragments) */
660 if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
661 char *fragments = g_fragments_base +
662 (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS)*g_fragment_size;
663 int head_bytes, tail_bytes;
664 head_bytes = (g_cache_line_size - pagelist->offset) &
665 (g_cache_line_size - 1);
666 tail_bytes = (pagelist->offset + actual) &
667 (g_cache_line_size - 1);
668
669 if ((actual >= 0) && (head_bytes != 0)) {
670 if (head_bytes > actual)
671 head_bytes = actual;
672
673 copyout_page(pages[0],
674 pagelist->offset,
675 fragments,
676 head_bytes);
677 }
678
679 if ((actual >= 0) && (head_bytes < actual) &&
680 (tail_bytes != 0)) {
681
682 copyout_page(pages[num_pages-1],
683 (((vm_offset_t)bi->buf + actual) % PAGE_SIZE) - tail_bytes,
684 fragments + g_cache_line_size,
685 tail_bytes);
686 }
687
688 down(&g_free_fragments_mutex);
689 *(char **) fragments = g_free_fragments;
690 g_free_fragments = fragments;
691 up(&g_free_fragments_mutex);
692 up(&g_free_fragments_sema);
693 }
694
695 if (pagelist->type != PAGELIST_WRITE) {
696 for (i = 0; i < num_pages; i++) {
697 vm_page_dirty(pages[i]);
698 pagelist_page_free(pages[i]);
699 }
700 }
701
702 #if defined(__aarch64__)
703 /* XXXMDC necessary? */
704 dsb(sy);
705 #endif
706
707 bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
708 bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map);
709 bus_dma_tag_destroy(bi->pagelist_dma_tag);
710
711 free(bi, M_VCPAGELIST);
712 }
713