1 /** 2 * Copyright (c) 2010-2012 Broadcom. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions, and the following disclaimer, 9 * without modification. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The names of the above-listed copyright holders may not be used 14 * to endorse or promote products derived from this software without 15 * specific prior written permission. 16 * 17 * ALTERNATIVELY, this software may be distributed under the terms of the 18 * GNU General Public License ("GPL") version 2, as published by the Free 19 * Software Foundation. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 25 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <interface/compat/vchi_bsd.h> 35 36 #include <sys/malloc.h> 37 #include <sys/rwlock.h> 38 39 #include <vm/vm.h> 40 #include <vm/pmap.h> 41 #include <vm/vm_extern.h> 42 #include <vm/vm_kern.h> 43 #include <vm/vm_map.h> 44 #include <vm/vm_object.h> 45 #include <vm/vm_page.h> 46 #include <vm/vm_pager.h> 47 #include <vm/vm_param.h> 48 49 #include <machine/bus.h> 50 #include <machine/cpu.h> 51 #include <arm/broadcom/bcm2835/bcm2835_mbox.h> 52 #include <arm/broadcom/bcm2835/bcm2835_vcbus.h> 53 54 MALLOC_DEFINE(M_VCPAGELIST, "vcpagelist", "VideoCore pagelist memory"); 55 56 #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) 57 58 #define VCHIQ_DOORBELL_IRQ IRQ_ARM_DOORBELL_0 59 #define VCHIQ_ARM_ADDRESS(x) ((void *)PHYS_TO_VCBUS(pmap_kextract((vm_offset_t)(x)))) 60 61 #include "vchiq_arm.h" 62 #include "vchiq_2835.h" 63 #include "vchiq_connected.h" 64 #include "vchiq_killable.h" 65 66 #define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2) 67 68 int g_cache_line_size = 32; 69 static int g_fragment_size; 70 71 typedef struct vchiq_2835_state_struct { 72 int inited; 73 VCHIQ_ARM_STATE_T arm_state; 74 } VCHIQ_2835_ARM_STATE_T; 75 76 static char *g_slot_mem; 77 static int g_slot_mem_size; 78 vm_paddr_t g_slot_phys; 79 /* BSD DMA */ 80 bus_dma_tag_t bcm_slots_dma_tag; 81 bus_dmamap_t bcm_slots_dma_map; 82 83 static char *g_fragments_base; 84 static char *g_free_fragments; 85 struct semaphore g_free_fragments_sema; 86 87 static DEFINE_SEMAPHORE(g_free_fragments_mutex); 88 89 typedef struct bulkinfo_struct { 90 PAGELIST_T *pagelist; 91 bus_dma_tag_t pagelist_dma_tag; 92 bus_dmamap_t pagelist_dma_map; 93 void *buf; 94 size_t size; 95 } BULKINFO_T; 96 97 static int 98 create_pagelist(char __user *buf, size_t count, unsigned short type, 99 struct proc *p, BULKINFO_T *bi); 100 101 static void 102 free_pagelist(BULKINFO_T *bi, int actual); 103 104 static void 105 vchiq_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) 106 { 107 bus_addr_t *addr; 108 109 if (err) 110 return; 111 112 addr = (bus_addr_t*)arg; 113 *addr = PHYS_TO_VCBUS(segs[0].ds_addr); 114 } 115 116 static int 117 copyout_page(vm_page_t p, size_t offset, void *kaddr, size_t size) 118 { 119 uint8_t *dst; 120 121 dst = (uint8_t*)pmap_quick_enter_page(p); 122 if (!dst) 123 return ENOMEM; 124 125 memcpy(dst + offset, kaddr, size); 126 127 pmap_quick_remove_page((vm_offset_t)dst); 128 129 return 0; 130 } 131 132 int __init 133 vchiq_platform_init(VCHIQ_STATE_T *state) 134 { 135 VCHIQ_SLOT_ZERO_T *vchiq_slot_zero; 136 int frag_mem_size; 137 int err; 138 int i; 139 140 /* Allocate space for the channels in coherent memory */ 141 g_slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE); 142 g_fragment_size = 2*g_cache_line_size; 143 frag_mem_size = PAGE_ALIGN(g_fragment_size * MAX_FRAGMENTS); 144 145 err = bus_dma_tag_create( 146 NULL, 147 PAGE_SIZE, 0, /* alignment, boundary */ 148 BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ 149 BUS_SPACE_MAXADDR, /* highaddr */ 150 NULL, NULL, /* filter, filterarg */ 151 g_slot_mem_size + frag_mem_size, 1, /* maxsize, nsegments */ 152 g_slot_mem_size + frag_mem_size, 0, /* maxsegsize, flags */ 153 NULL, NULL, /* lockfunc, lockarg */ 154 &bcm_slots_dma_tag); 155 156 err = bus_dmamem_alloc(bcm_slots_dma_tag, (void **)&g_slot_mem, 157 BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bcm_slots_dma_map); 158 if (err) { 159 vchiq_log_error(vchiq_core_log_level, "Unable to allocate channel memory"); 160 err = -ENOMEM; 161 goto failed_alloc; 162 } 163 164 err = bus_dmamap_load(bcm_slots_dma_tag, bcm_slots_dma_map, g_slot_mem, 165 g_slot_mem_size + frag_mem_size, vchiq_dmamap_cb, 166 &g_slot_phys, 0); 167 168 if (err) { 169 vchiq_log_error(vchiq_core_log_level, "cannot load DMA map"); 170 err = -ENOMEM; 171 goto failed_load; 172 } 173 174 WARN_ON(((int)g_slot_mem & (PAGE_SIZE - 1)) != 0); 175 176 vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size); 177 if (!vchiq_slot_zero) { 178 err = -EINVAL; 179 goto failed_init_slots; 180 } 181 182 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] = 183 (int)g_slot_phys + g_slot_mem_size; 184 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] = 185 MAX_FRAGMENTS; 186 187 g_fragments_base = (char *)(g_slot_mem + g_slot_mem_size); 188 g_slot_mem_size += frag_mem_size; 189 190 g_free_fragments = g_fragments_base; 191 for (i = 0; i < (MAX_FRAGMENTS - 1); i++) { 192 *(char **)&g_fragments_base[i*g_fragment_size] = 193 &g_fragments_base[(i + 1)*g_fragment_size]; 194 } 195 *(char **)&g_fragments_base[i*g_fragment_size] = NULL; 196 _sema_init(&g_free_fragments_sema, MAX_FRAGMENTS); 197 198 if (vchiq_init_state(state, vchiq_slot_zero, 0/*slave*/) != 199 VCHIQ_SUCCESS) { 200 err = -EINVAL; 201 goto failed_vchiq_init; 202 } 203 204 bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (unsigned int)g_slot_phys); 205 206 vchiq_log_info(vchiq_arm_log_level, 207 "vchiq_init - done (slots %x, phys %x)", 208 (unsigned int)vchiq_slot_zero, g_slot_phys); 209 210 vchiq_call_connected_callbacks(); 211 212 return 0; 213 214 failed_vchiq_init: 215 failed_init_slots: 216 bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map); 217 failed_load: 218 bus_dmamem_free(bcm_slots_dma_tag, g_slot_mem, bcm_slots_dma_map); 219 failed_alloc: 220 bus_dma_tag_destroy(bcm_slots_dma_tag); 221 222 return err; 223 } 224 225 void __exit 226 vchiq_platform_exit(VCHIQ_STATE_T *state) 227 { 228 229 bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map); 230 bus_dmamem_free(bcm_slots_dma_tag, g_slot_mem, bcm_slots_dma_map); 231 bus_dma_tag_destroy(bcm_slots_dma_tag); 232 } 233 234 VCHIQ_STATUS_T 235 vchiq_platform_init_state(VCHIQ_STATE_T *state) 236 { 237 VCHIQ_STATUS_T status = VCHIQ_SUCCESS; 238 state->platform_state = kzalloc(sizeof(VCHIQ_2835_ARM_STATE_T), GFP_KERNEL); 239 ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 1; 240 status = vchiq_arm_init_state(state, &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state); 241 if(status != VCHIQ_SUCCESS) 242 { 243 ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 0; 244 } 245 return status; 246 } 247 248 VCHIQ_ARM_STATE_T* 249 vchiq_platform_get_arm_state(VCHIQ_STATE_T *state) 250 { 251 if(!((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited) 252 { 253 BUG(); 254 } 255 return &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state; 256 } 257 258 int 259 vchiq_copy_from_user(void *dst, const void *src, int size) 260 { 261 262 if (((vm_offset_t)(src)) < VM_MIN_KERNEL_ADDRESS) { 263 int error = copyin(src, dst, size); 264 return error ? VCHIQ_ERROR : VCHIQ_SUCCESS; 265 } 266 else 267 bcopy(src, dst, size); 268 269 return 0; 270 } 271 272 VCHIQ_STATUS_T 273 vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle, 274 void *offset, int size, int dir) 275 { 276 BULKINFO_T *bi; 277 int ret; 278 279 WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID); 280 bi = malloc(sizeof(*bi), M_VCPAGELIST, M_WAITOK | M_ZERO); 281 if (bi == NULL) 282 return VCHIQ_ERROR; 283 284 ret = create_pagelist((char __user *)offset, size, 285 (dir == VCHIQ_BULK_RECEIVE) 286 ? PAGELIST_READ 287 : PAGELIST_WRITE, 288 current, 289 bi); 290 if (ret != 0) 291 return VCHIQ_ERROR; 292 293 bulk->handle = memhandle; 294 bulk->data = VCHIQ_ARM_ADDRESS(bi->pagelist); 295 296 /* Store the pagelist address in remote_data, which isn't used by the 297 slave. */ 298 bulk->remote_data = bi; 299 300 return VCHIQ_SUCCESS; 301 } 302 303 void 304 vchiq_complete_bulk(VCHIQ_BULK_T *bulk) 305 { 306 if (bulk && bulk->remote_data && bulk->actual) 307 free_pagelist((BULKINFO_T *)bulk->remote_data, bulk->actual); 308 } 309 310 void 311 vchiq_transfer_bulk(VCHIQ_BULK_T *bulk) 312 { 313 /* 314 * This should only be called on the master (VideoCore) side, but 315 * provide an implementation to avoid the need for ifdefery. 316 */ 317 BUG(); 318 } 319 320 void 321 vchiq_dump_platform_state(void *dump_context) 322 { 323 char buf[80]; 324 int len; 325 len = snprintf(buf, sizeof(buf), 326 " Platform: 2835 (VC master)"); 327 vchiq_dump(dump_context, buf, len + 1); 328 } 329 330 VCHIQ_STATUS_T 331 vchiq_platform_suspend(VCHIQ_STATE_T *state) 332 { 333 return VCHIQ_ERROR; 334 } 335 336 VCHIQ_STATUS_T 337 vchiq_platform_resume(VCHIQ_STATE_T *state) 338 { 339 return VCHIQ_SUCCESS; 340 } 341 342 void 343 vchiq_platform_paused(VCHIQ_STATE_T *state) 344 { 345 } 346 347 void 348 vchiq_platform_resumed(VCHIQ_STATE_T *state) 349 { 350 } 351 352 int 353 vchiq_platform_videocore_wanted(VCHIQ_STATE_T* state) 354 { 355 return 1; // autosuspend not supported - videocore always wanted 356 } 357 358 int 359 vchiq_platform_use_suspend_timer(void) 360 { 361 return 0; 362 } 363 void 364 vchiq_dump_platform_use_state(VCHIQ_STATE_T *state) 365 { 366 vchiq_log_info(vchiq_arm_log_level, "Suspend timer not in use"); 367 } 368 void 369 vchiq_platform_handle_timeout(VCHIQ_STATE_T *state) 370 { 371 (void)state; 372 } 373 /* 374 * Local functions 375 */ 376 377 static void 378 pagelist_page_free(vm_page_t pp) 379 { 380 vm_page_lock(pp); 381 if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) 382 vm_page_free(pp); 383 vm_page_unlock(pp); 384 } 385 386 /* There is a potential problem with partial cache lines (pages?) 387 ** at the ends of the block when reading. If the CPU accessed anything in 388 ** the same line (page?) then it may have pulled old data into the cache, 389 ** obscuring the new data underneath. We can solve this by transferring the 390 ** partial cache lines separately, and allowing the ARM to copy into the 391 ** cached area. 392 393 ** N.B. This implementation plays slightly fast and loose with the Linux 394 ** driver programming rules, e.g. its use of __virt_to_bus instead of 395 ** dma_map_single, but it isn't a multi-platform driver and it benefits 396 ** from increased speed as a result. 397 */ 398 399 static int 400 create_pagelist(char __user *buf, size_t count, unsigned short type, 401 struct proc *p, BULKINFO_T *bi) 402 { 403 PAGELIST_T *pagelist; 404 vm_page_t* pages; 405 unsigned long *addrs; 406 unsigned int num_pages, i; 407 vm_offset_t offset; 408 int pagelist_size; 409 char *addr, *base_addr, *next_addr; 410 int run, addridx, actual_pages; 411 int err; 412 vm_paddr_t pagelist_phys; 413 vm_paddr_t pa; 414 415 offset = (vm_offset_t)buf & (PAGE_SIZE - 1); 416 num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE; 417 418 bi->pagelist = NULL; 419 bi->buf = buf; 420 bi->size = count; 421 422 /* Allocate enough storage to hold the page pointers and the page 423 ** list 424 */ 425 pagelist_size = sizeof(PAGELIST_T) + 426 (num_pages * sizeof(unsigned long)) + 427 (num_pages * sizeof(pages[0])); 428 429 err = bus_dma_tag_create( 430 NULL, 431 PAGE_SIZE, 0, /* alignment, boundary */ 432 BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ 433 BUS_SPACE_MAXADDR, /* highaddr */ 434 NULL, NULL, /* filter, filterarg */ 435 pagelist_size, 1, /* maxsize, nsegments */ 436 pagelist_size, 0, /* maxsegsize, flags */ 437 NULL, NULL, /* lockfunc, lockarg */ 438 &bi->pagelist_dma_tag); 439 440 err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist, 441 BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map); 442 if (err) { 443 vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory"); 444 err = -ENOMEM; 445 goto failed_alloc; 446 } 447 448 err = bus_dmamap_load(bi->pagelist_dma_tag, bi->pagelist_dma_map, pagelist, 449 pagelist_size, vchiq_dmamap_cb, 450 &pagelist_phys, 0); 451 452 if (err) { 453 vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory"); 454 err = -ENOMEM; 455 goto failed_load; 456 } 457 458 vchiq_log_trace(vchiq_arm_log_level, 459 "create_pagelist - %x (%d bytes @%p)", (unsigned int)pagelist, count, buf); 460 461 if (!pagelist) 462 return -ENOMEM; 463 464 addrs = pagelist->addrs; 465 pages = (vm_page_t*)(addrs + num_pages); 466 467 actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map, 468 (vm_offset_t)buf, count, 469 (type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages); 470 471 if (actual_pages != num_pages) { 472 vm_page_unhold_pages(pages, actual_pages); 473 free(pagelist, M_VCPAGELIST); 474 return (-ENOMEM); 475 } 476 477 for (i = 0; i < actual_pages; i++) { 478 vm_page_lock(pages[i]); 479 vm_page_wire(pages[i]); 480 vm_page_unhold(pages[i]); 481 vm_page_unlock(pages[i]); 482 } 483 484 pagelist->length = count; 485 pagelist->type = type; 486 pagelist->offset = offset; 487 488 /* Group the pages into runs of contiguous pages */ 489 490 base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0])); 491 next_addr = base_addr + PAGE_SIZE; 492 addridx = 0; 493 run = 0; 494 495 for (i = 1; i < num_pages; i++) { 496 addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i])); 497 if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) { 498 next_addr += PAGE_SIZE; 499 run++; 500 } else { 501 addrs[addridx] = (unsigned long)base_addr + run; 502 addridx++; 503 base_addr = addr; 504 next_addr = addr + PAGE_SIZE; 505 run = 0; 506 } 507 } 508 509 addrs[addridx] = (unsigned long)base_addr + run; 510 addridx++; 511 512 /* Partial cache lines (fragments) require special measures */ 513 if ((type == PAGELIST_READ) && 514 ((pagelist->offset & (g_cache_line_size - 1)) || 515 ((pagelist->offset + pagelist->length) & 516 (g_cache_line_size - 1)))) { 517 char *fragments; 518 519 if (down_interruptible(&g_free_fragments_sema) != 0) { 520 free(pagelist, M_VCPAGELIST); 521 return -EINTR; 522 } 523 524 WARN_ON(g_free_fragments == NULL); 525 526 down(&g_free_fragments_mutex); 527 fragments = g_free_fragments; 528 WARN_ON(fragments == NULL); 529 g_free_fragments = *(char **) g_free_fragments; 530 up(&g_free_fragments_mutex); 531 pagelist->type = 532 PAGELIST_READ_WITH_FRAGMENTS + 533 (fragments - g_fragments_base)/g_fragment_size; 534 } 535 536 pa = pmap_extract(PCPU_GET(curpmap), (vm_offset_t)buf); 537 dcache_wbinv_poc((vm_offset_t)buf, pa, count); 538 539 bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map, BUS_DMASYNC_PREWRITE); 540 541 bi->pagelist = pagelist; 542 543 return 0; 544 545 failed_load: 546 bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map); 547 failed_alloc: 548 bus_dma_tag_destroy(bi->pagelist_dma_tag); 549 550 return err; 551 } 552 553 static void 554 free_pagelist(BULKINFO_T *bi, int actual) 555 { 556 vm_page_t*pages; 557 unsigned int num_pages, i; 558 PAGELIST_T *pagelist; 559 560 pagelist = bi->pagelist; 561 562 vchiq_log_trace(vchiq_arm_log_level, 563 "free_pagelist - %x, %d (%lu bytes @%p)", (unsigned int)pagelist, actual, pagelist->length, bi->buf); 564 565 num_pages = 566 (pagelist->length + pagelist->offset + PAGE_SIZE - 1) / 567 PAGE_SIZE; 568 569 pages = (vm_page_t*)(pagelist->addrs + num_pages); 570 571 /* Deal with any partial cache lines (fragments) */ 572 if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { 573 char *fragments = g_fragments_base + 574 (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS)*g_fragment_size; 575 int head_bytes, tail_bytes; 576 head_bytes = (g_cache_line_size - pagelist->offset) & 577 (g_cache_line_size - 1); 578 tail_bytes = (pagelist->offset + actual) & 579 (g_cache_line_size - 1); 580 581 if ((actual >= 0) && (head_bytes != 0)) { 582 if (head_bytes > actual) 583 head_bytes = actual; 584 585 copyout_page(pages[0], 586 pagelist->offset, 587 fragments, 588 head_bytes); 589 } 590 591 if ((actual >= 0) && (head_bytes < actual) && 592 (tail_bytes != 0)) { 593 594 copyout_page(pages[num_pages-1], 595 (((vm_offset_t)bi->buf + actual) % PAGE_SIZE) - tail_bytes, 596 fragments + g_cache_line_size, 597 tail_bytes); 598 } 599 600 down(&g_free_fragments_mutex); 601 *(char **) fragments = g_free_fragments; 602 g_free_fragments = fragments; 603 up(&g_free_fragments_mutex); 604 up(&g_free_fragments_sema); 605 } 606 607 for (i = 0; i < num_pages; i++) { 608 if (pagelist->type != PAGELIST_WRITE) { 609 vm_page_dirty(pages[i]); 610 pagelist_page_free(pages[i]); 611 } 612 } 613 614 bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map); 615 bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map); 616 bus_dma_tag_destroy(bi->pagelist_dma_tag); 617 618 free(bi, M_VCPAGELIST); 619 } 620