1 /** 2 * Copyright (c) 2010-2012 Broadcom. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions, and the following disclaimer, 9 * without modification. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The names of the above-listed copyright holders may not be used 14 * to endorse or promote products derived from this software without 15 * specific prior written permission. 16 * 17 * ALTERNATIVELY, this software may be distributed under the terms of the 18 * GNU General Public License ("GPL") version 2, as published by the Free 19 * Software Foundation. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 25 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <interface/compat/vchi_bsd.h> 35 36 #include <sys/malloc.h> 37 #include <sys/rwlock.h> 38 39 #include <vm/vm.h> 40 #include <vm/pmap.h> 41 #include <vm/vm_extern.h> 42 #include <vm/vm_kern.h> 43 #include <vm/vm_map.h> 44 #include <vm/vm_object.h> 45 #include <vm/vm_page.h> 46 #include <vm/vm_pager.h> 47 #include <vm/vm_param.h> 48 #include <vm/vm_phys.h> 49 50 #include <machine/bus.h> 51 #include <machine/cpu.h> 52 #include <arm/broadcom/bcm2835/bcm2835_mbox.h> 53 #include <arm/broadcom/bcm2835/bcm2835_vcbus.h> 54 55 MALLOC_DEFINE(M_VCPAGELIST, "vcpagelist", "VideoCore pagelist memory"); 56 57 #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) 58 59 #define VCHIQ_DOORBELL_IRQ IRQ_ARM_DOORBELL_0 60 #define VCHIQ_ARM_ADDRESS(x) ((void *)PHYS_TO_VCBUS(pmap_kextract((vm_offset_t)(x)))) 61 62 #include "vchiq_arm.h" 63 #include "vchiq_2835.h" 64 #include "vchiq_connected.h" 65 #include "vchiq_killable.h" 66 67 #define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2) 68 69 int g_cache_line_size = 32; 70 static int g_fragment_size; 71 72 typedef struct vchiq_2835_state_struct { 73 int inited; 74 VCHIQ_ARM_STATE_T arm_state; 75 } VCHIQ_2835_ARM_STATE_T; 76 77 static char *g_slot_mem; 78 static int g_slot_mem_size; 79 vm_paddr_t g_slot_phys; 80 /* BSD DMA */ 81 bus_dma_tag_t bcm_slots_dma_tag; 82 bus_dmamap_t bcm_slots_dma_map; 83 84 static char *g_fragments_base; 85 static char *g_free_fragments; 86 struct semaphore g_free_fragments_sema; 87 88 static DEFINE_SEMAPHORE(g_free_fragments_mutex); 89 90 typedef struct bulkinfo_struct { 91 PAGELIST_T *pagelist; 92 bus_dma_tag_t pagelist_dma_tag; 93 bus_dmamap_t pagelist_dma_map; 94 void *buf; 95 size_t size; 96 } BULKINFO_T; 97 98 static int 99 create_pagelist(char __user *buf, size_t count, unsigned short type, 100 struct proc *p, BULKINFO_T *bi); 101 102 static void 103 free_pagelist(BULKINFO_T *bi, int actual); 104 105 static void 106 vchiq_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) 107 { 108 bus_addr_t *addr; 109 110 if (err) 111 return; 112 113 addr = (bus_addr_t*)arg; 114 *addr = PHYS_TO_VCBUS(segs[0].ds_addr); 115 } 116 117 static int 118 copyout_page(vm_page_t p, size_t offset, void *kaddr, size_t size) 119 { 120 uint8_t *dst; 121 122 dst = (uint8_t*)pmap_quick_enter_page(p); 123 if (!dst) 124 return ENOMEM; 125 126 memcpy(dst + offset, kaddr, size); 127 128 pmap_quick_remove_page((vm_offset_t)dst); 129 130 return 0; 131 } 132 133 int __init 134 vchiq_platform_init(VCHIQ_STATE_T *state) 135 { 136 VCHIQ_SLOT_ZERO_T *vchiq_slot_zero; 137 int frag_mem_size; 138 int err; 139 int i; 140 141 /* Allocate space for the channels in coherent memory */ 142 g_slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE); 143 g_fragment_size = 2*g_cache_line_size; 144 frag_mem_size = PAGE_ALIGN(g_fragment_size * MAX_FRAGMENTS); 145 146 err = bus_dma_tag_create( 147 NULL, 148 PAGE_SIZE, 0, /* alignment, boundary */ 149 BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ 150 BUS_SPACE_MAXADDR, /* highaddr */ 151 NULL, NULL, /* filter, filterarg */ 152 g_slot_mem_size + frag_mem_size, 1, /* maxsize, nsegments */ 153 g_slot_mem_size + frag_mem_size, 0, /* maxsegsize, flags */ 154 NULL, NULL, /* lockfunc, lockarg */ 155 &bcm_slots_dma_tag); 156 157 err = bus_dmamem_alloc(bcm_slots_dma_tag, (void **)&g_slot_mem, 158 BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bcm_slots_dma_map); 159 if (err) { 160 vchiq_log_error(vchiq_core_log_level, "Unable to allocate channel memory"); 161 err = -ENOMEM; 162 goto failed_alloc; 163 } 164 165 err = bus_dmamap_load(bcm_slots_dma_tag, bcm_slots_dma_map, g_slot_mem, 166 g_slot_mem_size + frag_mem_size, vchiq_dmamap_cb, 167 &g_slot_phys, 0); 168 169 if (err) { 170 vchiq_log_error(vchiq_core_log_level, "cannot load DMA map"); 171 err = -ENOMEM; 172 goto failed_load; 173 } 174 175 WARN_ON(((int)g_slot_mem & (PAGE_SIZE - 1)) != 0); 176 177 vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size); 178 if (!vchiq_slot_zero) { 179 err = -EINVAL; 180 goto failed_init_slots; 181 } 182 183 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] = 184 (int)g_slot_phys + g_slot_mem_size; 185 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] = 186 MAX_FRAGMENTS; 187 188 g_fragments_base = (char *)(g_slot_mem + g_slot_mem_size); 189 g_slot_mem_size += frag_mem_size; 190 191 g_free_fragments = g_fragments_base; 192 for (i = 0; i < (MAX_FRAGMENTS - 1); i++) { 193 *(char **)&g_fragments_base[i*g_fragment_size] = 194 &g_fragments_base[(i + 1)*g_fragment_size]; 195 } 196 *(char **)&g_fragments_base[i*g_fragment_size] = NULL; 197 _sema_init(&g_free_fragments_sema, MAX_FRAGMENTS); 198 199 if (vchiq_init_state(state, vchiq_slot_zero, 0/*slave*/) != 200 VCHIQ_SUCCESS) { 201 err = -EINVAL; 202 goto failed_vchiq_init; 203 } 204 205 bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (unsigned int)g_slot_phys); 206 207 vchiq_log_info(vchiq_arm_log_level, 208 "vchiq_init - done (slots %x, phys %x)", 209 (unsigned int)vchiq_slot_zero, g_slot_phys); 210 211 vchiq_call_connected_callbacks(); 212 213 return 0; 214 215 failed_vchiq_init: 216 failed_init_slots: 217 bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map); 218 failed_load: 219 bus_dmamem_free(bcm_slots_dma_tag, g_slot_mem, bcm_slots_dma_map); 220 failed_alloc: 221 bus_dma_tag_destroy(bcm_slots_dma_tag); 222 223 return err; 224 } 225 226 void __exit 227 vchiq_platform_exit(VCHIQ_STATE_T *state) 228 { 229 230 bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map); 231 bus_dmamem_free(bcm_slots_dma_tag, g_slot_mem, bcm_slots_dma_map); 232 bus_dma_tag_destroy(bcm_slots_dma_tag); 233 } 234 235 VCHIQ_STATUS_T 236 vchiq_platform_init_state(VCHIQ_STATE_T *state) 237 { 238 VCHIQ_STATUS_T status = VCHIQ_SUCCESS; 239 state->platform_state = kzalloc(sizeof(VCHIQ_2835_ARM_STATE_T), GFP_KERNEL); 240 ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 1; 241 status = vchiq_arm_init_state(state, &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state); 242 if(status != VCHIQ_SUCCESS) 243 { 244 ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 0; 245 } 246 return status; 247 } 248 249 VCHIQ_ARM_STATE_T* 250 vchiq_platform_get_arm_state(VCHIQ_STATE_T *state) 251 { 252 if(!((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited) 253 { 254 BUG(); 255 } 256 return &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state; 257 } 258 259 int 260 vchiq_copy_from_user(void *dst, const void *src, int size) 261 { 262 263 if (((vm_offset_t)(src)) < VM_MIN_KERNEL_ADDRESS) { 264 int error = copyin(src, dst, size); 265 return error ? VCHIQ_ERROR : VCHIQ_SUCCESS; 266 } 267 else 268 bcopy(src, dst, size); 269 270 return 0; 271 } 272 273 VCHIQ_STATUS_T 274 vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle, 275 void *offset, int size, int dir) 276 { 277 BULKINFO_T *bi; 278 int ret; 279 280 WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID); 281 bi = malloc(sizeof(*bi), M_VCPAGELIST, M_WAITOK | M_ZERO); 282 if (bi == NULL) 283 return VCHIQ_ERROR; 284 285 ret = create_pagelist((char __user *)offset, size, 286 (dir == VCHIQ_BULK_RECEIVE) 287 ? PAGELIST_READ 288 : PAGELIST_WRITE, 289 current, 290 bi); 291 if (ret != 0) 292 return VCHIQ_ERROR; 293 294 bulk->handle = memhandle; 295 bulk->data = VCHIQ_ARM_ADDRESS(bi->pagelist); 296 297 /* Store the pagelist address in remote_data, which isn't used by the 298 slave. */ 299 bulk->remote_data = bi; 300 301 return VCHIQ_SUCCESS; 302 } 303 304 void 305 vchiq_complete_bulk(VCHIQ_BULK_T *bulk) 306 { 307 if (bulk && bulk->remote_data && bulk->actual) 308 free_pagelist((BULKINFO_T *)bulk->remote_data, bulk->actual); 309 } 310 311 void 312 vchiq_transfer_bulk(VCHIQ_BULK_T *bulk) 313 { 314 /* 315 * This should only be called on the master (VideoCore) side, but 316 * provide an implementation to avoid the need for ifdefery. 317 */ 318 BUG(); 319 } 320 321 void 322 vchiq_dump_platform_state(void *dump_context) 323 { 324 char buf[80]; 325 int len; 326 len = snprintf(buf, sizeof(buf), 327 " Platform: 2835 (VC master)"); 328 vchiq_dump(dump_context, buf, len + 1); 329 } 330 331 VCHIQ_STATUS_T 332 vchiq_platform_suspend(VCHIQ_STATE_T *state) 333 { 334 return VCHIQ_ERROR; 335 } 336 337 VCHIQ_STATUS_T 338 vchiq_platform_resume(VCHIQ_STATE_T *state) 339 { 340 return VCHIQ_SUCCESS; 341 } 342 343 void 344 vchiq_platform_paused(VCHIQ_STATE_T *state) 345 { 346 } 347 348 void 349 vchiq_platform_resumed(VCHIQ_STATE_T *state) 350 { 351 } 352 353 int 354 vchiq_platform_videocore_wanted(VCHIQ_STATE_T* state) 355 { 356 return 1; // autosuspend not supported - videocore always wanted 357 } 358 359 int 360 vchiq_platform_use_suspend_timer(void) 361 { 362 return 0; 363 } 364 void 365 vchiq_dump_platform_use_state(VCHIQ_STATE_T *state) 366 { 367 vchiq_log_info(vchiq_arm_log_level, "Suspend timer not in use"); 368 } 369 void 370 vchiq_platform_handle_timeout(VCHIQ_STATE_T *state) 371 { 372 (void)state; 373 } 374 /* 375 * Local functions 376 */ 377 378 static void 379 pagelist_page_free(vm_page_t pp) 380 { 381 vm_page_lock(pp); 382 vm_page_unwire(pp, PQ_INACTIVE); 383 if (pp->wire_count == 0 && pp->object == NULL) 384 vm_page_free(pp); 385 vm_page_unlock(pp); 386 } 387 388 /* There is a potential problem with partial cache lines (pages?) 389 ** at the ends of the block when reading. If the CPU accessed anything in 390 ** the same line (page?) then it may have pulled old data into the cache, 391 ** obscuring the new data underneath. We can solve this by transferring the 392 ** partial cache lines separately, and allowing the ARM to copy into the 393 ** cached area. 394 395 ** N.B. This implementation plays slightly fast and loose with the Linux 396 ** driver programming rules, e.g. its use of __virt_to_bus instead of 397 ** dma_map_single, but it isn't a multi-platform driver and it benefits 398 ** from increased speed as a result. 399 */ 400 401 static int 402 create_pagelist(char __user *buf, size_t count, unsigned short type, 403 struct proc *p, BULKINFO_T *bi) 404 { 405 PAGELIST_T *pagelist; 406 vm_page_t* pages; 407 unsigned long *addrs; 408 unsigned int num_pages, i; 409 vm_offset_t offset; 410 int pagelist_size; 411 char *addr, *base_addr, *next_addr; 412 int run, addridx, actual_pages; 413 int err; 414 vm_paddr_t pagelist_phys; 415 vm_paddr_t pa; 416 417 offset = (vm_offset_t)buf & (PAGE_SIZE - 1); 418 num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE; 419 420 bi->pagelist = NULL; 421 bi->buf = buf; 422 bi->size = count; 423 424 /* Allocate enough storage to hold the page pointers and the page 425 ** list 426 */ 427 pagelist_size = sizeof(PAGELIST_T) + 428 (num_pages * sizeof(unsigned long)) + 429 (num_pages * sizeof(pages[0])); 430 431 err = bus_dma_tag_create( 432 NULL, 433 PAGE_SIZE, 0, /* alignment, boundary */ 434 BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ 435 BUS_SPACE_MAXADDR, /* highaddr */ 436 NULL, NULL, /* filter, filterarg */ 437 pagelist_size, 1, /* maxsize, nsegments */ 438 pagelist_size, 0, /* maxsegsize, flags */ 439 NULL, NULL, /* lockfunc, lockarg */ 440 &bi->pagelist_dma_tag); 441 442 err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist, 443 BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map); 444 if (err) { 445 vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory"); 446 err = -ENOMEM; 447 goto failed_alloc; 448 } 449 450 err = bus_dmamap_load(bi->pagelist_dma_tag, bi->pagelist_dma_map, pagelist, 451 pagelist_size, vchiq_dmamap_cb, 452 &pagelist_phys, 0); 453 454 if (err) { 455 vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory"); 456 err = -ENOMEM; 457 goto failed_load; 458 } 459 460 vchiq_log_trace(vchiq_arm_log_level, 461 "create_pagelist - %x (%d bytes @%p)", (unsigned int)pagelist, count, buf); 462 463 if (!pagelist) 464 return -ENOMEM; 465 466 addrs = pagelist->addrs; 467 pages = (vm_page_t*)(addrs + num_pages); 468 469 actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map, 470 (vm_offset_t)buf, count, 471 (type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages); 472 473 if (actual_pages != num_pages) { 474 vm_page_unhold_pages(pages, actual_pages); 475 free(pagelist, M_VCPAGELIST); 476 return (-ENOMEM); 477 } 478 479 for (i = 0; i < actual_pages; i++) { 480 vm_page_lock(pages[i]); 481 vm_page_wire(pages[i]); 482 vm_page_unhold(pages[i]); 483 vm_page_unlock(pages[i]); 484 } 485 486 pagelist->length = count; 487 pagelist->type = type; 488 pagelist->offset = offset; 489 490 /* Group the pages into runs of contiguous pages */ 491 492 base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0])); 493 next_addr = base_addr + PAGE_SIZE; 494 addridx = 0; 495 run = 0; 496 497 for (i = 1; i < num_pages; i++) { 498 addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i])); 499 if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) { 500 next_addr += PAGE_SIZE; 501 run++; 502 } else { 503 addrs[addridx] = (unsigned long)base_addr + run; 504 addridx++; 505 base_addr = addr; 506 next_addr = addr + PAGE_SIZE; 507 run = 0; 508 } 509 } 510 511 addrs[addridx] = (unsigned long)base_addr + run; 512 addridx++; 513 514 /* Partial cache lines (fragments) require special measures */ 515 if ((type == PAGELIST_READ) && 516 ((pagelist->offset & (g_cache_line_size - 1)) || 517 ((pagelist->offset + pagelist->length) & 518 (g_cache_line_size - 1)))) { 519 char *fragments; 520 521 if (down_interruptible(&g_free_fragments_sema) != 0) { 522 free(pagelist, M_VCPAGELIST); 523 return -EINTR; 524 } 525 526 WARN_ON(g_free_fragments == NULL); 527 528 down(&g_free_fragments_mutex); 529 fragments = g_free_fragments; 530 WARN_ON(fragments == NULL); 531 g_free_fragments = *(char **) g_free_fragments; 532 up(&g_free_fragments_mutex); 533 pagelist->type = 534 PAGELIST_READ_WITH_FRAGMENTS + 535 (fragments - g_fragments_base)/g_fragment_size; 536 } 537 538 pa = pmap_extract(PCPU_GET(curpmap), (vm_offset_t)buf); 539 dcache_wbinv_poc((vm_offset_t)buf, pa, count); 540 541 bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map, BUS_DMASYNC_PREWRITE); 542 543 bi->pagelist = pagelist; 544 545 return 0; 546 547 failed_load: 548 bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map); 549 failed_alloc: 550 bus_dma_tag_destroy(bi->pagelist_dma_tag); 551 552 return err; 553 } 554 555 static void 556 free_pagelist(BULKINFO_T *bi, int actual) 557 { 558 vm_page_t*pages; 559 unsigned int num_pages, i; 560 PAGELIST_T *pagelist; 561 562 pagelist = bi->pagelist; 563 564 vchiq_log_trace(vchiq_arm_log_level, 565 "free_pagelist - %x, %d (%lu bytes @%p)", (unsigned int)pagelist, actual, pagelist->length, bi->buf); 566 567 num_pages = 568 (pagelist->length + pagelist->offset + PAGE_SIZE - 1) / 569 PAGE_SIZE; 570 571 pages = (vm_page_t*)(pagelist->addrs + num_pages); 572 573 /* Deal with any partial cache lines (fragments) */ 574 if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { 575 char *fragments = g_fragments_base + 576 (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS)*g_fragment_size; 577 int head_bytes, tail_bytes; 578 head_bytes = (g_cache_line_size - pagelist->offset) & 579 (g_cache_line_size - 1); 580 tail_bytes = (pagelist->offset + actual) & 581 (g_cache_line_size - 1); 582 583 if ((actual >= 0) && (head_bytes != 0)) { 584 if (head_bytes > actual) 585 head_bytes = actual; 586 587 copyout_page(pages[0], 588 pagelist->offset, 589 fragments, 590 head_bytes); 591 } 592 593 if ((actual >= 0) && (head_bytes < actual) && 594 (tail_bytes != 0)) { 595 596 copyout_page(pages[num_pages-1], 597 (((vm_offset_t)bi->buf + actual) % PAGE_SIZE) - tail_bytes, 598 fragments + g_cache_line_size, 599 tail_bytes); 600 } 601 602 down(&g_free_fragments_mutex); 603 *(char **) fragments = g_free_fragments; 604 g_free_fragments = fragments; 605 up(&g_free_fragments_mutex); 606 up(&g_free_fragments_sema); 607 } 608 609 for (i = 0; i < num_pages; i++) { 610 if (pagelist->type != PAGELIST_WRITE) { 611 vm_page_dirty(pages[i]); 612 pagelist_page_free(pages[i]); 613 } 614 } 615 616 bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map); 617 bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map); 618 bus_dma_tag_destroy(bi->pagelist_dma_tag); 619 620 free(bi, M_VCPAGELIST); 621 } 622