/*
 * Copyright(c) 2015-2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
#include <linux/bitmap.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"
#include "mmu_rb.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);

static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
			 const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);

static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo);
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long events);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       int start_stop);
static int vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00
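
/*
 * A valid mmap token packs, from bit 0 upward: a 12-bit page offset,
 * a 4-bit sub-context, an 8-bit context, a 4-bit mmap type and the
 * 32-bit magic value above, using the masks and shifts defined here.
 */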

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fd;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase))
		return -EINVAL;

	if (!atomic_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

	/* The real work is performed later in assign_ctxt() */

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

	if (fd) {
		fd->rec_cpu_num = -1; /* no cpu affinity by default */
		fd->mm = current->mm;
		mmgrab(fd->mm);
		fd->dd = dd;
		fp->private_data = fd;
	} else {
		fp->private_data = NULL;

		if (atomic_dec_and_test(&dd->user_refcount))
			complete(&dd->user_comp);

		return -ENOMEM;
	}

	return 0;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_info uinfo;
	struct hfi1_tid_info tinfo;
	int ret = 0;
	unsigned long addr;
	int uval = 0;
	unsigned long ul_uval = 0;
	u16 uval16 = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		if (uctxt)
			return -EINVAL;

		if (copy_from_user(&uinfo,
				   (struct hfi1_user_info __user *)arg,
				   sizeof(uinfo)))
			return -EFAULT;

		ret = assign_ctxt(fd, &uinfo);
		break;
	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_ctxt_info));
		break;
	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fd, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_base_info));
		break;
	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
		if (!ret) {
			/*
			 * Copy the number of tidlist entries we used
			 * and the length of the buffer we registered.
			 * These fields are adjacent in the structure so
			 * we can copy them at the same time.
			 */
			addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
			if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
					 sizeof(tinfo.tidcnt) +
					 sizeof(tinfo.length)))
				ret = -EFAULT;
		}
		break;

	case HFI1_IOCTL_TID_FREE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = manage_rcvq(uctxt, fd->subctxt, uval);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = get_user(ul_uval, (unsigned long __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = get_user(uval16, (u16 __user *)arg);
		if (ret != 0)
			return -EFAULT;
		if (HFI1_CAP_IS_USET(PKEY_CHECK))
			ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
		else
			return -EPERM;
		break;

	case HFI1_IOCTL_CTXT_RESET: {
		struct send_context *sc;
		struct hfi1_devdata *dd;

		if (!uctxt || !uctxt->dd || !uctxt->sc)
			return -EINVAL;

		/*
		 * There is no protection here. User level has to
		 * guarantee that no one will be writing to the send
		 * context while it is being re-initialized.
		 * If user level breaks that guarantee, it will break
		 * its own context and no one else's.
		 */
		dd = uctxt->dd;
		sc = uctxt->sc;
		/*
		 * Wait until the interrupt handler has marked the
		 * context as halted or frozen. Report error if we time
		 * out.
		 */
		wait_event_interruptible_timeout(
			sc->halt_wait, (sc->flags & SCF_HALTED),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (!(sc->flags & SCF_HALTED))
			return -ENOLCK;

		/*
		 * If the send context was halted due to a Freeze,
		 * wait until the device has been "unfrozen" before
		 * resetting the context.
		 */
		if (sc->flags & SCF_FROZEN) {
			wait_event_interruptible_timeout(
				dd->event_queue,
				!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
				msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
			if (dd->flags & HFI1_FROZEN)
				return -ENOLCK;

			if (dd->flags & HFI1_FORCED_FREEZE)
				/*
				 * Don't allow context reset if we are in a
				 * forced freeze
				 */
				return -ENODEV;

			sc_disable(sc);
			ret = sc_enable(sc);
			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
				     uctxt->ctxt);
		} else {
			ret = sc_restart(sc);
		}
		if (!ret)
			sc_return_credits(sc);
		break;
	}

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;

	if (!cq || !pq)
		return -EIO;

	if (!iter_is_iovec(from) || !dim)
		return -EINVAL;

	hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
		  fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
		return -ENOSPC;

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			fd, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	return reqs;
}

static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			(type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		memvirt = dd->cr_base[uctxt->numa_id].va;
		memaddr = virt_to_phys(memvirt) +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memlen = uctxt->rcvhdrq_size;
		memvirt = uctxt->rcvhdrq;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffer needs to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		for (i = 0; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			ret = remap_pfn_range(
				vma, addr,
				/*
				 * virt_to_pfn() does the same, but
				 * it's not available on x86_64
				 * when CONFIG_MMU is enabled.
				 */
				PFN_DOWN(__pa(memvirt)),
				memlen,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += memlen;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)(dd->events +
					  ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
					   HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
			ret = -EPERM;
			goto done;
		}
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		  ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
	if (vmf) {
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
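 *
 * vma_fault() recovers the kernel virtual address that hfi1_file_mmap()
 * stashed in vm_pgoff and resolves the backing page with vmalloc_to_page().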
 */
static int vma_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = POLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = POLLERR;

	return pollflag;
}

static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
	mutex_lock(&hfi1_mutex);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/* clean up rcv side */
	hfi1_user_exp_rcv_free(fdata);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			   HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
	*ev = 0;

	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		mutex_unlock(&hfi1_mutex);
		goto done;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
	/*
	 * Reset context integrity checks to default.
	 * (writes to CSRs probably belong in chip.c)
	 */
	write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
			hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
	sc_disable(uctxt->sc);
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	dd->rcd[uctxt->ctxt] = NULL;

	hfi1_user_exp_rcv_grp_free(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	uctxt->rcvwait_to = 0;
	uctxt->piowait_to = 0;
	uctxt->rcvnowait = 0;
	uctxt->pionowait = 0;
	uctxt->event_flags = 0;

	hfi1_stats.sps_ctxts--;
	if (++dd->freectxts == dd->num_user_contexts)
		aspm_enable_all(dd);
	mutex_unlock(&hfi1_mutex);
	hfi1_free_ctxtdata(dd, uctxt);
done:
	mmdrop(fdata->mm);
	kobject_put(&dd->kobj);

	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
	int ret;
	unsigned int swmajor, swminor;

	swmajor = uinfo->userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR)
		return -ENODEV;

	swminor = uinfo->userversion & 0xffff;

	mutex_lock(&hfi1_mutex);
	/*
	 * Get a sub context if necessary.
	 * ret < 0 error, 0 no context, 1 sub-context found
	 */
	ret = 0;
	if (uinfo->subctxt_cnt) {
		ret = find_sub_ctxt(fd, uinfo);
		if (ret > 0)
			fd->rec_cpu_num =
				hfi1_get_proc_affinity(fd->uctxt->numa_id);
	}

	/*
	 * Allocate a base context if context sharing is not required or we
	 * couldn't find a sub context.
	 */
	if (!ret)
		ret = allocate_ctxt(fd, fd->dd, uinfo);

	mutex_unlock(&hfi1_mutex);

	/* Depending on the context type, do the appropriate init */
	if (ret > 0) {
		/*
		 * sub-context info can only be set up after the base
		 * context has been completed.
		 */
		ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
					       HFI1_CTXT_BASE_UNINIT,
					       &fd->uctxt->event_flags));
		if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
			clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
			return -ENOMEM;
		}
		/* The only thing a sub context needs is the user_xxx stuff */
		if (!ret)
			ret = init_user_ctxt(fd);

		if (ret)
			clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
	} else if (!ret) {
		ret = setup_base_ctxt(fd);
		if (fd->uctxt->subctxt_cnt) {
			/* If there is an error, set the failed bit. */
			if (ret)
				set_bit(HFI1_CTXT_BASE_FAILED,
					&fd->uctxt->event_flags);
			/*
			 * Base context is done, notify anybody using a
			 * sub-context that is waiting for this completion
			 */
			clear_bit(HFI1_CTXT_BASE_UNINIT,
				  &fd->uctxt->event_flags);
			wake_up(&fd->uctxt->wait);
		}
	}

	return ret;
}

/*
 * The hfi1_mutex must be held when this function is called. It is
 * necessary to ensure serialized access to the bitmask in_use_ctxts.
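 *
 * Returns 1 after claiming a free slot in in_use_ctxts when a matching
 * shared context is found, 0 when none exists, or a negative errno on a
 * user version mismatch or a fully subscribed context.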
 */
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo)
{
	int i;
	struct hfi1_devdata *dd = fd->dd;
	u16 subctxt;

	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
		struct hfi1_ctxtdata *uctxt = dd->rcd[i];

		/* Skip ctxts which are not yet open */
		if (!uctxt ||
		    bitmap_empty(uctxt->in_use_ctxts,
				 HFI1_MAX_SHARED_CTXTS))
			continue;

		/* Skip dynamically allocated kernel contexts */
		if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
			continue;

		/* Skip ctxt if it doesn't match the requested one */
		if (memcmp(uctxt->uuid, uinfo->uuid,
			   sizeof(uctxt->uuid)) ||
		    uctxt->jkey != generate_jkey(current_uid()) ||
		    uctxt->subctxt_id != uinfo->subctxt_id ||
		    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
			continue;

		/* Verify the sharing process matches the master */
		if (uctxt->userversion != uinfo->userversion)
			return -EINVAL;

		/* Find an unused context */
		subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
					      HFI1_MAX_SHARED_CTXTS);
		if (subctxt >= uctxt->subctxt_cnt)
			return -EBUSY;

		fd->uctxt = uctxt;
		fd->subctxt = subctxt;
		__set_bit(fd->subctxt, uctxt->in_use_ctxts);

		return 1;
	}

	return 0;
}

static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned int ctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen. It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	/*
	 * This check is sort of redundant to the next EBUSY error. It would
	 * also indicate an inconsistency in the driver if this value was
	 * zero, but there were still contexts available.
	 */
	if (!dd->freectxts)
		return -EBUSY;

	for (ctxt = dd->first_dyn_alloc_ctxt;
	     ctxt < dd->num_rcv_contexts; ctxt++)
		if (!dd->rcd[ctxt])
			break;

	if (ctxt == dd->num_rcv_contexts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
	if (!uctxt) {
		dd_dev_err(dd,
			   "Unable to allocate ctxtdata memory, failing open\n");
		return -ENOMEM;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
			     uctxt->dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Setup sub context resources if the user-level has requested
	 * sub contexts.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper master.
	 */
	if (uinfo->subctxt_cnt) {
		ret = init_subctxts(uctxt, uinfo);
		/*
		 * On error, we don't need to disable and de-allocate the
		 * send context because it will be done during file close
		 */
		if (ret)
			goto ctxdata_free;
	}
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	INIT_LIST_HEAD(&uctxt->sdma_queues);
	spin_lock_init(&uctxt->sdma_qlock);
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);
	fd->uctxt = uctxt;

	return 0;

ctxdata_free:
	dd->rcd[ctxt] = NULL;
	hfi1_free_ctxtdata(dd, uctxt);
	return ret;
}

static int init_subctxts(struct hfi1_ctxtdata *uctxt,
			 const struct hfi1_user_info *uinfo)
{
	u16 num_subctxts;

	num_subctxts = uinfo->subctxt_cnt;
	if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	uctxt->redirect_seq_cnt = 1;
	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);

	return 0;
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	u16 num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase)
		return -ENOMEM;

	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	return 0;

bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
	uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;

	return ret;
}

static void user_init(struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when it changes (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue. We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packet per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
}

static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;

	memset(&cinfo, 0, sizeof(cinfo));
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->handler)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
		ret = -EFAULT;

	return ret;
}

static int init_user_ctxt(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret;

	ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
	if (ret)
		return ret;

	ret = hfi1_user_exp_rcv_init(fd);

	return ret;
}

static int setup_base_ctxt(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	hfi1_init_ctxt(uctxt->sc);

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		return ret;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto setup_failed;

	/* If sub-contexts are enabled, do the appropriate setup */
	if (uctxt->subctxt_cnt)
		ret = setup_subctxt(uctxt);
	if (ret)
		goto setup_failed;

	ret = hfi1_user_exp_rcv_grp_init(fd);
	if (ret)
		goto setup_failed;

	ret = init_user_ctxt(fd);
	if (ret)
		goto setup_failed;

	user_init(uctxt);

	return 0;

setup_failed:
	hfi1_free_ctxtdata(dd, uctxt);
	return ret;
}

static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	ssize_t sz;
	unsigned offset;
	int ret = 0;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = kdeth_qp;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
				  HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
				sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}
	sz = (len < sizeof(binfo)) ? len : sizeof(binfo);
	if (copy_to_user(ubase, &binfo, sz))
		ret = -EFAULT;
	return ret;
}

static unsigned int poll_urgent(struct file *fp,
				struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = POLLIN | POLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static unsigned int poll_next(struct file *fp,
			      struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
		pollflag = 0;
	} else {
		pollflag = POLLIN | POLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	unsigned ctxt;
	int ret = 0;
	unsigned long flags;

	if (!dd->events) {
		ret = -EINVAL;
		goto done;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = dd->rcd[ctxt];
		if (uctxt) {
			unsigned long *evs = dd->events +
				(uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
				HFI1_MAX_SHARED_CTXTS;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
		}
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
done:
	return ret;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions. start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       int start_stop)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;

	if (subctxt)
		goto bail;
	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (uctxt->rcvhdrtail_kvaddr)
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}

/*
 * clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long events)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;

	if (!dd->events)
		return 0;

	evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			    HFI1_MAX_SHARED_CTXTS) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
{
	int ret = -ENOENT, i, intable = 0;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) {
		ret = -EINVAL;
		goto done;
	}

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i]) {
			intable = 1;
			break;
		}

	if (intable)
		ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey);
done:
	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{
	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}