1 /*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice unmodified, this list of conditions, and the following 13 * disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/sleepqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/rwlock.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/stdarg.h>

#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#endif

#include <linux/kobject.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/sysfs.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/timer.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/compat.h>
#include <linux/poll.h>

#include <vm/vm_pager.h>

/* sysctl(8) tree root for LinuxKPI tunables: compat.linuxkpi.* */
SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters");

/* malloc(9) type backing the kmalloc()/kzalloc()/kfree() shims. */
MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");

#include <linux/rbtree.h>
/* Undo Linux compat changes.
*/ 86 #undef RB_ROOT 87 #undef file 88 #undef cdev 89 #define RB_ROOT(head) (head)->rbh_root 90 91 struct kobject linux_class_root; 92 struct device linux_root_device; 93 struct class linux_class_misc; 94 struct list_head pci_drivers; 95 struct list_head pci_devices; 96 spinlock_t pci_lock; 97 98 unsigned long linux_timer_hz_mask; 99 100 int 101 panic_cmp(struct rb_node *one, struct rb_node *two) 102 { 103 panic("no cmp"); 104 } 105 106 RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 107 108 int 109 kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args) 110 { 111 va_list tmp_va; 112 int len; 113 char *old; 114 char *name; 115 char dummy; 116 117 old = kobj->name; 118 119 if (old && fmt == NULL) 120 return (0); 121 122 /* compute length of string */ 123 va_copy(tmp_va, args); 124 len = vsnprintf(&dummy, 0, fmt, tmp_va); 125 va_end(tmp_va); 126 127 /* account for zero termination */ 128 len++; 129 130 /* check for error */ 131 if (len < 1) 132 return (-EINVAL); 133 134 /* allocate memory for string */ 135 name = kzalloc(len, GFP_KERNEL); 136 if (name == NULL) 137 return (-ENOMEM); 138 vsnprintf(name, len, fmt, args); 139 kobj->name = name; 140 141 /* free old string */ 142 kfree(old); 143 144 /* filter new string */ 145 for (; *name != '\0'; name++) 146 if (*name == '/') 147 *name = '!'; 148 return (0); 149 } 150 151 int 152 kobject_set_name(struct kobject *kobj, const char *fmt, ...) 
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);

	return (error);
}

/*
 * Second half of kobject_add(): link the kobject under its parent and
 * create its sysfs directory plus any default attribute files.  On a
 * partial attribute failure the directory is removed again.
 */
static int
kobject_add_complete(struct kobject *kobj, struct kobject *parent)
{
	const struct kobj_type *t;
	int error;

	kobj->parent = parent;
	error = sysfs_create_dir(kobj);
	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
		struct attribute **attr;
		t = kobj->ktype;

		/* create one sysfs file per default attribute */
		for (attr = t->default_attrs; *attr != NULL; attr++) {
			error = sysfs_create_file(kobj, *attr);
			if (error)
				break;
		}
		/* undo directory creation if any attribute failed */
		if (error)
			sysfs_remove_dir(kobj);

	}
	return (error);
}

/*
 * Name the kobject and add it to the hierarchy.  Returns 0 or a
 * negative Linux errno.
 */
int
kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);

	return kobject_add_complete(kobj, parent);
}

/*
 * kref release callback: tear down sysfs state, run the ktype release
 * hook, then free the name.  The name is sampled before the release
 * hook because that hook may free the kobject itself.
 */
void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}

/* Release hook for kobjects that were themselves kmalloc()ed. */
static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}

/* Free only the kobject's name string (kobject storage not owned). */
static void
linux_kobject_kfree_name(struct kobject *kobj)
{
	if (kobj) {
		kfree(kobj->name);
	}
}

const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};

/* Default struct device release: the device was kzalloc()ed by us. */
static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}

/*
 * sysfs "show" dispatcher for class attributes: translate the generic
 * kobject/attribute pair back to class/class_attribute and forward.
 */
static ssize_t
linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr =
	    container_of(attr, struct class_attribute, attr);
	/* -EIO is Linux's convention when no show() handler exists */
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct class, kobj),
		    dattr, buf);
	return (error);
}

/* sysfs "store" dispatcher for class attributes; see linux_class_show(). */
static ssize_t
linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct class, kobj),
		    dattr, buf, count);
	return (error);
}

/* kobject release for a class: chain to the class' own release hook. */
static void
linux_class_release(struct kobject *kobj)
{
	struct class *class;

	class = container_of(kobj, struct class, kobj);
	if (class->class_release)
		class->class_release(class);
}

static const struct sysfs_ops linux_class_sysfs = {
	.show  = linux_class_show,
	.store = linux_class_store,
};

const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};

/* kobject release for a struct device. */
static void
linux_dev_release(struct kobject *kobj)
{
	struct device *dev;

	dev = container_of(kobj, struct device, kobj);
	/* This is the precedence defined by linux.
	 */
	if (dev->release)
		dev->release(dev);
	else if (dev->class && dev->class->dev_release)
		dev->class->dev_release(dev);
}

/*
 * sysfs "show" dispatcher for device attributes: translate the generic
 * kobject/attribute pair to device/device_attribute and forward.
 */
static ssize_t
linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	/* -EIO is Linux's convention when no show() handler exists */
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct device, kobj),
		    dattr, buf);
	return (error);
}

/* sysfs "store" dispatcher for device attributes; see linux_dev_show(). */
static ssize_t
linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct device, kobj),
		    dattr, buf, count);
	return (error);
}

static const struct sysfs_ops linux_dev_sysfs = {
	.show  = linux_dev_show,
	.store = linux_dev_store,
};

const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};

/*
 * Allocate, name and register a new device under "class"/"parent".
 * The device is freed by linux_device_release() on last put.
 * NOTE(review): kzalloc() is passed M_WAITOK (a malloc(9) flag) rather
 * than GFP_KERNEL; the linuxkpi flag mapping appears to make these
 * equivalent here — confirm against <linux/slab.h>.
 */
struct device *
device_create(struct class *class, struct device *parent, dev_t devt,
    void *drvdata, const char *fmt, ...)
{
	struct device *dev;
	va_list args;

	dev = kzalloc(sizeof(*dev), M_WAITOK);
	dev->parent = parent;
	dev->class = class;
	dev->devt = devt;
	dev->driver_data = drvdata;
	dev->release = linux_device_release;
	va_start(args, fmt);
	kobject_set_name_vargs(&dev->kobj, fmt, args);
	va_end(args);
	device_register(dev);

	return (dev);
}

/*
 * Initialize a kobject, set its name from the format arguments, and add
 * it under "parent".  Returns 0 or a negative Linux errno.
 */
int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	kobject_init(kobj, ktype);
	kobj->ktype = ktype;
	kobj->parent = parent;
	kobj->name = NULL;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);
	return kobject_add_complete(kobj, parent);
}

/*
 * devfs cdevpriv destructor: called when the per-open file data is
 * torn down.  Runs the Linux release() hook, drops the vnode hold
 * taken in linux_dev_open() and frees the shadow linux_file.
 */
static void
linux_file_dtor(void *cdp)
{
	struct linux_file *filp;

	linux_set_current(curthread);
	filp = cdp;
	filp->f_op->release(filp->f_vnode, filp);
	vdrop(filp->f_vnode);
	kfree(filp);
}

/*
 * cdevsw open: build a shadow struct linux_file for this open file,
 * invoke the driver's open() hook, and attach the shadow as devfs
 * cdevpriv so later cdevsw entry points can find it.
 */
static int
linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (ENODEV);
	filp = kzalloc(sizeof(*filp), GFP_KERNEL);
	filp->f_dentry = &filp->f_dentry_store;
	filp->f_op = ldev->ops;
	filp->f_flags = file->f_flag;
	/* keep the vnode alive for linux_file_dtor() */
	vhold(file->f_vnode);
	filp->f_vnode = file->f_vnode;
	linux_set_current(td);
	if (filp->f_op->open) {
		/* Linux hooks return negative errno; cdevsw wants positive */
		error = -filp->f_op->open(file->f_vnode, filp);
		if (error) {
			kfree(filp);
			goto done;
		}
	}
	error = devfs_set_cdevpriv(filp, linux_file_dtor);
	if (error) {
		/* undo the successful open() before bailing out */
		filp->f_op->release(file->f_vnode, filp);
		kfree(filp);
	}
done:
	return (error);
}

/*
 * cdevsw close: clearing cdevpriv triggers linux_file_dtor(), which
 * performs the actual release work.
 */
static int
linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct linux_file *filp;
	struct file *file;
	int error;

	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	devfs_clear_cdevpriv();


	return (0);
}

/*
 * Window of fake user-space addresses used to pass the kernel-resident
 * ioctl argument buffer through Linux code that expects a user pointer;
 * see linux_dev_ioctl() and linux_remap_address().
 */
#define	LINUX_IOCTL_MIN_PTR 0x10000UL
#define	LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX)

static inline int
linux_remap_address(void **uaddr, size_t len)
{
	/*
	 * If *uaddr falls inside the fake LINUX_IOCTL window, rewrite it
	 * to point into the current thread's kernel-side ioctl buffer.
	 * Returns 1 when the address was handled (possibly set to NULL
	 * on a bounds error) and 0 when it is a genuine user address.
	 */
	uintptr_t uaddr_val = (uintptr_t)(*uaddr);

	if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR &&
	    uaddr_val < LINUX_IOCTL_MAX_PTR)) {
		struct task_struct *pts = current;
		if (pts == NULL) {
			*uaddr = NULL;
			return (1);
		}

		/* compute data offset */
		uaddr_val -= LINUX_IOCTL_MIN_PTR;

		/* check that length is within bounds */
		if ((len > IOCPARM_MAX) ||
		    (uaddr_val + len) > pts->bsd_ioctl_len) {
			*uaddr = NULL;
			return (1);
		}

		/* re-add kernel buffer address */
		uaddr_val += (uintptr_t)pts->bsd_ioctl_data;

		/* update address location */
		*uaddr = (void *)uaddr_val;
		return (1);
	}
	return (0);
}

/*
 * copy_from_user() backend: either a memcpy() from the remapped
 * kernel ioctl buffer or a real copyin(9).  Returns 0 or -EFAULT.
 */
int
linux_copyin(const void *uaddr, void *kaddr, size_t len)
{
	if (linux_remap_address(__DECONST(void **, &uaddr), len)) {
		if (uaddr == NULL)
			return (-EFAULT);
		memcpy(kaddr, uaddr, len);
		return (0);
	}
	return (-copyin(uaddr, kaddr, len));
}

/*
 * copy_to_user() backend; mirror image of linux_copyin().
 */
int
linux_copyout(const void *kaddr, void *uaddr, size_t len)
{
	if (linux_remap_address(&uaddr, len)) {
		if (uaddr == NULL)
			return (-EFAULT);
		memcpy(uaddr, kaddr, len);
		return (0);
	}
	return (-copyout(kaddr, uaddr, len));
}

/*
 * Zero "len" bytes of user memory.  Returns 0 on success or the
 * original length on fault (Linux clear_user() convention).
 */
size_t
linux_clear_user(void *_uaddr, size_t _len)
{
	uint8_t *uaddr = _uaddr;
	size_t len = _len;

	/* make sure uaddr is aligned before going into the fast loop */
	while (((uintptr_t)uaddr & 7) != 0 && len > 7) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}

	/* zero 8 bytes at a time */
	while (len > 7) {
#ifdef __LP64__
		if (suword64(uaddr, 0))
			return (_len);
#else
		if (suword32(uaddr, 0))
			return (_len);
		if (suword32(uaddr + 4, 0))
			return (_len);
#endif
		uaddr += 8;
		len -= 8;
	}

	/* zero fill end, if any */
	while (len > 0) {
		if (subyte(uaddr, 0))
			return
			    (_len);
		uaddr++;
		len--;
	}
	return (0);
}

/*
 * access_ok() backend: check that [uaddr, uaddr + len) lies entirely
 * within the user address space.  A zero-length range is always OK.
 * The eaddr > saddr test also rejects wrap-around.
 */
int
linux_access_ok(int rw, const void *uaddr, size_t len)
{
	uintptr_t saddr;
	uintptr_t eaddr;

	/* get start and end address */
	saddr = (uintptr_t)uaddr;
	eaddr = (uintptr_t)uaddr + len;

	/* verify addresses are valid for userspace */
	return ((saddr == eaddr) ||
	    (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS));
}

/*
 * cdevsw ioctl: forward to the driver's unlocked_ioctl() hook.  The
 * kernel-side argument buffer is advertised to Linux code through the
 * fake LINUX_IOCTL_MIN_PTR window (see linux_remap_address()).
 */
static int
linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct linux_file *filp;
	struct file *file;
	unsigned size;
	int error;

	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (ENXIO);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;

	linux_set_current(td);
	size = IOCPARM_LEN(cmd);
	/* refer to logic in sys_ioctl() */
	if (size > 0) {
		/*
		 * Setup hint for linux_copyin() and linux_copyout().
		 *
		 * Background: Linux code expects a user-space address
		 * while FreeBSD supplies a kernel-space address.
		 */
		current->bsd_ioctl_data = data;
		current->bsd_ioctl_len = size;
		data = (void *)LINUX_IOCTL_MIN_PTR;
	} else {
		/* fetch user-space pointer */
		data = *(void **)data;
	}
	if (filp->f_op->unlocked_ioctl)
		/* Linux hook returns negative errno; cdevsw wants positive */
		error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data);
	else
		error = ENOTTY;
	if (size > 0) {
		/* clear the copyin/copyout hint again */
		current->bsd_ioctl_data = NULL;
		current->bsd_ioctl_len = 0;
	}

	return (error);
}

/*
 * cdevsw read: forward a single-iovec uio to the driver's read() hook
 * and advance the uio by the number of bytes consumed.
 */
static int
linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (ENXIO);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* advance the iovec by the amount transferred */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

/*
 * cdevsw write: forward a single-iovec uio to the driver's write()
 * hook; mirror image of linux_dev_read().
 */
static int
linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (ENXIO);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->write) {
		bytes = filp->f_op->write(filp,
		    uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* advance the iovec by the amount transferred */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

/*
 * cdevsw poll: forward to the driver's poll() hook.
 * NOTE(review): a NULL poll table is passed, so drivers cannot register
 * for wakeups through this path — apparently only the instantaneous
 * readiness mask is used.  On error every event is reported ready.
 */
static int
linux_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	struct linux_file *filp;
	struct file *file;
	int revents;

	if (dev->si_drv1 == NULL)
		goto error;
	if (devfs_get_cdevpriv((void **)&filp) != 0)
		goto error;

	file = td->td_fpop;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp, NULL) & events;
	else
		revents = 0;

	return (revents);
error:
	return (events & (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
}

/*
 * cdevsw mmap_single: fake up a vm_area_struct, let the driver's mmap()
 * hook fill in the physical range, and wrap that range in an OBJT_SG
 * VM object.  Assumes the driver hook sets vma.vm_pfn and vma.vm_len
 * (e.g. via vm_iomap_memory()) — TODO confirm against the drivers used.
 */
static int
linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	struct vm_area_struct vma;
	int error;

	td = curthread;
	file = td->td_fpop;
	if (dev->si_drv1 == NULL)
		return (ENODEV);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	vma.vm_start = 0;
	vma.vm_end = size;
	vma.vm_pgoff = *offset / PAGE_SIZE;
	vma.vm_pfn = 0;
	vma.vm_page_prot = VM_MEMATTR_DEFAULT;
	if (filp->f_op->mmap) {
		error = -filp->f_op->mmap(filp, &vma);
		if (error == 0) {
			struct sglist *sg;

			sg = sglist_alloc(1, M_WAITOK);
			sglist_append_phys(sg,
			    (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len);
			*object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len,
			    nprot, 0, td->td_ucred);
			if (*object == NULL) {
				sglist_free(sg);
				error = EINVAL;
				goto done;
			}
			*offset = 0;
			/* honor a non-default memory attribute, if set */
			if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) {
				VM_OBJECT_WLOCK(*object);
				vm_object_set_memattr(*object,
				    vma.vm_page_prot);
				VM_OBJECT_WUNLOCK(*object);
			}
		}
	} else
		error = ENODEV;
done:
	return (error);
}

/* character-device switch routing devfs operations to Linux hooks */
struct cdevsw linuxcdevsw = {
	.d_version = D_VERSION,
	.d_flags = D_TRACKCLOSE,
	.d_open = linux_dev_open,
	.d_close = linux_dev_close,
	.d_read = linux_dev_read,
	.d_write = linux_dev_write,
	.d_ioctl = linux_dev_ioctl,
	.d_mmap_single = linux_dev_mmap_single,
	.d_poll = linux_dev_poll,
};

/*
 * fileops read for non-devfs linux files: forward a single-iovec uio
 * to the driver's read() hook and advance the uio.
 */
static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* advance the iovec by the amount transferred */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

/*
 * fileops poll: forward to the driver's poll() hook.
 * NOTE(review): NULL poll table, as in linux_dev_poll() — no wakeup
 * registration happens through this path.
 */
static int
linux_file_poll(struct file *file, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct linux_file *filp;
	int revents;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp, NULL) & events;
	else
		revents = 0;

	return (revents);
}

/*
 * fileops close: run the driver's release() hook, clear SIGIO state
 * and free the shadow linux_file.
 */
static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file
*)file->f_data; 826 filp->f_flags = file->f_flag; 827 linux_set_current(td); 828 error = -filp->f_op->release(NULL, filp); 829 funsetown(&filp->f_sigio); 830 kfree(filp); 831 832 return (error); 833 } 834 835 static int 836 linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 837 struct thread *td) 838 { 839 struct linux_file *filp; 840 int error; 841 842 filp = (struct linux_file *)fp->f_data; 843 filp->f_flags = fp->f_flag; 844 error = 0; 845 846 linux_set_current(td); 847 switch (cmd) { 848 case FIONBIO: 849 break; 850 case FIOASYNC: 851 if (filp->f_op->fasync == NULL) 852 break; 853 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 854 break; 855 case FIOSETOWN: 856 error = fsetown(*(int *)data, &filp->f_sigio); 857 if (error == 0) 858 error = filp->f_op->fasync(0, filp, 859 fp->f_flag & FASYNC); 860 break; 861 case FIOGETOWN: 862 *(int *)data = fgetown(&filp->f_sigio); 863 break; 864 default: 865 error = ENOTTY; 866 break; 867 } 868 return (error); 869 } 870 871 static int 872 linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 873 struct thread *td) 874 { 875 876 return (EOPNOTSUPP); 877 } 878 879 static int 880 linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, 881 struct filedesc *fdp) 882 { 883 884 return (0); 885 } 886 887 struct fileops linuxfileops = { 888 .fo_read = linux_file_read, 889 .fo_write = invfo_rdwr, 890 .fo_truncate = invfo_truncate, 891 .fo_kqfilter = invfo_kqfilter, 892 .fo_stat = linux_file_stat, 893 .fo_fill_kinfo = linux_file_fill_kinfo, 894 .fo_poll = linux_file_poll, 895 .fo_close = linux_file_close, 896 .fo_ioctl = linux_file_ioctl, 897 .fo_chmod = invfo_chmod, 898 .fo_chown = invfo_chown, 899 .fo_sendfile = invfo_sendfile, 900 }; 901 902 /* 903 * Hash of vmmap addresses. This is infrequently accessed and does not 904 * need to be particularly large. This is done because we must store the 905 * caller's idea of the map size to properly unmap. 
 */
struct vmmap {
	LIST_ENTRY(vmmap)	vm_next;
	void			*vm_addr;	/* mapped KVA, the hash key */
	unsigned long		vm_size;	/* caller-supplied size */
};

struct vmmaphd {
	struct vmmap *lh_first;
};
#define	VMMAP_HASH_SIZE	64
#define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
#define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
static struct mtx vmmaplock;

/* Record an address/size pair so the unmap side can recover the size. */
static void
vmmap_add(void *addr, unsigned long size)
{
	struct vmmap *vmmap;

	/* allocate outside the lock; GFP_KERNEL may sleep */
	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
	mtx_lock(&vmmaplock);
	vmmap->vm_size = size;
	vmmap->vm_addr = addr;
	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
	mtx_unlock(&vmmaplock);
}

/*
 * Look up and unlink the entry for "addr".  Returns the entry (caller
 * frees) or NULL if the address was never registered.
 */
static struct vmmap *
vmmap_remove(void *addr)
{
	struct vmmap *vmmap;

	mtx_lock(&vmmaplock);
	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
		if (vmmap->vm_addr == addr)
			break;
	if (vmmap)
		LIST_REMOVE(vmmap, vm_next);
	mtx_unlock(&vmmaplock);

	return (vmmap);
}

#if defined(__i386__) || defined(__amd64__)
/*
 * ioremap() backend: map a physical range with the given memory
 * attribute and remember its size for iounmap().
 */
void *
_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
{
	void *addr;

	addr = pmap_mapdev_attr(phys_addr, size, attr);
	if (addr == NULL)
		return (NULL);
	vmmap_add(addr, size);

	return (addr);
}
#endif

/* Undo _ioremap_attr(); silently ignores unknown addresses. */
void
iounmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
#if defined(__i386__) || defined(__amd64__)
	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
#endif
	kfree(vmmap);
}


/*
 * vmap(): map "count" pages contiguously into KVA.  The flags/prot
 * arguments are accepted for API compatibility but not used here.
 */
void *
vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
{
	vm_offset_t off;
	size_t size;

	size = count * PAGE_SIZE;
	off = kva_alloc(size);
	if (off == 0)
		return (NULL);
	vmmap_add((void *)off, size);
	pmap_qenter(off, pages, count);

	return
	    ((void *)off);
}

/* Undo vmap(); silently ignores unknown addresses. */
void
vunmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
	kva_free((vm_offset_t)addr, vmmap->vm_size);
	kfree(vmmap);
}

/*
 * Allocate and format a string (vararg-list flavor).  Returns NULL on
 * allocation failure; caller kfree()s the result.
 */
char *
kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
{
	unsigned int len;
	char *p;
	va_list aq;

	/* measure first so exactly len + 1 bytes are allocated */
	va_copy(aq, ap);
	len = vsnprintf(NULL, 0, fmt, aq);
	va_end(aq);

	p = kmalloc(len + 1, gfp);
	if (p != NULL)
		vsnprintf(p, len + 1, fmt, ap);

	return (p);
}

/* Allocate and format a string (printf flavor); see kvasprintf(). */
char *
kasprintf(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(gfp, fmt, ap);
	va_end(ap);

	return (p);
}

/*
 * callout(9) trampoline: establish the Linux "current" context, then
 * invoke the Linux timer function with its stored argument.
 */
static void
linux_timer_callback_wrapper(void *context)
{
	struct timer_list *timer;

	linux_set_current(curthread);

	timer = context;
	timer->function(timer->data);
}

/* (Re)arm a timer to fire at the absolute jiffy value "expires". */
void
mod_timer(struct timer_list *timer, unsigned long expires)
{

	timer->expires = expires;
	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(expires),
	    &linux_timer_callback_wrapper, timer);
}

/* Arm a timer using its previously stored expiry. */
void
add_timer(struct timer_list *timer)
{

	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer);
}

/* Like add_timer(), but pin the callout to a specific CPU. */
void
add_timer_on(struct timer_list *timer, int cpu)
{

	callout_reset_on(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer, cpu);
}

static void
linux_timer_init(void *arg)
{

	/*
	 * Compute an internal HZ value which can divide 2**32 to
	 * avoid timer rounding problems when the tick value wraps
	 * around 2**32:
	 */
	/* round hz up to the next power of two, minus one (a mask) */
	linux_timer_hz_mask = 1;
	while (linux_timer_hz_mask < (unsigned long)hz)
		linux_timer_hz_mask *= 2;
	linux_timer_hz_mask--;
}
SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);

/*
 * complete()/complete_all() backend: bump the done count and wake one
 * or all sleepers on the completion's sleepqueue(9) channel.
 */
void
linux_complete_common(struct completion *c, int all)
{
	int wakeup_swapper;

	sleepq_lock(c);
	c->done++;
	if (all)
		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
	else
		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(c);
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Indefinite wait for done != 0 with or without signals.
 */
long
linux_wait_for_common(struct completion *c, int flags)
{
	if (SCHEDULER_STOPPED())
		return (0);

	/* caller's flags select interruptible vs. uninterruptible sleep */
	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		sleepq_lock(c);
		if (c->done)
			break;		/* exits with the sleepq lock held */
		sleepq_add(c, NULL, "completion", flags, 0);
		if (flags & SLEEPQ_INTERRUPTIBLE) {
			/* sleepq_wait_sig() drops the lock on return */
			if (sleepq_wait_sig(c, 0) != 0)
				return (-ERESTARTSYS);
		} else
			sleepq_wait(c, 0);
	}
	/* consume one completion; lock still held from the break above */
	c->done--;
	sleepq_release(c);

	return (0);
}

/*
 * Time limited wait for done != 0 with or without signals.
 */
long
linux_wait_for_timeout_common(struct completion *c, long timeout, int flags)
{
	long end = jiffies + timeout;

	if (SCHEDULER_STOPPED())
		return (0);

	/* caller's flags select interruptible vs. uninterruptible sleep */
	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		int ret;

		sleepq_lock(c);
		if (c->done)
			break;		/* exits with the sleepq lock held */
		sleepq_add(c, NULL, "completion", flags, 0);
		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
		/* the timed-wait variants drop the lock on return */
		if (flags & SLEEPQ_INTERRUPTIBLE)
			ret = sleepq_timedwait_sig(c, 0);
		else
			ret = sleepq_timedwait(c, 0);
		if (ret != 0) {
			/* check for timeout or signal */
			if (ret == EWOULDBLOCK)
				return (0);
			else
				return (-ERESTARTSYS);
		}
	}
	/* consume one completion; lock still held from the break above */
	c->done--;
	sleepq_release(c);

	/* return how many jiffies are left */
	return (linux_timer_jiffies_until(end));
}

/*
 * Non-blocking completion consume: returns 1 and decrements done if
 * the completion was signalled, 0 otherwise.
 */
int
linux_try_wait_for_completion(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done)
		c->done--;
	else
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

/* Non-consuming check whether the completion has been signalled. */
int
linux_completion_done(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done == 0)
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

/*
 * kobject release for a dynamically allocated linux_cdev: destroy the
 * backing devfs node, free the structure, then drop the parent ref.
 */
static void
linux_cdev_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kfree(cdev);
	kobject_put(parent);
}

/*
 * As above, but for statically allocated linux_cdevs: everything except
 * the kfree() of the structure itself.
 */
static void
linux_cdev_static_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kobject_put(parent);
}

const struct kobj_type linux_cdev_ktype = {
	.release = linux_cdev_release,
};

const struct kobj_type linux_cdev_static_ktype = {
	.release = linux_cdev_static_release,
};

/*
 * EVENTHANDLER(9) glue: translate FreeBSD ifnet events into Linux
 * netdevice notifier callbacks.  "arg" is the registered
 * notifier_block in each of the handlers below.
 */
static void
linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate)
{
	struct notifier_block *nb;

	nb = arg;
	if (linkstate == LINK_STATE_UP)
		nb->notifier_call(nb, NETDEV_UP, ifp);
	else
		nb->notifier_call(nb, NETDEV_DOWN, ifp);
}

static void
linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_REGISTER, ifp);
}

static void
linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_UNREGISTER, ifp);
}

static void
linux_handle_iflladdr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp);
}

static void
linux_handle_ifaddr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
}

/*
 * Hook a Linux netdevice notifier block up to the corresponding FreeBSD
 * ifnet event handlers; the tags are stored per event for unregister.
 */
int
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}
/* Hook a Linux inetaddr notifier up to the FreeBSD ifaddr event. */
int
register_inetaddr_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
	return (0);
}

/* Undo register_netdevice_notifier() using the stored tags. */
int
unregister_netdevice_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifnet_link_event,
	    nb->tags[NETDEV_UP]);
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
	    nb->tags[NETDEV_REGISTER]);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    nb->tags[NETDEV_UNREGISTER]);
	EVENTHANDLER_DEREGISTER(iflladdr_event,
	    nb->tags[NETDEV_CHANGEADDR]);

	return (0);
}

/* Undo register_inetaddr_notifier(). */
int
unregister_inetaddr_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifaddr_event,
	    nb->tags[NETDEV_CHANGEIFADDR]);

	return (0);
}

/* Carries the Linux comparator and its opaque arg through qsort_r(). */
struct list_sort_thunk {
	int (*cmp)(void *, struct list_head *, struct list_head *);
	void *priv;
};

/*
 * qsort_r(9) comparator adapter: each qsort element is a pointer to a
 * list_head; unwrap and call the user's Linux-style comparator.
 */
static inline int
linux_le_cmp(void *priv, const void *d1, const void *d2)
{
	struct list_head *le1, *le2;
	struct list_sort_thunk *thunk;

	thunk = priv;
	le1 = *(__DECONST(struct list_head **, d1));
	le2 = *(__DECONST(struct list_head **, d2));
	return ((thunk->cmp)(thunk->priv, le1, le2));
}

/*
 * Linux list_sort(): copy the list into a temporary pointer array,
 * qsort_r() it, and rebuild the list in sorted order.
 */
void
list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
    struct list_head *a, struct list_head *b))
{
	struct list_sort_thunk thunk;
	struct list_head **ar, *le;
	size_t count, i;

	count = 0;
	list_for_each(le, head)
		count++;
	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
	i = 0;
	list_for_each(le, head)
		ar[i++] = le;
	thunk.cmp = cmp;
	thunk.priv = priv;
	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
	/* relink the nodes in sorted order */
	INIT_LIST_HEAD(head);
	for (i = 0; i < count; i++)
		list_add_tail(ar[i], head);
	free(ar,
M_KMALLOC); 1385 } 1386 1387 void 1388 linux_irq_handler(void *ent) 1389 { 1390 struct irq_ent *irqe; 1391 1392 linux_set_current(curthread); 1393 1394 irqe = ent; 1395 irqe->handler(irqe->irq, irqe->arg); 1396 } 1397 1398 struct linux_cdev * 1399 linux_find_cdev(const char *name, unsigned major, unsigned minor) 1400 { 1401 int unit = MKDEV(major, minor); 1402 struct cdev *cdev; 1403 1404 dev_lock(); 1405 LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) { 1406 struct linux_cdev *ldev = cdev->si_drv1; 1407 if (dev2unit(cdev) == unit && 1408 strcmp(kobject_name(&ldev->kobj), name) == 0) { 1409 break; 1410 } 1411 } 1412 dev_unlock(); 1413 1414 return (cdev != NULL ? cdev->si_drv1 : NULL); 1415 } 1416 1417 int 1418 __register_chrdev(unsigned int major, unsigned int baseminor, 1419 unsigned int count, const char *name, 1420 const struct file_operations *fops) 1421 { 1422 struct linux_cdev *cdev; 1423 int ret = 0; 1424 int i; 1425 1426 for (i = baseminor; i < baseminor + count; i++) { 1427 cdev = cdev_alloc(); 1428 cdev_init(cdev, fops); 1429 kobject_set_name(&cdev->kobj, name); 1430 1431 ret = cdev_add(cdev, makedev(major, i), 1); 1432 if (ret != 0) 1433 break; 1434 } 1435 return (ret); 1436 } 1437 1438 int 1439 __register_chrdev_p(unsigned int major, unsigned int baseminor, 1440 unsigned int count, const char *name, 1441 const struct file_operations *fops, uid_t uid, 1442 gid_t gid, int mode) 1443 { 1444 struct linux_cdev *cdev; 1445 int ret = 0; 1446 int i; 1447 1448 for (i = baseminor; i < baseminor + count; i++) { 1449 cdev = cdev_alloc(); 1450 cdev_init(cdev, fops); 1451 kobject_set_name(&cdev->kobj, name); 1452 1453 ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode); 1454 if (ret != 0) 1455 break; 1456 } 1457 return (ret); 1458 } 1459 1460 void 1461 __unregister_chrdev(unsigned int major, unsigned int baseminor, 1462 unsigned int count, const char *name) 1463 { 1464 struct linux_cdev *cdevp; 1465 int i; 1466 1467 for (i = baseminor; i < baseminor + count; 
i++) { 1468 cdevp = linux_find_cdev(name, major, i); 1469 if (cdevp != NULL) 1470 cdev_del(cdevp); 1471 } 1472 } 1473 1474 #if defined(__i386__) || defined(__amd64__) 1475 bool linux_cpu_has_clflush; 1476 #endif 1477 1478 static void 1479 linux_compat_init(void *arg) 1480 { 1481 struct sysctl_oid *rootoid; 1482 int i; 1483 1484 #if defined(__i386__) || defined(__amd64__) 1485 linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH); 1486 #endif 1487 1488 rootoid = SYSCTL_ADD_ROOT_NODE(NULL, 1489 OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); 1490 kobject_init(&linux_class_root, &linux_class_ktype); 1491 kobject_set_name(&linux_class_root, "class"); 1492 linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), 1493 OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); 1494 kobject_init(&linux_root_device.kobj, &linux_dev_ktype); 1495 kobject_set_name(&linux_root_device.kobj, "device"); 1496 linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL, 1497 SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, 1498 "device"); 1499 linux_root_device.bsddev = root_bus; 1500 linux_class_misc.name = "misc"; 1501 class_register(&linux_class_misc); 1502 INIT_LIST_HEAD(&pci_drivers); 1503 INIT_LIST_HEAD(&pci_devices); 1504 spin_lock_init(&pci_lock); 1505 mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); 1506 for (i = 0; i < VMMAP_HASH_SIZE; i++) 1507 LIST_INIT(&vmmaphead[i]); 1508 } 1509 SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); 1510 1511 static void 1512 linux_compat_uninit(void *arg) 1513 { 1514 linux_kobject_kfree_name(&linux_class_root); 1515 linux_kobject_kfree_name(&linux_root_device.kobj); 1516 linux_kobject_kfree_name(&linux_class_misc.kobj); 1517 } 1518 SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); 1519 1520 /* 1521 * NOTE: Linux frequently uses "unsigned long" for pointer to integer 1522 * conversion and vice versa, where in FreeBSD "uintptr_t" would be 
1523 * used. Assert these types have the same size, else some parts of the 1524 * LinuxKPI may not work like expected: 1525 */ 1526 CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t)); 1527