/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/sleepqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/rwlock.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/stdarg.h>

#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#endif

#include <linux/kobject.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/sysfs.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/timer.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/compat.h>

#include <vm/vm_pager.h>

SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters");

MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");

#include <linux/rbtree.h>
/* Undo Linux compat changes. */
#undef RB_ROOT
#undef file
#undef cdev
#define	RB_ROOT(head)	(head)->rbh_root

struct kobject linux_class_root;
struct device linux_root_device;
struct class linux_class_misc;
struct list_head pci_drivers;
struct list_head pci_devices;
spinlock_t pci_lock;

unsigned long linux_timer_hz_mask;

int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
	panic("no cmp");
}

RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);

int
kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
{
	va_list tmp_va;
	int len;
	char *old;
	char *name;
	char dummy;

	old = kobj->name;

	if (old && fmt == NULL)
		return (0);

	/* compute length of string */
	va_copy(tmp_va, args);
	len = vsnprintf(&dummy, 0, fmt, tmp_va);
	va_end(tmp_va);

	/* account for zero termination */
	len++;

	/* check for error */
	if (len < 1)
		return (-EINVAL);

	/* allocate memory for string */
	name = kzalloc(len, GFP_KERNEL);
	if (name == NULL)
		return (-ENOMEM);
	vsnprintf(name, len, fmt, args);
	kobj->name = name;

	/* free old string */
	kfree(old);

	/* filter new string */
	for (; *name != '\0'; name++)
		if (*name == '/')
			*name = '!';
	return (0);
}

int
kobject_set_name(struct kobject *kobj, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);

	return (error);
}

static int
kobject_add_complete(struct kobject *kobj, struct kobject *parent)
{
	const struct kobj_type *t;
	int error;

	kobj->parent = parent;
	error = sysfs_create_dir(kobj);
	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
		struct attribute **attr;
		t = kobj->ktype;

		for (attr = t->default_attrs; *attr != NULL; attr++) {
			error = sysfs_create_file(kobj, *attr);
			if (error)
				break;
		}
		if (error)
			sysfs_remove_dir(kobj);
	}
	return (error);
}

int
kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);

	return kobject_add_complete(kobj, parent);
}

void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}

static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}

static void
linux_kobject_kfree_name(struct kobject *kobj)
{
	if (kobj) {
		kfree(kobj->name);
	}
}

const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};

static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}

static ssize_t
linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct class, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct class, kobj),
		    dattr, buf, count);
	return (error);
}

static void
linux_class_release(struct kobject *kobj)
{
	struct class *class;

	class = container_of(kobj, struct class, kobj);
	if (class->class_release)
		class->class_release(class);
}

static const struct sysfs_ops linux_class_sysfs = {
	.show = linux_class_show,
	.store = linux_class_store,
};

const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};

static void
linux_dev_release(struct kobject *kobj)
{
	struct device *dev;

	dev = container_of(kobj, struct device, kobj);
	/* This is the precedence defined by linux. */
	if (dev->release)
		dev->release(dev);
	else if (dev->class && dev->class->dev_release)
		dev->class->dev_release(dev);
}

static ssize_t
linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct device, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct device, kobj),
		    dattr, buf, count);
	return (error);
}

static const struct sysfs_ops linux_dev_sysfs = {
	.show = linux_dev_show,
	.store = linux_dev_store,
};

const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};

struct device *
device_create(struct class *class, struct device *parent, dev_t devt,
    void *drvdata, const char *fmt, ...)
{
	struct device *dev;
	va_list args;

	dev = kzalloc(sizeof(*dev), M_WAITOK);
	dev->parent = parent;
	dev->class = class;
	dev->devt = devt;
	dev->driver_data = drvdata;
	dev->release = linux_device_release;
	va_start(args, fmt);
	kobject_set_name_vargs(&dev->kobj, fmt, args);
	va_end(args);
	device_register(dev);

	return (dev);
}

int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	kobject_init(kobj, ktype);
	kobj->ktype = ktype;
	kobj->parent = parent;
	kobj->name = NULL;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);
	return kobject_add_complete(kobj, parent);
}

static void
linux_file_dtor(void *cdp)
{
	struct linux_file *filp;

	linux_set_current(curthread);
	filp = cdp;
	filp->f_op->release(filp->f_vnode, filp);
	vdrop(filp->f_vnode);
	kfree(filp);
}

static int
linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (ENODEV);
	filp = kzalloc(sizeof(*filp), GFP_KERNEL);
	filp->f_dentry = &filp->f_dentry_store;
	filp->f_op = ldev->ops;
	filp->f_flags = file->f_flag;
	vhold(file->f_vnode);
	filp->f_vnode = file->f_vnode;
	linux_set_current(td);
	if (filp->f_op->open) {
		error = -filp->f_op->open(file->f_vnode, filp);
		if (error) {
			kfree(filp);
			goto done;
		}
	}
	error = devfs_set_cdevpriv(filp, linux_file_dtor);
	if (error) {
		filp->f_op->release(file->f_vnode, filp);
		kfree(filp);
	}
done:
	return (error);
}

static int
linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	devfs_clear_cdevpriv();

	return (0);
}

#define	LINUX_IOCTL_MIN_PTR 0x10000UL
#define	LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX)

static inline int
linux_remap_address(void **uaddr, size_t len)
{
	uintptr_t uaddr_val = (uintptr_t)(*uaddr);

	if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR &&
	    uaddr_val < LINUX_IOCTL_MAX_PTR)) {
		struct task_struct *pts = current;
		if (pts == NULL) {
			*uaddr = NULL;
			return (1);
		}

		/* compute data offset */
		uaddr_val -= LINUX_IOCTL_MIN_PTR;

		/* check that length is within bounds */
		if ((len > IOCPARM_MAX) ||
		    (uaddr_val + len) > pts->bsd_ioctl_len) {
			*uaddr = NULL;
			return (1);
		}

		/* re-add kernel buffer address */
		uaddr_val += (uintptr_t)pts->bsd_ioctl_data;

		/* update address location */
		*uaddr = (void *)uaddr_val;
		return (1);
	}
	return (0);
}

int
linux_copyin(const void *uaddr, void *kaddr, size_t len)
{
	if (linux_remap_address(__DECONST(void **, &uaddr), len)) {
		if (uaddr == NULL)
			return (-EFAULT);
		memcpy(kaddr, uaddr, len);
		return (0);
	}
	return (-copyin(uaddr, kaddr, len));
}

int
linux_copyout(const void *kaddr, void *uaddr, size_t len)
{
	if (linux_remap_address(&uaddr, len)) {
		if (uaddr == NULL)
			return (-EFAULT);
		memcpy(uaddr, kaddr, len);
		return (0);
	}
	return (-copyout(kaddr, uaddr, len));
}

size_t
linux_clear_user(void *_uaddr, size_t _len)
{
	uint8_t *uaddr = _uaddr;
	size_t len = _len;

	/* make sure uaddr is aligned before going into the fast loop */
	while (((uintptr_t)uaddr & 7) != 0 && len > 7) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}

	/* zero 8 bytes at a time */
	while (len > 7) {
#ifdef __LP64__
		if (suword64(uaddr, 0))
			return (_len);
#else
		if (suword32(uaddr, 0))
			return (_len);
		if (suword32(uaddr + 4, 0))
			return (_len);
#endif
		uaddr += 8;
		len -= 8;
	}

	/* zero fill end, if any */
	while (len > 0) {
		if (subyte(uaddr, 0))
			return (_len);
		uaddr++;
		len--;
	}
	return (0);
}

int
linux_access_ok(int rw, const void *uaddr, size_t len)
{
	uintptr_t saddr;
	uintptr_t eaddr;

	/* get start and end address */
	saddr = (uintptr_t)uaddr;
	eaddr = (uintptr_t)uaddr + len;

	/* verify addresses are valid for userspace */
	return ((saddr == eaddr) ||
	    (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS));
}

static int
linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	unsigned size;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;

	linux_set_current(td);
	size = IOCPARM_LEN(cmd);
	/* refer to logic in sys_ioctl() */
	if (size > 0) {
		/*
		 * Setup hint for linux_copyin() and linux_copyout().
		 *
		 * Background: Linux code expects a user-space address
		 * while FreeBSD supplies a kernel-space address.
		 */
		current->bsd_ioctl_data = data;
		current->bsd_ioctl_len = size;
		data = (void *)LINUX_IOCTL_MIN_PTR;
	} else {
		/* fetch user-space pointer */
		data = *(void **)data;
	}
	if (filp->f_op->unlocked_ioctl)
		error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data);
	else
		error = ENOTTY;
	if (size > 0) {
		current->bsd_ioctl_data = NULL;
		current->bsd_ioctl_len = 0;
	}

	return (error);
}

static int
linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

static int
linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->write) {
		bytes = filp->f_op->write(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

static int
linux_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	int revents;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp, NULL) & events;
	else
		revents = 0;

	return (revents);
}

static int
linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	struct vm_area_struct vma;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (ENODEV);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	vma.vm_start = 0;
	vma.vm_end = size;
	vma.vm_pgoff = *offset / PAGE_SIZE;
	vma.vm_pfn = 0;
	vma.vm_page_prot = VM_MEMATTR_DEFAULT;
	if (filp->f_op->mmap) {
		error = -filp->f_op->mmap(filp, &vma);
		if (error == 0) {
			struct sglist *sg;

			sg = sglist_alloc(1, M_WAITOK);
			sglist_append_phys(sg,
			    (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len);
			*object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len,
			    nprot, 0, td->td_ucred);
			if (*object == NULL) {
				sglist_free(sg);
				error = EINVAL;
				goto done;
			}
			*offset = 0;
			if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) {
				VM_OBJECT_WLOCK(*object);
				vm_object_set_memattr(*object,
				    vma.vm_page_prot);
				VM_OBJECT_WUNLOCK(*object);
			}
		}
	} else
		error = ENODEV;
done:
	return (error);
}

struct cdevsw linuxcdevsw = {
	.d_version = D_VERSION,
	.d_flags = D_TRACKCLOSE,
	.d_open = linux_dev_open,
	.d_close = linux_dev_close,
	.d_read = linux_dev_read,
	.d_write = linux_dev_write,
	.d_ioctl = linux_dev_ioctl,
	.d_mmap_single = linux_dev_mmap_single,
	.d_poll = linux_dev_poll,
};

static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

static int
linux_file_poll(struct file *file, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct linux_file *filp;
	int revents;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp, NULL) & events;
	else
		revents = 0;

	return (revents);
}

static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	error = -filp->f_op->release(NULL, filp);
	funsetown(&filp->f_sigio);
	kfree(filp);

	return (error);
}

static int
linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
    struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;
	error = 0;

	linux_set_current(td);
	switch (cmd) {
	case FIONBIO:
		break;
	case FIOASYNC:
		if (filp->f_op->fasync == NULL)
			break;
		error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC);
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &filp->f_sigio);
		if (error == 0)
			error = filp->f_op->fasync(0, filp,
			    fp->f_flag & FASYNC);
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(&filp->f_sigio);
		break;
	default:
		error = ENOTTY;
		break;
	}
	return (error);
}

static int
linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EOPNOTSUPP);
}

static int
linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{

	return (0);
}

struct fileops linuxfileops = {
	.fo_read = linux_file_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_kqfilter = invfo_kqfilter,
	.fo_stat = linux_file_stat,
	.fo_fill_kinfo = linux_file_fill_kinfo,
	.fo_poll = linux_file_poll,
	.fo_close = linux_file_close,
	.fo_ioctl = linux_file_ioctl,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
};

/*
 * Hash of vmmap addresses.  This is infrequently accessed and does not
 * need to be particularly large.  This is done because we must store the
 * caller's idea of the map size to properly unmap.
 */
struct vmmap {
	LIST_ENTRY(vmmap)	vm_next;
	void			*vm_addr;
	unsigned long		vm_size;
};

struct vmmaphd {
	struct vmmap *lh_first;
};
#define	VMMAP_HASH_SIZE	64
#define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
#define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
static struct mtx vmmaplock;

static void
vmmap_add(void *addr, unsigned long size)
{
	struct vmmap *vmmap;

	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
	mtx_lock(&vmmaplock);
	vmmap->vm_size = size;
	vmmap->vm_addr = addr;
	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
	mtx_unlock(&vmmaplock);
}

static struct vmmap *
vmmap_remove(void *addr)
{
	struct vmmap *vmmap;

	mtx_lock(&vmmaplock);
	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
		if (vmmap->vm_addr == addr)
			break;
	if (vmmap)
		LIST_REMOVE(vmmap, vm_next);
	mtx_unlock(&vmmaplock);

	return (vmmap);
}

#if defined(__i386__) || defined(__amd64__)
void *
_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
{
	void *addr;

	addr = pmap_mapdev_attr(phys_addr, size, attr);
	if (addr == NULL)
		return (NULL);
	vmmap_add(addr, size);

	return (addr);
}
#endif

void
iounmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
#if defined(__i386__) || defined(__amd64__)
	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
#endif
	kfree(vmmap);
}

void *
vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
{
	vm_offset_t off;
	size_t size;

	size = count * PAGE_SIZE;
	off = kva_alloc(size);
	if (off == 0)
		return (NULL);
	vmmap_add((void *)off, size);
	pmap_qenter(off, pages, count);

	return ((void *)off);
}

void
vunmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
	kva_free((vm_offset_t)addr, vmmap->vm_size);
	kfree(vmmap);
}

char *
kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
{
	unsigned int len;
	char *p;
	va_list aq;

	va_copy(aq, ap);
	len = vsnprintf(NULL, 0, fmt, aq);
	va_end(aq);

	p = kmalloc(len + 1, gfp);
	if (p != NULL)
		vsnprintf(p, len + 1, fmt, ap);

	return (p);
}

char *
kasprintf(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(gfp, fmt, ap);
	va_end(ap);

	return (p);
}

static void
linux_timer_callback_wrapper(void *context)
{
	struct timer_list *timer;

	linux_set_current(curthread);

	timer = context;
	timer->function(timer->data);
}

void
mod_timer(struct timer_list *timer, unsigned long expires)
{

	timer->expires = expires;
	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(expires),
	    &linux_timer_callback_wrapper, timer);
}

void
add_timer(struct timer_list *timer)
{

	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer);
}

void
add_timer_on(struct timer_list *timer, int cpu)
{

	callout_reset_on(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer, cpu);
}

static void
linux_timer_init(void *arg)
{

	/*
	 * Compute an internal HZ value which can divide 2**32 to
	 * avoid timer rounding problems when the tick value wraps
	 * around 2**32:
	 */
	linux_timer_hz_mask = 1;
	while (linux_timer_hz_mask < (unsigned long)hz)
		linux_timer_hz_mask *= 2;
	linux_timer_hz_mask--;
}
SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);

void
linux_complete_common(struct completion *c, int all)
{
	int wakeup_swapper;

	sleepq_lock(c);
	c->done++;
	if (all)
		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
	else
		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(c);
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Indefinite wait for done != 0 with or without signals.
 */
long
linux_wait_for_common(struct completion *c, int flags)
{
	if (SCHEDULER_STOPPED())
		return (0);

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		if (flags & SLEEPQ_INTERRUPTIBLE) {
			if (sleepq_wait_sig(c, 0) != 0)
				return (-ERESTARTSYS);
		} else
			sleepq_wait(c, 0);
	}
	c->done--;
	sleepq_release(c);

	return (0);
}

/*
 * Time limited wait for done != 0 with or without signals.
 */
long
linux_wait_for_timeout_common(struct completion *c, long timeout, int flags)
{
	long end = jiffies + timeout;

	if (SCHEDULER_STOPPED())
		return (0);

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		int ret;

		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
		if (flags & SLEEPQ_INTERRUPTIBLE)
			ret = sleepq_timedwait_sig(c, 0);
		else
			ret = sleepq_timedwait(c, 0);
		if (ret != 0) {
			/* check for timeout or signal */
			if (ret == EWOULDBLOCK)
				return (0);
			else
				return (-ERESTARTSYS);
		}
	}
	c->done--;
	sleepq_release(c);

	/* return how many jiffies are left */
	return (linux_timer_jiffies_until(end));
}

int
linux_try_wait_for_completion(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done)
		c->done--;
	else
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

int
linux_completion_done(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done == 0)
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

static void
linux_cdev_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kfree(cdev);
	kobject_put(parent);
}

static void
linux_cdev_static_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kobject_put(parent);
}

const struct kobj_type linux_cdev_ktype = {
	.release = linux_cdev_release,
};

const struct kobj_type linux_cdev_static_ktype = {
	.release = linux_cdev_static_release,
};

static void
linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate)
{
	struct notifier_block *nb;

	nb = arg;
	if (linkstate == LINK_STATE_UP)
		nb->notifier_call(nb, NETDEV_UP, ifp);
	else
		nb->notifier_call(nb, NETDEV_DOWN, ifp);
}

static void
linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_REGISTER, ifp);
}

static void
linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_UNREGISTER, ifp);
}

static void
linux_handle_iflladdr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp);
}

static void
linux_handle_ifaddr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
}

int
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}

int
register_inetaddr_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
	return (0);
}

int
unregister_netdevice_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifnet_link_event,
	    nb->tags[NETDEV_UP]);
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
	    nb->tags[NETDEV_REGISTER]);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    nb->tags[NETDEV_UNREGISTER]);
	EVENTHANDLER_DEREGISTER(iflladdr_event,
	    nb->tags[NETDEV_CHANGEADDR]);

	return (0);
}

int
unregister_inetaddr_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifaddr_event,
	    nb->tags[NETDEV_CHANGEIFADDR]);

	return (0);
}

struct list_sort_thunk {
	int (*cmp)(void *, struct list_head *, struct list_head *);
	void *priv;
};

static inline int
linux_le_cmp(void *priv, const void *d1, const void *d2)
{
	struct list_head *le1, *le2;
	struct list_sort_thunk *thunk;

	thunk = priv;
	le1 = *(__DECONST(struct list_head **, d1));
	le2 = *(__DECONST(struct list_head **, d2));
	return ((thunk->cmp)(thunk->priv, le1, le2));
}

void
list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
    struct list_head *a, struct list_head *b))
{
	struct list_sort_thunk thunk;
	struct list_head **ar, *le;
	size_t count, i;

	count = 0;
	list_for_each(le, head)
		count++;
	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
	i = 0;
	list_for_each(le, head)
		ar[i++] = le;
	thunk.cmp = cmp;
	thunk.priv = priv;
	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
	INIT_LIST_HEAD(head);
	for (i = 0; i < count; i++)
		list_add_tail(ar[i], head);
	free(ar, M_KMALLOC);
}

void
linux_irq_handler(void *ent)
{
	struct irq_ent *irqe;

	linux_set_current(curthread);

	irqe = ent;
	irqe->handler(irqe->irq, irqe->arg);
}

struct linux_cdev *
linux_find_cdev(const char *name, unsigned major, unsigned minor)
{
	int unit = MKDEV(major, minor);
	struct cdev *cdev;

	dev_lock();
	LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) {
		struct linux_cdev *ldev = cdev->si_drv1;
		if (dev2unit(cdev) == unit &&
		    strcmp(kobject_name(&ldev->kobj), name) == 0) {
			break;
		}
	}
	dev_unlock();

	return (cdev != NULL ? cdev->si_drv1 : NULL);
}
int
__register_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add(cdev, makedev(major, i), 1);
		if (ret != 0)
			break;
	}
	return (ret);
}

int
__register_chrdev_p(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops, uid_t uid,
    gid_t gid, int mode)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode);
		if (ret != 0)
			break;
	}
	return (ret);
}

void
__unregister_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name)
{
	struct linux_cdev *cdevp;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdevp = linux_find_cdev(name, major, i);
		if (cdevp != NULL)
			cdev_del(cdevp);
	}
}

#if defined(__i386__) || defined(__amd64__)
bool linux_cpu_has_clflush;
#endif

static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

#if defined(__i386__) || defined(__amd64__)
	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif

	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);

static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);

/*
 * NOTE: Linux frequently uses "unsigned long" for pointer to integer
 * conversion and vice versa, where in FreeBSD "uintptr_t" would be
 * used.  Assert these types have the same size, else some parts of the
 * LinuxKPI may not work like expected:
 */
CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));
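
/*
 * Illustrative sketch (kept out of compilation on purpose): how a LinuxKPI
 * consumer is expected to drive the completion and timer helpers implemented
 * above.  complete() and wait_for_completion() are the usual wrapper macros
 * from <linux/completion.h> that expand to linux_complete_common() and
 * linux_wait_for_common(); the function and variable names below are
 * hypothetical and exist only for this example.
 */
#if 0
static struct completion example_done;
static struct timer_list example_timer;

static void
example_timer_fn(unsigned long data)
{
	/* Invoked via linux_timer_callback_wrapper(); wake up the waiter. */
	complete(&example_done);
}

static void
example_wait(void)
{
	init_completion(&example_done);
	setup_timer(&example_timer, example_timer_fn, 0);
	/* mod_timer() arms the underlying callout (see above). */
	mod_timer(&example_timer, jiffies + msecs_to_jiffies(100));
	/* Blocks in linux_wait_for_common() until complete() is called. */
	wait_for_completion(&example_done);
	del_timer_sync(&example_timer);
}
#endif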