1 /*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice unmodified, this list of conditions, and the following 13 * disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/kernel.h> 37 #include <sys/sysctl.h> 38 #include <sys/proc.h> 39 #include <sys/sglist.h> 40 #include <sys/sleepqueue.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/bus.h> 44 #include <sys/fcntl.h> 45 #include <sys/file.h> 46 #include <sys/filio.h> 47 #include <sys/rwlock.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 52 #include <machine/stdarg.h> 53 54 #if defined(__i386__) || defined(__amd64__) 55 #include <machine/md_var.h> 56 #endif 57 58 #include <linux/kobject.h> 59 #include <linux/device.h> 60 #include <linux/slab.h> 61 #include <linux/module.h> 62 #include <linux/moduleparam.h> 63 #include <linux/cdev.h> 64 #include <linux/file.h> 65 #include <linux/sysfs.h> 66 #include <linux/mm.h> 67 #include <linux/io.h> 68 #include <linux/vmalloc.h> 69 #include <linux/netdevice.h> 70 #include <linux/timer.h> 71 #include <linux/workqueue.h> 72 #include <linux/rcupdate.h> 73 #include <linux/interrupt.h> 74 #include <linux/uaccess.h> 75 #include <linux/kernel.h> 76 #include <linux/list.h> 77 #include <linux/compat.h> 78 79 #include <vm/vm_pager.h> 80 81 SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters"); 82 83 MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 84 85 #include <linux/rbtree.h> 86 /* Undo Linux compat changes. 
*/ 87 #undef RB_ROOT 88 #undef file 89 #undef cdev 90 #define RB_ROOT(head) (head)->rbh_root 91 92 struct kobject linux_class_root; 93 struct device linux_root_device; 94 struct class linux_class_misc; 95 struct list_head pci_drivers; 96 struct list_head pci_devices; 97 struct net init_net; 98 spinlock_t pci_lock; 99 100 unsigned long linux_timer_hz_mask; 101 102 int 103 panic_cmp(struct rb_node *one, struct rb_node *two) 104 { 105 panic("no cmp"); 106 } 107 108 RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 109 110 int 111 kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args) 112 { 113 va_list tmp_va; 114 int len; 115 char *old; 116 char *name; 117 char dummy; 118 119 old = kobj->name; 120 121 if (old && fmt == NULL) 122 return (0); 123 124 /* compute length of string */ 125 va_copy(tmp_va, args); 126 len = vsnprintf(&dummy, 0, fmt, tmp_va); 127 va_end(tmp_va); 128 129 /* account for zero termination */ 130 len++; 131 132 /* check for error */ 133 if (len < 1) 134 return (-EINVAL); 135 136 /* allocate memory for string */ 137 name = kzalloc(len, GFP_KERNEL); 138 if (name == NULL) 139 return (-ENOMEM); 140 vsnprintf(name, len, fmt, args); 141 kobj->name = name; 142 143 /* free old string */ 144 kfree(old); 145 146 /* filter new string */ 147 for (; *name != '\0'; name++) 148 if (*name == '/') 149 *name = '!'; 150 return (0); 151 } 152 153 int 154 kobject_set_name(struct kobject *kobj, const char *fmt, ...) 
155 { 156 va_list args; 157 int error; 158 159 va_start(args, fmt); 160 error = kobject_set_name_vargs(kobj, fmt, args); 161 va_end(args); 162 163 return (error); 164 } 165 166 static int 167 kobject_add_complete(struct kobject *kobj, struct kobject *parent) 168 { 169 const struct kobj_type *t; 170 int error; 171 172 kobj->parent = parent; 173 error = sysfs_create_dir(kobj); 174 if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { 175 struct attribute **attr; 176 t = kobj->ktype; 177 178 for (attr = t->default_attrs; *attr != NULL; attr++) { 179 error = sysfs_create_file(kobj, *attr); 180 if (error) 181 break; 182 } 183 if (error) 184 sysfs_remove_dir(kobj); 185 186 } 187 return (error); 188 } 189 190 int 191 kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) 192 { 193 va_list args; 194 int error; 195 196 va_start(args, fmt); 197 error = kobject_set_name_vargs(kobj, fmt, args); 198 va_end(args); 199 if (error) 200 return (error); 201 202 return kobject_add_complete(kobj, parent); 203 } 204 205 void 206 linux_kobject_release(struct kref *kref) 207 { 208 struct kobject *kobj; 209 char *name; 210 211 kobj = container_of(kref, struct kobject, kref); 212 sysfs_remove_dir(kobj); 213 name = kobj->name; 214 if (kobj->ktype && kobj->ktype->release) 215 kobj->ktype->release(kobj); 216 kfree(name); 217 } 218 219 static void 220 linux_kobject_kfree(struct kobject *kobj) 221 { 222 kfree(kobj); 223 } 224 225 static void 226 linux_kobject_kfree_name(struct kobject *kobj) 227 { 228 if (kobj) { 229 kfree(kobj->name); 230 } 231 } 232 233 const struct kobj_type linux_kfree_type = { 234 .release = linux_kobject_kfree 235 }; 236 237 static void 238 linux_device_release(struct device *dev) 239 { 240 pr_debug("linux_device_release: %s\n", dev_name(dev)); 241 kfree(dev); 242 } 243 244 static ssize_t 245 linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf) 246 { 247 struct class_attribute *dattr; 248 ssize_t error; 249 250 dattr = 
container_of(attr, struct class_attribute, attr); 251 error = -EIO; 252 if (dattr->show) 253 error = dattr->show(container_of(kobj, struct class, kobj), 254 dattr, buf); 255 return (error); 256 } 257 258 static ssize_t 259 linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf, 260 size_t count) 261 { 262 struct class_attribute *dattr; 263 ssize_t error; 264 265 dattr = container_of(attr, struct class_attribute, attr); 266 error = -EIO; 267 if (dattr->store) 268 error = dattr->store(container_of(kobj, struct class, kobj), 269 dattr, buf, count); 270 return (error); 271 } 272 273 static void 274 linux_class_release(struct kobject *kobj) 275 { 276 struct class *class; 277 278 class = container_of(kobj, struct class, kobj); 279 if (class->class_release) 280 class->class_release(class); 281 } 282 283 static const struct sysfs_ops linux_class_sysfs = { 284 .show = linux_class_show, 285 .store = linux_class_store, 286 }; 287 288 const struct kobj_type linux_class_ktype = { 289 .release = linux_class_release, 290 .sysfs_ops = &linux_class_sysfs 291 }; 292 293 static void 294 linux_dev_release(struct kobject *kobj) 295 { 296 struct device *dev; 297 298 dev = container_of(kobj, struct device, kobj); 299 /* This is the precedence defined by linux. 
*/ 300 if (dev->release) 301 dev->release(dev); 302 else if (dev->class && dev->class->dev_release) 303 dev->class->dev_release(dev); 304 } 305 306 static ssize_t 307 linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf) 308 { 309 struct device_attribute *dattr; 310 ssize_t error; 311 312 dattr = container_of(attr, struct device_attribute, attr); 313 error = -EIO; 314 if (dattr->show) 315 error = dattr->show(container_of(kobj, struct device, kobj), 316 dattr, buf); 317 return (error); 318 } 319 320 static ssize_t 321 linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf, 322 size_t count) 323 { 324 struct device_attribute *dattr; 325 ssize_t error; 326 327 dattr = container_of(attr, struct device_attribute, attr); 328 error = -EIO; 329 if (dattr->store) 330 error = dattr->store(container_of(kobj, struct device, kobj), 331 dattr, buf, count); 332 return (error); 333 } 334 335 static const struct sysfs_ops linux_dev_sysfs = { 336 .show = linux_dev_show, 337 .store = linux_dev_store, 338 }; 339 340 const struct kobj_type linux_dev_ktype = { 341 .release = linux_dev_release, 342 .sysfs_ops = &linux_dev_sysfs 343 }; 344 345 struct device * 346 device_create(struct class *class, struct device *parent, dev_t devt, 347 void *drvdata, const char *fmt, ...) 348 { 349 struct device *dev; 350 va_list args; 351 352 dev = kzalloc(sizeof(*dev), M_WAITOK); 353 dev->parent = parent; 354 dev->class = class; 355 dev->devt = devt; 356 dev->driver_data = drvdata; 357 dev->release = linux_device_release; 358 va_start(args, fmt); 359 kobject_set_name_vargs(&dev->kobj, fmt, args); 360 va_end(args); 361 device_register(dev); 362 363 return (dev); 364 } 365 366 int 367 kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, 368 struct kobject *parent, const char *fmt, ...) 
369 { 370 va_list args; 371 int error; 372 373 kobject_init(kobj, ktype); 374 kobj->ktype = ktype; 375 kobj->parent = parent; 376 kobj->name = NULL; 377 378 va_start(args, fmt); 379 error = kobject_set_name_vargs(kobj, fmt, args); 380 va_end(args); 381 if (error) 382 return (error); 383 return kobject_add_complete(kobj, parent); 384 } 385 386 static void 387 linux_file_dtor(void *cdp) 388 { 389 struct linux_file *filp; 390 391 linux_set_current(curthread); 392 filp = cdp; 393 filp->f_op->release(filp->f_vnode, filp); 394 vdrop(filp->f_vnode); 395 kfree(filp); 396 } 397 398 static int 399 linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 400 { 401 struct linux_cdev *ldev; 402 struct linux_file *filp; 403 struct file *file; 404 int error; 405 406 file = td->td_fpop; 407 ldev = dev->si_drv1; 408 if (ldev == NULL) 409 return (ENODEV); 410 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 411 filp->f_dentry = &filp->f_dentry_store; 412 filp->f_op = ldev->ops; 413 filp->f_flags = file->f_flag; 414 vhold(file->f_vnode); 415 filp->f_vnode = file->f_vnode; 416 linux_set_current(td); 417 if (filp->f_op->open) { 418 error = -filp->f_op->open(file->f_vnode, filp); 419 if (error) { 420 kfree(filp); 421 goto done; 422 } 423 } 424 error = devfs_set_cdevpriv(filp, linux_file_dtor); 425 if (error) { 426 filp->f_op->release(file->f_vnode, filp); 427 kfree(filp); 428 } 429 done: 430 return (error); 431 } 432 433 static int 434 linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 435 { 436 struct linux_cdev *ldev; 437 struct linux_file *filp; 438 struct file *file; 439 int error; 440 441 file = td->td_fpop; 442 ldev = dev->si_drv1; 443 if (ldev == NULL) 444 return (0); 445 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 446 return (error); 447 filp->f_flags = file->f_flag; 448 devfs_clear_cdevpriv(); 449 450 451 return (0); 452 } 453 454 #define LINUX_IOCTL_MIN_PTR 0x10000UL 455 #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + 
IOCPARM_MAX) 456 457 static inline int 458 linux_remap_address(void **uaddr, size_t len) 459 { 460 uintptr_t uaddr_val = (uintptr_t)(*uaddr); 461 462 if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR && 463 uaddr_val < LINUX_IOCTL_MAX_PTR)) { 464 struct task_struct *pts = current; 465 if (pts == NULL) { 466 *uaddr = NULL; 467 return (1); 468 } 469 470 /* compute data offset */ 471 uaddr_val -= LINUX_IOCTL_MIN_PTR; 472 473 /* check that length is within bounds */ 474 if ((len > IOCPARM_MAX) || 475 (uaddr_val + len) > pts->bsd_ioctl_len) { 476 *uaddr = NULL; 477 return (1); 478 } 479 480 /* re-add kernel buffer address */ 481 uaddr_val += (uintptr_t)pts->bsd_ioctl_data; 482 483 /* update address location */ 484 *uaddr = (void *)uaddr_val; 485 return (1); 486 } 487 return (0); 488 } 489 490 int 491 linux_copyin(const void *uaddr, void *kaddr, size_t len) 492 { 493 if (linux_remap_address(__DECONST(void **, &uaddr), len)) { 494 if (uaddr == NULL) 495 return (-EFAULT); 496 memcpy(kaddr, uaddr, len); 497 return (0); 498 } 499 return (-copyin(uaddr, kaddr, len)); 500 } 501 502 int 503 linux_copyout(const void *kaddr, void *uaddr, size_t len) 504 { 505 if (linux_remap_address(&uaddr, len)) { 506 if (uaddr == NULL) 507 return (-EFAULT); 508 memcpy(uaddr, kaddr, len); 509 return (0); 510 } 511 return (-copyout(kaddr, uaddr, len)); 512 } 513 514 static int 515 linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 516 struct thread *td) 517 { 518 struct linux_cdev *ldev; 519 struct linux_file *filp; 520 struct file *file; 521 unsigned size; 522 int error; 523 524 file = td->td_fpop; 525 ldev = dev->si_drv1; 526 if (ldev == NULL) 527 return (0); 528 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 529 return (error); 530 filp->f_flags = file->f_flag; 531 532 linux_set_current(td); 533 size = IOCPARM_LEN(cmd); 534 /* refer to logic in sys_ioctl() */ 535 if (size > 0) { 536 /* 537 * Setup hint for linux_copyin() and linux_copyout(). 
538 * 539 * Background: Linux code expects a user-space address 540 * while FreeBSD supplies a kernel-space address. 541 */ 542 current->bsd_ioctl_data = data; 543 current->bsd_ioctl_len = size; 544 data = (void *)LINUX_IOCTL_MIN_PTR; 545 } else { 546 /* fetch user-space pointer */ 547 data = *(void **)data; 548 } 549 if (filp->f_op->unlocked_ioctl) 550 error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); 551 else 552 error = ENOTTY; 553 if (size > 0) { 554 current->bsd_ioctl_data = NULL; 555 current->bsd_ioctl_len = 0; 556 } 557 558 return (error); 559 } 560 561 static int 562 linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) 563 { 564 struct linux_cdev *ldev; 565 struct linux_file *filp; 566 struct thread *td; 567 struct file *file; 568 ssize_t bytes; 569 int error; 570 571 td = curthread; 572 file = td->td_fpop; 573 ldev = dev->si_drv1; 574 if (ldev == NULL) 575 return (0); 576 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 577 return (error); 578 filp->f_flags = file->f_flag; 579 /* XXX no support for I/O vectors currently */ 580 if (uio->uio_iovcnt != 1) 581 return (EOPNOTSUPP); 582 linux_set_current(td); 583 if (filp->f_op->read) { 584 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 585 uio->uio_iov->iov_len, &uio->uio_offset); 586 if (bytes >= 0) { 587 uio->uio_iov->iov_base = 588 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 589 uio->uio_iov->iov_len -= bytes; 590 uio->uio_resid -= bytes; 591 } else 592 error = -bytes; 593 } else 594 error = ENXIO; 595 596 return (error); 597 } 598 599 static int 600 linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) 601 { 602 struct linux_cdev *ldev; 603 struct linux_file *filp; 604 struct thread *td; 605 struct file *file; 606 ssize_t bytes; 607 int error; 608 609 td = curthread; 610 file = td->td_fpop; 611 ldev = dev->si_drv1; 612 if (ldev == NULL) 613 return (0); 614 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 615 return (error); 616 filp->f_flags = file->f_flag; 
617 /* XXX no support for I/O vectors currently */ 618 if (uio->uio_iovcnt != 1) 619 return (EOPNOTSUPP); 620 linux_set_current(td); 621 if (filp->f_op->write) { 622 bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, 623 uio->uio_iov->iov_len, &uio->uio_offset); 624 if (bytes >= 0) { 625 uio->uio_iov->iov_base = 626 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 627 uio->uio_iov->iov_len -= bytes; 628 uio->uio_resid -= bytes; 629 } else 630 error = -bytes; 631 } else 632 error = ENXIO; 633 634 return (error); 635 } 636 637 static int 638 linux_dev_poll(struct cdev *dev, int events, struct thread *td) 639 { 640 struct linux_cdev *ldev; 641 struct linux_file *filp; 642 struct file *file; 643 int revents; 644 int error; 645 646 file = td->td_fpop; 647 ldev = dev->si_drv1; 648 if (ldev == NULL) 649 return (0); 650 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 651 return (error); 652 filp->f_flags = file->f_flag; 653 linux_set_current(td); 654 if (filp->f_op->poll) 655 revents = filp->f_op->poll(filp, NULL) & events; 656 else 657 revents = 0; 658 659 return (revents); 660 } 661 662 static int 663 linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, 664 vm_size_t size, struct vm_object **object, int nprot) 665 { 666 struct linux_cdev *ldev; 667 struct linux_file *filp; 668 struct thread *td; 669 struct file *file; 670 struct vm_area_struct vma; 671 int error; 672 673 td = curthread; 674 file = td->td_fpop; 675 ldev = dev->si_drv1; 676 if (ldev == NULL) 677 return (ENODEV); 678 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 679 return (error); 680 filp->f_flags = file->f_flag; 681 linux_set_current(td); 682 vma.vm_start = 0; 683 vma.vm_end = size; 684 vma.vm_pgoff = *offset / PAGE_SIZE; 685 vma.vm_pfn = 0; 686 vma.vm_page_prot = VM_MEMATTR_DEFAULT; 687 if (filp->f_op->mmap) { 688 error = -filp->f_op->mmap(filp, &vma); 689 if (error == 0) { 690 struct sglist *sg; 691 692 sg = sglist_alloc(1, M_WAITOK); 693 sglist_append_phys(sg, 694 
(vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len); 695 *object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len, 696 nprot, 0, td->td_ucred); 697 if (*object == NULL) { 698 sglist_free(sg); 699 error = EINVAL; 700 goto done; 701 } 702 *offset = 0; 703 if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) { 704 VM_OBJECT_WLOCK(*object); 705 vm_object_set_memattr(*object, 706 vma.vm_page_prot); 707 VM_OBJECT_WUNLOCK(*object); 708 } 709 } 710 } else 711 error = ENODEV; 712 done: 713 return (error); 714 } 715 716 struct cdevsw linuxcdevsw = { 717 .d_version = D_VERSION, 718 .d_flags = D_TRACKCLOSE, 719 .d_open = linux_dev_open, 720 .d_close = linux_dev_close, 721 .d_read = linux_dev_read, 722 .d_write = linux_dev_write, 723 .d_ioctl = linux_dev_ioctl, 724 .d_mmap_single = linux_dev_mmap_single, 725 .d_poll = linux_dev_poll, 726 }; 727 728 static int 729 linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, 730 int flags, struct thread *td) 731 { 732 struct linux_file *filp; 733 ssize_t bytes; 734 int error; 735 736 error = 0; 737 filp = (struct linux_file *)file->f_data; 738 filp->f_flags = file->f_flag; 739 /* XXX no support for I/O vectors currently */ 740 if (uio->uio_iovcnt != 1) 741 return (EOPNOTSUPP); 742 linux_set_current(td); 743 if (filp->f_op->read) { 744 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 745 uio->uio_iov->iov_len, &uio->uio_offset); 746 if (bytes >= 0) { 747 uio->uio_iov->iov_base = 748 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 749 uio->uio_iov->iov_len -= bytes; 750 uio->uio_resid -= bytes; 751 } else 752 error = -bytes; 753 } else 754 error = ENXIO; 755 756 return (error); 757 } 758 759 static int 760 linux_file_poll(struct file *file, int events, struct ucred *active_cred, 761 struct thread *td) 762 { 763 struct linux_file *filp; 764 int revents; 765 766 filp = (struct linux_file *)file->f_data; 767 filp->f_flags = file->f_flag; 768 linux_set_current(td); 769 if (filp->f_op->poll) 770 revents = filp->f_op->poll(filp, 
NULL) & events; 771 else 772 revents = 0; 773 774 return (revents); 775 } 776 777 static int 778 linux_file_close(struct file *file, struct thread *td) 779 { 780 struct linux_file *filp; 781 int error; 782 783 filp = (struct linux_file *)file->f_data; 784 filp->f_flags = file->f_flag; 785 linux_set_current(td); 786 error = -filp->f_op->release(NULL, filp); 787 funsetown(&filp->f_sigio); 788 kfree(filp); 789 790 return (error); 791 } 792 793 static int 794 linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 795 struct thread *td) 796 { 797 struct linux_file *filp; 798 int error; 799 800 filp = (struct linux_file *)fp->f_data; 801 filp->f_flags = fp->f_flag; 802 error = 0; 803 804 linux_set_current(td); 805 switch (cmd) { 806 case FIONBIO: 807 break; 808 case FIOASYNC: 809 if (filp->f_op->fasync == NULL) 810 break; 811 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 812 break; 813 case FIOSETOWN: 814 error = fsetown(*(int *)data, &filp->f_sigio); 815 if (error == 0) 816 error = filp->f_op->fasync(0, filp, 817 fp->f_flag & FASYNC); 818 break; 819 case FIOGETOWN: 820 *(int *)data = fgetown(&filp->f_sigio); 821 break; 822 default: 823 error = ENOTTY; 824 break; 825 } 826 return (error); 827 } 828 829 static int 830 linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 831 struct thread *td) 832 { 833 834 return (EOPNOTSUPP); 835 } 836 837 static int 838 linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, 839 struct filedesc *fdp) 840 { 841 842 return (0); 843 } 844 845 struct fileops linuxfileops = { 846 .fo_read = linux_file_read, 847 .fo_write = invfo_rdwr, 848 .fo_truncate = invfo_truncate, 849 .fo_kqfilter = invfo_kqfilter, 850 .fo_stat = linux_file_stat, 851 .fo_fill_kinfo = linux_file_fill_kinfo, 852 .fo_poll = linux_file_poll, 853 .fo_close = linux_file_close, 854 .fo_ioctl = linux_file_ioctl, 855 .fo_chmod = invfo_chmod, 856 .fo_chown = invfo_chown, 857 .fo_sendfile = invfo_sendfile, 858 }; 
859 860 /* 861 * Hash of vmmap addresses. This is infrequently accessed and does not 862 * need to be particularly large. This is done because we must store the 863 * caller's idea of the map size to properly unmap. 864 */ 865 struct vmmap { 866 LIST_ENTRY(vmmap) vm_next; 867 void *vm_addr; 868 unsigned long vm_size; 869 }; 870 871 struct vmmaphd { 872 struct vmmap *lh_first; 873 }; 874 #define VMMAP_HASH_SIZE 64 875 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 876 #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 877 static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 878 static struct mtx vmmaplock; 879 880 static void 881 vmmap_add(void *addr, unsigned long size) 882 { 883 struct vmmap *vmmap; 884 885 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 886 mtx_lock(&vmmaplock); 887 vmmap->vm_size = size; 888 vmmap->vm_addr = addr; 889 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 890 mtx_unlock(&vmmaplock); 891 } 892 893 static struct vmmap * 894 vmmap_remove(void *addr) 895 { 896 struct vmmap *vmmap; 897 898 mtx_lock(&vmmaplock); 899 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 900 if (vmmap->vm_addr == addr) 901 break; 902 if (vmmap) 903 LIST_REMOVE(vmmap, vm_next); 904 mtx_unlock(&vmmaplock); 905 906 return (vmmap); 907 } 908 909 #if defined(__i386__) || defined(__amd64__) 910 void * 911 _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 912 { 913 void *addr; 914 915 addr = pmap_mapdev_attr(phys_addr, size, attr); 916 if (addr == NULL) 917 return (NULL); 918 vmmap_add(addr, size); 919 920 return (addr); 921 } 922 #endif 923 924 void 925 iounmap(void *addr) 926 { 927 struct vmmap *vmmap; 928 929 vmmap = vmmap_remove(addr); 930 if (vmmap == NULL) 931 return; 932 #if defined(__i386__) || defined(__amd64__) 933 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 934 #endif 935 kfree(vmmap); 936 } 937 938 939 void * 940 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 941 { 942 
vm_offset_t off; 943 size_t size; 944 945 size = count * PAGE_SIZE; 946 off = kva_alloc(size); 947 if (off == 0) 948 return (NULL); 949 vmmap_add((void *)off, size); 950 pmap_qenter(off, pages, count); 951 952 return ((void *)off); 953 } 954 955 void 956 vunmap(void *addr) 957 { 958 struct vmmap *vmmap; 959 960 vmmap = vmmap_remove(addr); 961 if (vmmap == NULL) 962 return; 963 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 964 kva_free((vm_offset_t)addr, vmmap->vm_size); 965 kfree(vmmap); 966 } 967 968 char * 969 kvasprintf(gfp_t gfp, const char *fmt, va_list ap) 970 { 971 unsigned int len; 972 char *p; 973 va_list aq; 974 975 va_copy(aq, ap); 976 len = vsnprintf(NULL, 0, fmt, aq); 977 va_end(aq); 978 979 p = kmalloc(len + 1, gfp); 980 if (p != NULL) 981 vsnprintf(p, len + 1, fmt, ap); 982 983 return (p); 984 } 985 986 char * 987 kasprintf(gfp_t gfp, const char *fmt, ...) 988 { 989 va_list ap; 990 char *p; 991 992 va_start(ap, fmt); 993 p = kvasprintf(gfp, fmt, ap); 994 va_end(ap); 995 996 return (p); 997 } 998 999 static void 1000 linux_timer_callback_wrapper(void *context) 1001 { 1002 struct timer_list *timer; 1003 1004 timer = context; 1005 timer->function(timer->data); 1006 } 1007 1008 void 1009 mod_timer(struct timer_list *timer, unsigned long expires) 1010 { 1011 1012 timer->expires = expires; 1013 callout_reset(&timer->timer_callout, 1014 linux_timer_jiffies_until(expires), 1015 &linux_timer_callback_wrapper, timer); 1016 } 1017 1018 void 1019 add_timer(struct timer_list *timer) 1020 { 1021 1022 callout_reset(&timer->timer_callout, 1023 linux_timer_jiffies_until(timer->expires), 1024 &linux_timer_callback_wrapper, timer); 1025 } 1026 1027 static void 1028 linux_timer_init(void *arg) 1029 { 1030 1031 /* 1032 * Compute an internal HZ value which can divide 2**32 to 1033 * avoid timer rounding problems when the tick value wraps 1034 * around 2**32: 1035 */ 1036 linux_timer_hz_mask = 1; 1037 while (linux_timer_hz_mask < (unsigned long)hz) 1038 
linux_timer_hz_mask *= 2; 1039 linux_timer_hz_mask--; 1040 } 1041 SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL); 1042 1043 void 1044 linux_complete_common(struct completion *c, int all) 1045 { 1046 int wakeup_swapper; 1047 1048 sleepq_lock(c); 1049 c->done++; 1050 if (all) 1051 wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); 1052 else 1053 wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); 1054 sleepq_release(c); 1055 if (wakeup_swapper) 1056 kick_proc0(); 1057 } 1058 1059 /* 1060 * Indefinite wait for done != 0 with or without signals. 1061 */ 1062 long 1063 linux_wait_for_common(struct completion *c, int flags) 1064 { 1065 if (SCHEDULER_STOPPED()) 1066 return (0); 1067 1068 if (flags != 0) 1069 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1070 else 1071 flags = SLEEPQ_SLEEP; 1072 for (;;) { 1073 sleepq_lock(c); 1074 if (c->done) 1075 break; 1076 sleepq_add(c, NULL, "completion", flags, 0); 1077 if (flags & SLEEPQ_INTERRUPTIBLE) { 1078 if (sleepq_wait_sig(c, 0) != 0) 1079 return (-ERESTARTSYS); 1080 } else 1081 sleepq_wait(c, 0); 1082 } 1083 c->done--; 1084 sleepq_release(c); 1085 1086 return (0); 1087 } 1088 1089 /* 1090 * Time limited wait for done != 0 with or without signals. 
1091 */ 1092 long 1093 linux_wait_for_timeout_common(struct completion *c, long timeout, int flags) 1094 { 1095 long end = jiffies + timeout; 1096 1097 if (SCHEDULER_STOPPED()) 1098 return (0); 1099 1100 if (flags != 0) 1101 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1102 else 1103 flags = SLEEPQ_SLEEP; 1104 for (;;) { 1105 int ret; 1106 1107 sleepq_lock(c); 1108 if (c->done) 1109 break; 1110 sleepq_add(c, NULL, "completion", flags, 0); 1111 sleepq_set_timeout(c, linux_timer_jiffies_until(end)); 1112 if (flags & SLEEPQ_INTERRUPTIBLE) 1113 ret = sleepq_timedwait_sig(c, 0); 1114 else 1115 ret = sleepq_timedwait(c, 0); 1116 if (ret != 0) { 1117 /* check for timeout or signal */ 1118 if (ret == EWOULDBLOCK) 1119 return (0); 1120 else 1121 return (-ERESTARTSYS); 1122 } 1123 } 1124 c->done--; 1125 sleepq_release(c); 1126 1127 /* return how many jiffies are left */ 1128 return (linux_timer_jiffies_until(end)); 1129 } 1130 1131 int 1132 linux_try_wait_for_completion(struct completion *c) 1133 { 1134 int isdone; 1135 1136 isdone = 1; 1137 sleepq_lock(c); 1138 if (c->done) 1139 c->done--; 1140 else 1141 isdone = 0; 1142 sleepq_release(c); 1143 return (isdone); 1144 } 1145 1146 int 1147 linux_completion_done(struct completion *c) 1148 { 1149 int isdone; 1150 1151 isdone = 1; 1152 sleepq_lock(c); 1153 if (c->done == 0) 1154 isdone = 0; 1155 sleepq_release(c); 1156 return (isdone); 1157 } 1158 1159 void 1160 linux_delayed_work_fn(void *arg) 1161 { 1162 struct delayed_work *work; 1163 1164 work = arg; 1165 taskqueue_enqueue(work->work.taskqueue, &work->work.work_task); 1166 } 1167 1168 void 1169 linux_work_fn(void *context, int pending) 1170 { 1171 struct work_struct *work; 1172 1173 work = context; 1174 work->fn(work); 1175 } 1176 1177 void 1178 linux_flush_fn(void *context, int pending) 1179 { 1180 } 1181 1182 struct workqueue_struct * 1183 linux_create_workqueue_common(const char *name, int cpus) 1184 { 1185 struct workqueue_struct *wq; 1186 1187 wq = kmalloc(sizeof(*wq), 
	    M_WAITOK);
	/*
	 * Back the Linux workqueue with a FreeBSD taskqueue; all work
	 * items enqueued on this wq become tasks on this taskqueue.
	 */
	wq->taskqueue = taskqueue_create(name, M_WAITOK,
	    taskqueue_thread_enqueue, &wq->taskqueue);
	atomic_set(&wq->draining, 0);
	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);

	return (wq);
}

/*
 * Tear down a workqueue created above: free the backing taskqueue
 * (which drains pending tasks) and release the wq structure itself.
 */
void
destroy_workqueue(struct workqueue_struct *wq)
{
	taskqueue_free(wq->taskqueue);
	kfree(wq);
}

/*
 * kobject release callback for dynamically allocated linux_cdev objects:
 * destroy the underlying devfs node (if one was created), free the
 * structure and drop the reference we hold on the parent kobject.
 */
static void
linux_cdev_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kfree(cdev);
	kobject_put(parent);
}

/*
 * Same as linux_cdev_release() but for statically allocated linux_cdev
 * objects (e.g. embedded in another structure): everything except the
 * kfree() of the cdev itself.
 */
static void
linux_cdev_static_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kobject_put(parent);
}

/* kobj_type for heap-allocated character devices. */
const struct kobj_type linux_cdev_ktype = {
	.release = linux_cdev_release,
};

/* kobj_type for statically allocated character devices. */
const struct kobj_type linux_cdev_static_ktype = {
	.release = linux_cdev_static_release,
};

/*
 * EVENTHANDLER trampolines: each translates a native FreeBSD network
 * event into the corresponding Linux NETDEV_* notifier callback on the
 * notifier_block passed as the registration argument.
 */

/* ifnet_link_event -> NETDEV_UP / NETDEV_DOWN depending on link state. */
static void
linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate)
{
	struct notifier_block *nb;

	nb = arg;
	if (linkstate == LINK_STATE_UP)
		nb->notifier_call(nb, NETDEV_UP, ifp);
	else
		nb->notifier_call(nb, NETDEV_DOWN, ifp);
}

/* ifnet_arrival_event (interface attached) -> NETDEV_REGISTER. */
static void
linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_REGISTER, ifp);
}

/* ifnet_departure_event (interface detached) -> NETDEV_UNREGISTER. */
static void
linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_UNREGISTER, ifp);
}

/* iflladdr_event (link-level address changed) -> NETDEV_CHANGEADDR. */
static void
linux_handle_iflladdr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp);
}

/* ifaddr_event (protocol address changed) -> NETDEV_CHANGEIFADDR. */
static void
linux_handle_ifaddr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
}

/*
 * Register a Linux netdevice notifier by hooking the four FreeBSD
 * network eventhandlers; the returned tags are stashed in nb->tags[]
 * (indexed by NETDEV_* event) so they can be deregistered later.
 * Always returns 0.
 */
int
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}

/*
 * Register a Linux inetaddr notifier; backed by the single FreeBSD
 * ifaddr_event eventhandler. Always returns 0.
 */
int
register_inetaddr_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
	return (0);
}

/*
 * Undo register_netdevice_notifier(): deregister the four eventhandler
 * hooks using the tags saved in nb->tags[]. Always returns 0.
 */
int
unregister_netdevice_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifnet_link_event,
	    nb->tags[NETDEV_UP]);
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
	    nb->tags[NETDEV_REGISTER]);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    nb->tags[NETDEV_UNREGISTER]);
	EVENTHANDLER_DEREGISTER(iflladdr_event,
	    nb->tags[NETDEV_CHANGEADDR]);

	return (0);
}

/* Undo register_inetaddr_notifier(). Always returns 0. */
int
unregister_inetaddr_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifaddr_event,
	    nb->tags[NETDEV_CHANGEIFADDR]);

	return (0);
}

/*
 * Adapter state for list_sort(): carries the caller's Linux-style
 * comparator and its private argument through qsort_r().
 */
struct list_sort_thunk {
	int (*cmp)(void *, struct list_head *, struct list_head *);
	void *priv;
};

/*
 * qsort_r() comparator (historical FreeBSD signature: thunk first).
 * d1/d2 point at array slots holding struct list_head pointers; unwrap
 * them and delegate to the caller's comparator.
 */
static inline int
linux_le_cmp(void *priv, const void *d1, const void *d2)
{
	struct list_head *le1, *le2;
	struct list_sort_thunk *thunk;

	thunk = priv;
	le1 = *(__DECONST(struct list_head **, d1));
	le2 = *(__DECONST(struct list_head **, d2));
	return ((thunk->cmp)(thunk->priv, le1, le2));
}

/*
 * Linux list_sort() emulation: flatten the linked list into a
 * temporary pointer array, sort it with qsort_r(), then rebuild the
 * list in sorted order. M_WAITOK means this may sleep; not for use in
 * non-sleepable contexts.
 */
void
list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
    struct list_head *a, struct list_head *b))
{
	struct list_sort_thunk thunk;
	struct list_head **ar, *le;
	size_t count, i;

	count = 0;
	list_for_each(le, head)
		count++;
	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
	i = 0;
	list_for_each(le, head)
		ar[i++] = le;
	thunk.cmp = cmp;
	thunk.priv = priv;
	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
	/* Relink the (unchanged) nodes in the sorted order. */
	INIT_LIST_HEAD(head);
	for (i = 0; i < count; i++)
		list_add_tail(ar[i], head);
	free(ar, M_KMALLOC);
}

/*
 * Generic interrupt trampoline: invoke the Linux-style IRQ handler
 * recorded in the irq_ent with its irq number and argument.
 */
void
linux_irq_handler(void *ent)
{
	struct irq_ent *irqe;

	irqe = ent;
	irqe->handler(irqe->irq, irqe->arg);
}

/*
 * Look up a linux_cdev by name and major/minor among the devices
 * registered on linuxcdevsw. Returns the linux_cdev (si_drv1) or NULL
 * if not found.
 *
 * NOTE(review): ldev is dereferenced via kobject_name(&ldev->kobj)
 * without a NULL check on si_drv1 — presumably every device on this
 * cdevsw always has si_drv1 set; verify against the registration path.
 */
struct linux_cdev *
linux_find_cdev(const char *name, unsigned major, unsigned minor)
{
	int unit = MKDEV(major, minor);
	struct cdev *cdev;

	dev_lock();
	LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) {
		struct linux_cdev *ldev = cdev->si_drv1;
		if (dev2unit(cdev) == unit &&
		    strcmp(kobject_name(&ldev->kobj), name) == 0) {
			break;
		}
	}
	dev_unlock();

	return (cdev != NULL ? cdev->si_drv1 : NULL);
}

/*
 * Linux __register_chrdev(): create one character device per minor in
 * [baseminor, baseminor + count). Returns 0 on success or the first
 * cdev_add() error.
 *
 * NOTE(review): on a mid-loop failure, devices created by earlier
 * iterations are not rolled back; callers are expected to use
 * __unregister_chrdev() — confirm that matches Linux semantics.
 */
int
__register_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add(cdev, makedev(major, i), 1);
		if (ret != 0)
			break;
	}
	return (ret);
}

/*
 * As __register_chrdev(), but additionally applies explicit owner,
 * group and permission mode to each created device node
 * (via cdev_add_ext()).
 */
int
__register_chrdev_p(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops, uid_t uid,
    gid_t gid, int mode)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode);
		if (ret != 0)
			break;
	}
	return (ret);
}

/*
 * Linux __unregister_chrdev(): delete every matching device in the
 * minor range; minors that were never registered are silently skipped.
 */
void
__unregister_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name)
{
	struct linux_cdev *cdevp;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdevp = linux_find_cdev(name, major, i);
		if (cdevp != NULL)
			cdev_del(cdevp);
	}
}

#if defined(__i386__) || defined(__amd64__)
/* True when the CPU supports CLFLUSH; probed at init from cpu_feature. */
bool linux_cpu_has_clflush;
#endif

/*
 * One-time LinuxKPI initialization, run at SI_SUB_DRIVERS/SI_ORDER_SECOND:
 * builds the sysctl tree mirroring the Linux sysfs hierarchy
 * (sys/class, sys/device), registers the root device and the "misc"
 * class, and initializes the PCI lists/lock and the I/O-map hash.
 */
static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

#if defined(__i386__) || defined(__amd64__)
	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif

	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);

/*
 * Module teardown: release the kobject names allocated at init time
 * and wait for any in-flight RCU readers before unload completes.
 */
static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);

	synchronize_rcu();
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);

/*
 * NOTE: Linux frequently uses "unsigned long" for pointer to integer
 * conversion and vice versa, where in FreeBSD "uintptr_t" would be
 * used. Assert these types have the same size, else some parts of the
 * LinuxKPI may not work like expected:
 */
CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));