1 /*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice unmodified, this list of conditions, and the following 13 * disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/kernel.h> 37 #include <sys/sysctl.h> 38 #include <sys/proc.h> 39 #include <sys/sglist.h> 40 #include <sys/sleepqueue.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/bus.h> 44 #include <sys/fcntl.h> 45 #include <sys/file.h> 46 #include <sys/filio.h> 47 #include <sys/rwlock.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 52 #include <machine/stdarg.h> 53 54 #if defined(__i386__) || defined(__amd64__) 55 #include <machine/md_var.h> 56 #endif 57 58 #include <linux/kobject.h> 59 #include <linux/device.h> 60 #include <linux/slab.h> 61 #include <linux/module.h> 62 #include <linux/moduleparam.h> 63 #include <linux/cdev.h> 64 #include <linux/file.h> 65 #include <linux/sysfs.h> 66 #include <linux/mm.h> 67 #include <linux/io.h> 68 #include <linux/vmalloc.h> 69 #include <linux/netdevice.h> 70 #include <linux/timer.h> 71 #include <linux/workqueue.h> 72 #include <linux/rcupdate.h> 73 #include <linux/interrupt.h> 74 #include <linux/uaccess.h> 75 #include <linux/kernel.h> 76 #include <linux/list.h> 77 #include <linux/compat.h> 78 79 #include <vm/vm_pager.h> 80 81 SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters"); 82 83 MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 84 85 #include <linux/rbtree.h> 86 /* Undo Linux compat changes. */ 87 #undef RB_ROOT 88 #undef file 89 #undef cdev 90 #define RB_ROOT(head) (head)->rbh_root 91 92 struct kobject linux_class_root; 93 struct device linux_root_device; 94 struct class linux_class_misc; 95 struct list_head pci_drivers; 96 struct list_head pci_devices; 97 struct net init_net; 98 spinlock_t pci_lock; 99 struct sx linux_global_rcu_lock; 100 101 unsigned long linux_timer_hz_mask; 102 103 int 104 panic_cmp(struct rb_node *one, struct rb_node *two) 105 { 106 panic("no cmp"); 107 } 108 109 RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 110 111 int 112 kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args) 113 { 114 va_list tmp_va; 115 int len; 116 char *old; 117 char *name; 118 char dummy; 119 120 old = kobj->name; 121 122 if (old && fmt == NULL) 123 return (0); 124 125 /* compute length of string */ 126 va_copy(tmp_va, args); 127 len = vsnprintf(&dummy, 0, fmt, tmp_va); 128 va_end(tmp_va); 129 130 /* account for zero termination */ 131 len++; 132 133 /* check for error */ 134 if (len < 1) 135 return (-EINVAL); 136 137 /* allocate memory for string */ 138 name = kzalloc(len, GFP_KERNEL); 139 if (name == NULL) 140 return (-ENOMEM); 141 vsnprintf(name, len, fmt, args); 142 kobj->name = name; 143 144 /* free old string */ 145 kfree(old); 146 147 /* filter new string */ 148 for (; *name != '\0'; name++) 149 if (*name == '/') 150 *name = '!'; 151 return (0); 152 } 153 154 int 155 kobject_set_name(struct kobject *kobj, const char *fmt, ...) 156 { 157 va_list args; 158 int error; 159 160 va_start(args, fmt); 161 error = kobject_set_name_vargs(kobj, fmt, args); 162 va_end(args); 163 164 return (error); 165 } 166 167 static int 168 kobject_add_complete(struct kobject *kobj, struct kobject *parent) 169 { 170 const struct kobj_type *t; 171 int error; 172 173 kobj->parent = parent; 174 error = sysfs_create_dir(kobj); 175 if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { 176 struct attribute **attr; 177 t = kobj->ktype; 178 179 for (attr = t->default_attrs; *attr != NULL; attr++) { 180 error = sysfs_create_file(kobj, *attr); 181 if (error) 182 break; 183 } 184 if (error) 185 sysfs_remove_dir(kobj); 186 187 } 188 return (error); 189 } 190 191 int 192 kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) 193 { 194 va_list args; 195 int error; 196 197 va_start(args, fmt); 198 error = kobject_set_name_vargs(kobj, fmt, args); 199 va_end(args); 200 if (error) 201 return (error); 202 203 return kobject_add_complete(kobj, parent); 204 } 205 206 void 207 linux_kobject_release(struct kref *kref) 208 { 209 struct kobject *kobj; 210 char *name; 211 212 kobj = container_of(kref, struct kobject, kref); 213 sysfs_remove_dir(kobj); 214 name = kobj->name; 215 if (kobj->ktype && kobj->ktype->release) 216 kobj->ktype->release(kobj); 217 kfree(name); 218 } 219 220 static void 221 linux_kobject_kfree(struct kobject *kobj) 222 { 223 kfree(kobj); 224 } 225 226 static void 227 linux_kobject_kfree_name(struct kobject *kobj) 228 { 229 if (kobj) { 230 kfree(kobj->name); 231 } 232 } 233 234 const struct kobj_type linux_kfree_type = { 235 .release = linux_kobject_kfree 236 }; 237 238 static void 239 linux_device_release(struct device *dev) 240 { 241 pr_debug("linux_device_release: %s\n", dev_name(dev)); 242 kfree(dev); 243 } 244 245 static ssize_t 246 linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf) 247 { 248 struct class_attribute *dattr; 249 ssize_t error; 250 251 dattr = container_of(attr, struct class_attribute, attr); 252 error = -EIO; 253 if (dattr->show) 254 error = dattr->show(container_of(kobj, struct class, kobj), 255 dattr, buf); 256 return (error); 257 } 258 259 static ssize_t 260 linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf, 261 size_t count) 262 { 263 struct class_attribute *dattr; 264 ssize_t error; 265 266 dattr = container_of(attr, struct class_attribute, attr); 267 error = -EIO; 268 if (dattr->store) 269 error = dattr->store(container_of(kobj, struct class, kobj), 270 dattr, buf, count); 271 return (error); 272 } 273 274 static void 275 linux_class_release(struct kobject *kobj) 276 { 277 struct class *class; 278 279 class = container_of(kobj, struct class, kobj); 280 if (class->class_release) 281 class->class_release(class); 282 } 283 284 static const struct sysfs_ops linux_class_sysfs = { 285 .show = linux_class_show, 286 .store = linux_class_store, 287 }; 288 289 const struct kobj_type linux_class_ktype = { 290 .release = linux_class_release, 291 .sysfs_ops = &linux_class_sysfs 292 }; 293 294 static void 295 linux_dev_release(struct kobject *kobj) 296 { 297 struct device *dev; 298 299 dev = container_of(kobj, struct device, kobj); 300 /* This is the precedence defined by linux. */ 301 if (dev->release) 302 dev->release(dev); 303 else if (dev->class && dev->class->dev_release) 304 dev->class->dev_release(dev); 305 } 306 307 static ssize_t 308 linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf) 309 { 310 struct device_attribute *dattr; 311 ssize_t error; 312 313 dattr = container_of(attr, struct device_attribute, attr); 314 error = -EIO; 315 if (dattr->show) 316 error = dattr->show(container_of(kobj, struct device, kobj), 317 dattr, buf); 318 return (error); 319 } 320 321 static ssize_t 322 linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf, 323 size_t count) 324 { 325 struct device_attribute *dattr; 326 ssize_t error; 327 328 dattr = container_of(attr, struct device_attribute, attr); 329 error = -EIO; 330 if (dattr->store) 331 error = dattr->store(container_of(kobj, struct device, kobj), 332 dattr, buf, count); 333 return (error); 334 } 335 336 static const struct sysfs_ops linux_dev_sysfs = { 337 .show = linux_dev_show, 338 .store = linux_dev_store, 339 }; 340 341 const struct kobj_type linux_dev_ktype = { 342 .release = linux_dev_release, 343 .sysfs_ops = &linux_dev_sysfs 344 }; 345 346 struct device * 347 device_create(struct class *class, struct device *parent, dev_t devt, 348 void *drvdata, const char *fmt, ...) 349 { 350 struct device *dev; 351 va_list args; 352 353 dev = kzalloc(sizeof(*dev), M_WAITOK); 354 dev->parent = parent; 355 dev->class = class; 356 dev->devt = devt; 357 dev->driver_data = drvdata; 358 dev->release = linux_device_release; 359 va_start(args, fmt); 360 kobject_set_name_vargs(&dev->kobj, fmt, args); 361 va_end(args); 362 device_register(dev); 363 364 return (dev); 365 } 366 367 int 368 kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, 369 struct kobject *parent, const char *fmt, ...) 370 { 371 va_list args; 372 int error; 373 374 kobject_init(kobj, ktype); 375 kobj->ktype = ktype; 376 kobj->parent = parent; 377 kobj->name = NULL; 378 379 va_start(args, fmt); 380 error = kobject_set_name_vargs(kobj, fmt, args); 381 va_end(args); 382 if (error) 383 return (error); 384 return kobject_add_complete(kobj, parent); 385 } 386 387 void 388 linux_set_current(struct thread *td, struct task_struct *t) 389 { 390 memset(t, 0, sizeof(*t)); 391 task_struct_fill(td, t); 392 task_struct_set(td, t); 393 } 394 395 void 396 linux_clear_current(struct thread *td) 397 { 398 task_struct_set(td, NULL); 399 } 400 401 static void 402 linux_file_dtor(void *cdp) 403 { 404 struct linux_file *filp; 405 struct task_struct t; 406 struct thread *td; 407 408 td = curthread; 409 filp = cdp; 410 linux_set_current(td, &t); 411 filp->f_op->release(filp->f_vnode, filp); 412 linux_clear_current(td); 413 vdrop(filp->f_vnode); 414 kfree(filp); 415 } 416 417 static int 418 linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 419 { 420 struct linux_cdev *ldev; 421 struct linux_file *filp; 422 struct task_struct t; 423 struct file *file; 424 int error; 425 426 file = td->td_fpop; 427 ldev = dev->si_drv1; 428 if (ldev == NULL) 429 return (ENODEV); 430 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 431 filp->f_dentry = &filp->f_dentry_store; 432 filp->f_op = ldev->ops; 433 filp->f_flags = file->f_flag; 434 vhold(file->f_vnode); 435 filp->f_vnode = file->f_vnode; 436 linux_set_current(td, &t); 437 if (filp->f_op->open) { 438 error = -filp->f_op->open(file->f_vnode, filp); 439 if (error) { 440 kfree(filp); 441 goto done; 442 } 443 } 444 error = devfs_set_cdevpriv(filp, linux_file_dtor); 445 if (error) { 446 filp->f_op->release(file->f_vnode, filp); 447 kfree(filp); 448 } 449 done: 450 linux_clear_current(td); 451 return (error); 452 } 453 454 static int 455 linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 456 { 457 struct linux_cdev *ldev; 458 struct linux_file *filp; 459 struct file *file; 460 int error; 461 462 file = td->td_fpop; 463 ldev = dev->si_drv1; 464 if (ldev == NULL) 465 return (0); 466 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 467 return (error); 468 filp->f_flags = file->f_flag; 469 devfs_clear_cdevpriv(); 470 471 472 return (0); 473 } 474 475 #define LINUX_IOCTL_MIN_PTR 0x10000UL 476 #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX) 477 478 static inline int 479 linux_remap_address(void **uaddr, size_t len) 480 { 481 uintptr_t uaddr_val = (uintptr_t)(*uaddr); 482 483 if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR && 484 uaddr_val < LINUX_IOCTL_MAX_PTR)) { 485 struct task_struct *pts = current; 486 if (pts == NULL) { 487 *uaddr = NULL; 488 return (1); 489 } 490 491 /* compute data offset */ 492 uaddr_val -= LINUX_IOCTL_MIN_PTR; 493 494 /* check that length is within bounds */ 495 if ((len > IOCPARM_MAX) || 496 (uaddr_val + len) > pts->bsd_ioctl_len) { 497 *uaddr = NULL; 498 return (1); 499 } 500 501 /* re-add kernel buffer address */ 502 uaddr_val += (uintptr_t)pts->bsd_ioctl_data; 503 504 /* update address location */ 505 *uaddr = (void *)uaddr_val; 506 return (1); 507 } 508 return (0); 509 } 510 511 int 512 linux_copyin(const void *uaddr, void *kaddr, size_t len) 513 { 514 if (linux_remap_address(__DECONST(void **, &uaddr), len)) { 515 if (uaddr == NULL) 516 return (-EFAULT); 517 memcpy(kaddr, uaddr, len); 518 return (0); 519 } 520 return (-copyin(uaddr, kaddr, len)); 521 } 522 523 int 524 linux_copyout(const void *kaddr, void *uaddr, size_t len) 525 { 526 if (linux_remap_address(&uaddr, len)) { 527 if (uaddr == NULL) 528 return (-EFAULT); 529 memcpy(uaddr, kaddr, len); 530 return (0); 531 } 532 return (-copyout(kaddr, uaddr, len)); 533 } 534 535 static int 536 linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 537 struct thread *td) 538 { 539 struct linux_cdev *ldev; 540 struct linux_file *filp; 541 struct task_struct t; 542 struct file *file; 543 unsigned size; 544 int error; 545 546 file = td->td_fpop; 547 ldev = dev->si_drv1; 548 if (ldev == NULL) 549 return (0); 550 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 551 return (error); 552 filp->f_flags = file->f_flag; 553 linux_set_current(td, &t); 554 size = IOCPARM_LEN(cmd); 555 /* refer to logic in sys_ioctl() */ 556 if (size > 0) { 557 /* 558 * Setup hint for linux_copyin() and linux_copyout(). 559 * 560 * Background: Linux code expects a user-space address 561 * while FreeBSD supplies a kernel-space address. 562 */ 563 t.bsd_ioctl_data = data; 564 t.bsd_ioctl_len = size; 565 data = (void *)LINUX_IOCTL_MIN_PTR; 566 } else { 567 /* fetch user-space pointer */ 568 data = *(void **)data; 569 } 570 if (filp->f_op->unlocked_ioctl) 571 error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); 572 else 573 error = ENOTTY; 574 linux_clear_current(td); 575 576 return (error); 577 } 578 579 static int 580 linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) 581 { 582 struct linux_cdev *ldev; 583 struct linux_file *filp; 584 struct task_struct t; 585 struct thread *td; 586 struct file *file; 587 ssize_t bytes; 588 int error; 589 590 td = curthread; 591 file = td->td_fpop; 592 ldev = dev->si_drv1; 593 if (ldev == NULL) 594 return (0); 595 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 596 return (error); 597 filp->f_flags = file->f_flag; 598 /* XXX no support for I/O vectors currently */ 599 if (uio->uio_iovcnt != 1) 600 return (EOPNOTSUPP); 601 linux_set_current(td, &t); 602 if (filp->f_op->read) { 603 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 604 uio->uio_iov->iov_len, &uio->uio_offset); 605 if (bytes >= 0) { 606 uio->uio_iov->iov_base = 607 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 608 uio->uio_iov->iov_len -= bytes; 609 uio->uio_resid -= bytes; 610 } else 611 error = -bytes; 612 } else 613 error = ENXIO; 614 linux_clear_current(td); 615 616 return (error); 617 } 618 619 static int 620 linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) 621 { 622 struct linux_cdev *ldev; 623 struct linux_file *filp; 624 struct task_struct t; 625 struct thread *td; 626 struct file *file; 627 ssize_t bytes; 628 int error; 629 630 td = curthread; 631 file = td->td_fpop; 632 ldev = dev->si_drv1; 633 if (ldev == NULL) 634 return (0); 635 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 636 return (error); 637 filp->f_flags = file->f_flag; 638 /* XXX no support for I/O vectors currently */ 639 if (uio->uio_iovcnt != 1) 640 return (EOPNOTSUPP); 641 linux_set_current(td, &t); 642 if (filp->f_op->write) { 643 bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, 644 uio->uio_iov->iov_len, &uio->uio_offset); 645 if (bytes >= 0) { 646 uio->uio_iov->iov_base = 647 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 648 uio->uio_iov->iov_len -= bytes; 649 uio->uio_resid -= bytes; 650 } else 651 error = -bytes; 652 } else 653 error = ENXIO; 654 linux_clear_current(td); 655 656 return (error); 657 } 658 659 static int 660 linux_dev_poll(struct cdev *dev, int events, struct thread *td) 661 { 662 struct linux_cdev *ldev; 663 struct linux_file *filp; 664 struct task_struct t; 665 struct file *file; 666 int revents; 667 int error; 668 669 file = td->td_fpop; 670 ldev = dev->si_drv1; 671 if (ldev == NULL) 672 return (0); 673 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 674 return (error); 675 filp->f_flags = file->f_flag; 676 linux_set_current(td, &t); 677 if (filp->f_op->poll) 678 revents = filp->f_op->poll(filp, NULL) & events; 679 else 680 revents = 0; 681 linux_clear_current(td); 682 683 return (revents); 684 } 685 686 static int 687 linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, 688 vm_size_t size, struct vm_object **object, int nprot) 689 { 690 struct linux_cdev *ldev; 691 struct linux_file *filp; 692 struct thread *td; 693 struct task_struct t; 694 struct file *file; 695 struct vm_area_struct vma; 696 int error; 697 698 td = curthread; 699 file = td->td_fpop; 700 ldev = dev->si_drv1; 701 if (ldev == NULL) 702 return (ENODEV); 703 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 704 return (error); 705 filp->f_flags = file->f_flag; 706 linux_set_current(td, &t); 707 vma.vm_start = 0; 708 vma.vm_end = size; 709 vma.vm_pgoff = *offset / PAGE_SIZE; 710 vma.vm_pfn = 0; 711 vma.vm_page_prot = VM_MEMATTR_DEFAULT; 712 if (filp->f_op->mmap) { 713 error = -filp->f_op->mmap(filp, &vma); 714 if (error == 0) { 715 struct sglist *sg; 716 717 sg = sglist_alloc(1, M_WAITOK); 718 sglist_append_phys(sg, 719 (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len); 720 *object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len, 721 nprot, 0, td->td_ucred); 722 if (*object == NULL) { 723 sglist_free(sg); 724 error = EINVAL; 725 goto done; 726 } 727 *offset = 0; 728 if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) { 729 VM_OBJECT_WLOCK(*object); 730 vm_object_set_memattr(*object, 731 vma.vm_page_prot); 732 VM_OBJECT_WUNLOCK(*object); 733 } 734 } 735 } else 736 error = ENODEV; 737 done: 738 linux_clear_current(td); 739 return (error); 740 } 741 742 struct cdevsw linuxcdevsw = { 743 .d_version = D_VERSION, 744 .d_flags = D_TRACKCLOSE, 745 .d_open = linux_dev_open, 746 .d_close = linux_dev_close, 747 .d_read = linux_dev_read, 748 .d_write = linux_dev_write, 749 .d_ioctl = linux_dev_ioctl, 750 .d_mmap_single = linux_dev_mmap_single, 751 .d_poll = linux_dev_poll, 752 }; 753 754 static int 755 linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, 756 int flags, struct thread *td) 757 { 758 struct linux_file *filp; 759 struct task_struct t; 760 ssize_t bytes; 761 int error; 762 763 error = 0; 764 filp = (struct linux_file *)file->f_data; 765 filp->f_flags = file->f_flag; 766 /* XXX no support for I/O vectors currently */ 767 if (uio->uio_iovcnt != 1) 768 return (EOPNOTSUPP); 769 linux_set_current(td, &t); 770 if (filp->f_op->read) { 771 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 772 uio->uio_iov->iov_len, &uio->uio_offset); 773 if (bytes >= 0) { 774 uio->uio_iov->iov_base = 775 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 776 uio->uio_iov->iov_len -= bytes; 777 uio->uio_resid -= bytes; 778 } else 779 error = -bytes; 780 } else 781 error = ENXIO; 782 linux_clear_current(td); 783 784 return (error); 785 } 786 787 static int 788 linux_file_poll(struct file *file, int events, struct ucred *active_cred, 789 struct thread *td) 790 { 791 struct linux_file *filp; 792 struct task_struct t; 793 int revents; 794 795 filp = (struct linux_file *)file->f_data; 796 filp->f_flags = file->f_flag; 797 linux_set_current(td, &t); 798 if (filp->f_op->poll) 799 revents = filp->f_op->poll(filp, NULL) & events; 800 else 801 revents = 0; 802 linux_clear_current(td); 803 804 return (revents); 805 } 806 807 static int 808 linux_file_close(struct file *file, struct thread *td) 809 { 810 struct linux_file *filp; 811 struct task_struct t; 812 int error; 813 814 filp = (struct linux_file *)file->f_data; 815 filp->f_flags = file->f_flag; 816 linux_set_current(td, &t); 817 error = -filp->f_op->release(NULL, filp); 818 linux_clear_current(td); 819 funsetown(&filp->f_sigio); 820 kfree(filp); 821 822 return (error); 823 } 824 825 static int 826 linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 827 struct thread *td) 828 { 829 struct linux_file *filp; 830 struct task_struct t; 831 int error; 832 833 filp = (struct linux_file *)fp->f_data; 834 filp->f_flags = fp->f_flag; 835 error = 0; 836 837 linux_set_current(td, &t); 838 switch (cmd) { 839 case FIONBIO: 840 break; 841 case FIOASYNC: 842 if (filp->f_op->fasync == NULL) 843 break; 844 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 845 break; 846 case FIOSETOWN: 847 error = fsetown(*(int *)data, &filp->f_sigio); 848 if (error == 0) 849 error = filp->f_op->fasync(0, filp, 850 fp->f_flag & FASYNC); 851 break; 852 case FIOGETOWN: 853 *(int *)data = fgetown(&filp->f_sigio); 854 break; 855 default: 856 error = ENOTTY; 857 break; 858 } 859 linux_clear_current(td); 860 return (error); 861 } 862 863 static int 864 linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 865 struct thread *td) 866 { 867 868 return (EOPNOTSUPP); 869 } 870 871 static int 872 linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, 873 struct filedesc *fdp) 874 { 875 876 return (0); 877 } 878 879 struct fileops linuxfileops = { 880 .fo_read = linux_file_read, 881 .fo_write = invfo_rdwr, 882 .fo_truncate = invfo_truncate, 883 .fo_kqfilter = invfo_kqfilter, 884 .fo_stat = linux_file_stat, 885 .fo_fill_kinfo = linux_file_fill_kinfo, 886 .fo_poll = linux_file_poll, 887 .fo_close = linux_file_close, 888 .fo_ioctl = linux_file_ioctl, 889 .fo_chmod = invfo_chmod, 890 .fo_chown = invfo_chown, 891 .fo_sendfile = invfo_sendfile, 892 }; 893 894 /* 895 * Hash of vmmap addresses. This is infrequently accessed and does not 896 * need to be particularly large. This is done because we must store the 897 * caller's idea of the map size to properly unmap. 898 */ 899 struct vmmap { 900 LIST_ENTRY(vmmap) vm_next; 901 void *vm_addr; 902 unsigned long vm_size; 903 }; 904 905 struct vmmaphd { 906 struct vmmap *lh_first; 907 }; 908 #define VMMAP_HASH_SIZE 64 909 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 910 #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 911 static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 912 static struct mtx vmmaplock; 913 914 static void 915 vmmap_add(void *addr, unsigned long size) 916 { 917 struct vmmap *vmmap; 918 919 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 920 mtx_lock(&vmmaplock); 921 vmmap->vm_size = size; 922 vmmap->vm_addr = addr; 923 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 924 mtx_unlock(&vmmaplock); 925 } 926 927 static struct vmmap * 928 vmmap_remove(void *addr) 929 { 930 struct vmmap *vmmap; 931 932 mtx_lock(&vmmaplock); 933 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 934 if (vmmap->vm_addr == addr) 935 break; 936 if (vmmap) 937 LIST_REMOVE(vmmap, vm_next); 938 mtx_unlock(&vmmaplock); 939 940 return (vmmap); 941 } 942 943 #if defined(__i386__) || defined(__amd64__) 944 void * 945 _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 946 { 947 void *addr; 948 949 addr = pmap_mapdev_attr(phys_addr, size, attr); 950 if (addr == NULL) 951 return (NULL); 952 vmmap_add(addr, size); 953 954 return (addr); 955 } 956 #endif 957 958 void 959 iounmap(void *addr) 960 { 961 struct vmmap *vmmap; 962 963 vmmap = vmmap_remove(addr); 964 if (vmmap == NULL) 965 return; 966 #if defined(__i386__) || defined(__amd64__) 967 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 968 #endif 969 kfree(vmmap); 970 } 971 972 973 void * 974 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 975 { 976 vm_offset_t off; 977 size_t size; 978 979 size = count * PAGE_SIZE; 980 off = kva_alloc(size); 981 if (off == 0) 982 return (NULL); 983 vmmap_add((void *)off, size); 984 pmap_qenter(off, pages, count); 985 986 return ((void *)off); 987 } 988 989 void 990 vunmap(void *addr) 991 { 992 struct vmmap *vmmap; 993 994 vmmap = vmmap_remove(addr); 995 if (vmmap == NULL) 996 return; 997 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 998 kva_free((vm_offset_t)addr, vmmap->vm_size); 999 kfree(vmmap); 1000 } 1001 1002 char * 1003 kvasprintf(gfp_t gfp, const char *fmt, va_list ap) 1004 { 1005 unsigned int len; 1006 char *p; 1007 va_list aq; 1008 1009 va_copy(aq, ap); 1010 len = vsnprintf(NULL, 0, fmt, aq); 1011 va_end(aq); 1012 1013 p = kmalloc(len + 1, gfp); 1014 if (p != NULL) 1015 vsnprintf(p, len + 1, fmt, ap); 1016 1017 return (p); 1018 } 1019 1020 char * 1021 kasprintf(gfp_t gfp, const char *fmt, ...) 1022 { 1023 va_list ap; 1024 char *p; 1025 1026 va_start(ap, fmt); 1027 p = kvasprintf(gfp, fmt, ap); 1028 va_end(ap); 1029 1030 return (p); 1031 } 1032 1033 static void 1034 linux_timer_callback_wrapper(void *context) 1035 { 1036 struct timer_list *timer; 1037 1038 timer = context; 1039 timer->function(timer->data); 1040 } 1041 1042 void 1043 mod_timer(struct timer_list *timer, unsigned long expires) 1044 { 1045 1046 timer->expires = expires; 1047 callout_reset(&timer->timer_callout, 1048 linux_timer_jiffies_until(expires), 1049 &linux_timer_callback_wrapper, timer); 1050 } 1051 1052 void 1053 add_timer(struct timer_list *timer) 1054 { 1055 1056 callout_reset(&timer->timer_callout, 1057 linux_timer_jiffies_until(timer->expires), 1058 &linux_timer_callback_wrapper, timer); 1059 } 1060 1061 static void 1062 linux_timer_init(void *arg) 1063 { 1064 1065 /* 1066 * Compute an internal HZ value which can divide 2**32 to 1067 * avoid timer rounding problems when the tick value wraps 1068 * around 2**32: 1069 */ 1070 linux_timer_hz_mask = 1; 1071 while (linux_timer_hz_mask < (unsigned long)hz) 1072 linux_timer_hz_mask *= 2; 1073 linux_timer_hz_mask--; 1074 } 1075 SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL); 1076 1077 void 1078 linux_complete_common(struct completion *c, int all) 1079 { 1080 int wakeup_swapper; 1081 1082 sleepq_lock(c); 1083 c->done++; 1084 if (all) 1085 wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); 1086 else 1087 wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); 1088 sleepq_release(c); 1089 if (wakeup_swapper) 1090 kick_proc0(); 1091 } 1092 1093 /* 1094 * Indefinite wait for done != 0 with or without signals. 1095 */ 1096 long 1097 linux_wait_for_common(struct completion *c, int flags) 1098 { 1099 if (SCHEDULER_STOPPED()) 1100 return (0); 1101 1102 if (flags != 0) 1103 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1104 else 1105 flags = SLEEPQ_SLEEP; 1106 for (;;) { 1107 sleepq_lock(c); 1108 if (c->done) 1109 break; 1110 sleepq_add(c, NULL, "completion", flags, 0); 1111 if (flags & SLEEPQ_INTERRUPTIBLE) { 1112 if (sleepq_wait_sig(c, 0) != 0) 1113 return (-ERESTARTSYS); 1114 } else 1115 sleepq_wait(c, 0); 1116 } 1117 c->done--; 1118 sleepq_release(c); 1119 1120 return (0); 1121 } 1122 1123 /* 1124 * Time limited wait for done != 0 with or without signals. 1125 */ 1126 long 1127 linux_wait_for_timeout_common(struct completion *c, long timeout, int flags) 1128 { 1129 long end = jiffies + timeout; 1130 1131 if (SCHEDULER_STOPPED()) 1132 return (0); 1133 1134 if (flags != 0) 1135 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1136 else 1137 flags = SLEEPQ_SLEEP; 1138 for (;;) { 1139 int ret; 1140 1141 sleepq_lock(c); 1142 if (c->done) 1143 break; 1144 sleepq_add(c, NULL, "completion", flags, 0); 1145 sleepq_set_timeout(c, linux_timer_jiffies_until(end)); 1146 if (flags & SLEEPQ_INTERRUPTIBLE) 1147 ret = sleepq_timedwait_sig(c, 0); 1148 else 1149 ret = sleepq_timedwait(c, 0); 1150 if (ret != 0) { 1151 /* check for timeout or signal */ 1152 if (ret == EWOULDBLOCK) 1153 return (0); 1154 else 1155 return (-ERESTARTSYS); 1156 } 1157 } 1158 c->done--; 1159 sleepq_release(c); 1160 1161 /* return how many jiffies are left */ 1162 return (linux_timer_jiffies_until(end)); 1163 } 1164 1165 int 1166 linux_try_wait_for_completion(struct completion *c) 1167 { 1168 int isdone; 1169 1170 isdone = 1; 1171 sleepq_lock(c); 1172 if (c->done) 1173 c->done--; 1174 else 1175 isdone = 0; 1176 sleepq_release(c); 1177 return (isdone); 1178 } 1179 1180 int 1181 linux_completion_done(struct completion *c) 1182 { 1183 int isdone; 1184 1185 isdone = 1; 1186 sleepq_lock(c); 1187 if (c->done == 0) 1188 isdone = 0; 1189 sleepq_release(c); 1190 return (isdone); 1191 } 1192 1193 void 1194 linux_delayed_work_fn(void *arg) 1195 { 1196 struct delayed_work *work; 1197 1198 work = arg; 1199 taskqueue_enqueue(work->work.taskqueue, &work->work.work_task); 1200 } 1201 1202 void 1203 linux_work_fn(void *context, int pending) 1204 { 1205 struct work_struct *work; 1206 1207 work = context; 1208 work->fn(work); 1209 } 1210 1211 void 1212 linux_flush_fn(void *context, int pending) 1213 { 1214 } 1215 1216 struct workqueue_struct * 1217 linux_create_workqueue_common(const char *name, int cpus) 1218 { 1219 struct workqueue_struct *wq; 1220 1221 wq = kmalloc(sizeof(*wq), M_WAITOK); 1222 wq->taskqueue = taskqueue_create(name, M_WAITOK, 1223 taskqueue_thread_enqueue, &wq->taskqueue); 1224 atomic_set(&wq->draining, 0); 1225 taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name); 1226 1227 return (wq); 1228 } 1229 1230 void 1231 destroy_workqueue(struct workqueue_struct *wq) 1232 { 1233 taskqueue_free(wq->taskqueue); 1234 kfree(wq); 1235 } 1236 1237 static void 1238 linux_cdev_release(struct kobject *kobj) 1239 { 1240 struct linux_cdev *cdev; 1241 struct kobject *parent; 1242 1243 cdev = container_of(kobj, struct linux_cdev, kobj); 1244 parent = kobj->parent; 1245 if (cdev->cdev) 1246 destroy_dev(cdev->cdev); 1247 kfree(cdev); 1248 kobject_put(parent); 1249 } 1250 1251 static void 1252 linux_cdev_static_release(struct kobject *kobj) 1253 { 1254 struct linux_cdev *cdev; 1255 struct kobject *parent; 1256 1257 cdev = container_of(kobj, struct linux_cdev, kobj); 1258 parent = kobj->parent; 1259 if (cdev->cdev) 1260 destroy_dev(cdev->cdev); 1261 kobject_put(parent); 1262 } 1263 1264 const struct kobj_type linux_cdev_ktype = { 1265 .release = linux_cdev_release, 1266 }; 1267 1268 const struct kobj_type linux_cdev_static_ktype = { 1269 .release = linux_cdev_static_release, 1270 }; 1271 1272 static void 1273 linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate) 1274 { 1275 struct notifier_block *nb; 1276 1277 nb = arg; 1278 if (linkstate == LINK_STATE_UP) 1279 nb->notifier_call(nb, NETDEV_UP, ifp); 1280 else 1281 nb->notifier_call(nb, NETDEV_DOWN, ifp); 1282 } 1283 1284 static void 1285 linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp) 1286 { 1287 struct notifier_block *nb; 1288 1289 nb = arg; 1290 nb->notifier_call(nb, NETDEV_REGISTER, ifp); 1291 } 1292 1293 static void 1294 linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp) 1295 { 1296 struct notifier_block *nb; 1297 1298 nb = arg; 1299 nb->notifier_call(nb, NETDEV_UNREGISTER, ifp); 1300 } 1301 1302 static void 1303 linux_handle_iflladdr_event(void *arg, struct ifnet *ifp) 1304 { 1305 struct notifier_block *nb; 1306 1307 nb = arg; 1308 nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp); 1309 } 1310 1311 static void 1312 linux_handle_ifaddr_event(void *arg, struct ifnet *ifp) 1313 { 1314 struct notifier_block *nb; 1315 1316 nb = arg; 1317 nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp); 1318 } 1319 1320 int 1321 register_netdevice_notifier(struct notifier_block *nb) 1322 { 1323 1324 nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER( 1325 ifnet_link_event, linux_handle_ifnet_link_event, nb, 0); 1326 nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER( 1327 ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0); 1328 nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER( 1329 ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0); 1330 nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER( 1331 iflladdr_event, linux_handle_iflladdr_event, nb, 0); 1332 1333 return (0); 1334 } 1335 1336 int 1337 register_inetaddr_notifier(struct notifier_block *nb) 1338 { 1339 1340 nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER( 1341 ifaddr_event, linux_handle_ifaddr_event, nb, 0); 1342 return (0); 1343 } 1344 1345 int 1346 unregister_netdevice_notifier(struct notifier_block *nb) 1347 { 1348 1349 EVENTHANDLER_DEREGISTER(ifnet_link_event, 1350 nb->tags[NETDEV_UP]); 1351 EVENTHANDLER_DEREGISTER(ifnet_arrival_event, 1352 nb->tags[NETDEV_REGISTER]); 1353 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 1354 nb->tags[NETDEV_UNREGISTER]); 1355 EVENTHANDLER_DEREGISTER(iflladdr_event, 1356 nb->tags[NETDEV_CHANGEADDR]); 1357 1358 return (0); 1359 } 1360 1361 int 1362 unregister_inetaddr_notifier(struct notifier_block *nb) 1363 { 1364 1365 EVENTHANDLER_DEREGISTER(ifaddr_event, 1366 nb->tags[NETDEV_CHANGEIFADDR]); 1367 1368 return (0); 1369 } 1370 1371 struct list_sort_thunk { 1372 int (*cmp)(void *, struct list_head *, struct list_head *); 1373 void *priv; 1374 }; 1375 1376 static inline int 1377 linux_le_cmp(void *priv, const void *d1, const void *d2) 1378 { 1379 struct list_head *le1, *le2; 1380 struct list_sort_thunk *thunk; 1381 1382 thunk = priv; 1383 le1 = *(__DECONST(struct list_head **, d1)); 1384 le2 = *(__DECONST(struct list_head **, d2)); 1385 return ((thunk->cmp)(thunk->priv, le1, le2)); 1386 } 1387 1388 void 1389 list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv, 1390 struct list_head *a, struct list_head *b)) 1391 { 1392 struct list_sort_thunk thunk; 1393 struct list_head **ar, *le; 1394 size_t count, i; 1395 1396 count = 0; 1397 list_for_each(le, head) 1398 count++; 1399 ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK); 1400 i = 0; 1401 list_for_each(le, head) 1402 ar[i++] = le; 1403 thunk.cmp = cmp; 1404 thunk.priv = priv; 1405 qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp); 1406 INIT_LIST_HEAD(head); 1407 for (i = 0; i < count; i++) 1408 list_add_tail(ar[i], head); 1409 free(ar, M_KMALLOC); 1410 } 1411 1412 void 1413 linux_irq_handler(void *ent) 1414 { 1415 struct irq_ent *irqe; 1416 1417 irqe = ent; 1418 irqe->handler(irqe->irq, irqe->arg); 1419 } 1420 1421 #if defined(__i386__) || defined(__amd64__) 1422 bool linux_cpu_has_clflush; 1423 #endif 1424 1425 static void 1426 linux_compat_init(void *arg) 1427 { 1428 struct sysctl_oid *rootoid; 1429 int i; 1430 1431 #if defined(__i386__) || defined(__amd64__) 1432 linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH); 1433 #endif 1434 sx_init(&linux_global_rcu_lock, "LinuxGlobalRCU"); 1435 1436 rootoid = SYSCTL_ADD_ROOT_NODE(NULL, 1437 OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); 1438 kobject_init(&linux_class_root, &linux_class_ktype); 1439 kobject_set_name(&linux_class_root, "class"); 1440 linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), 1441 OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); 1442 kobject_init(&linux_root_device.kobj, &linux_dev_ktype); 1443 kobject_set_name(&linux_root_device.kobj, "device"); 1444 linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL, 1445 SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, 1446 "device"); 1447 linux_root_device.bsddev = root_bus; 1448 linux_class_misc.name = "misc"; 1449 class_register(&linux_class_misc); 1450 INIT_LIST_HEAD(&pci_drivers); 1451 INIT_LIST_HEAD(&pci_devices); 1452 spin_lock_init(&pci_lock); 1453 mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); 1454 for (i = 0; i < VMMAP_HASH_SIZE; i++) 1455 LIST_INIT(&vmmaphead[i]); 1456 } 1457 SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); 1458 1459 static void 1460 linux_compat_uninit(void *arg) 1461 { 1462 linux_kobject_kfree_name(&linux_class_root); 1463 linux_kobject_kfree_name(&linux_root_device.kobj); 1464 linux_kobject_kfree_name(&linux_class_misc.kobj); 1465 1466 synchronize_rcu(); 1467 sx_destroy(&linux_global_rcu_lock); 1468 } 1469 SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); 1470 1471 /* 1472 * NOTE: Linux frequently uses "unsigned long" for pointer to integer 1473 * conversion and vice versa, where in FreeBSD "uintptr_t" would be 1474 * used. Assert these types have the same size, else some parts of the 1475 * LinuxKPI may not work like expected: 1476 */ 1477 CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t)); 1478