1 /*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice unmodified, this list of conditions, and the following 13 * disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/kernel.h> 37 #include <sys/sysctl.h> 38 #include <sys/proc.h> 39 #include <sys/sglist.h> 40 #include <sys/sleepqueue.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/bus.h> 44 #include <sys/fcntl.h> 45 #include <sys/file.h> 46 #include <sys/filio.h> 47 #include <sys/rwlock.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 52 #include <machine/stdarg.h> 53 54 #include <linux/kobject.h> 55 #include <linux/device.h> 56 #include <linux/slab.h> 57 #include <linux/module.h> 58 #include <linux/cdev.h> 59 #include <linux/file.h> 60 #include <linux/sysfs.h> 61 #include <linux/mm.h> 62 #include <linux/io.h> 63 #include <linux/vmalloc.h> 64 #include <linux/netdevice.h> 65 #include <linux/timer.h> 66 #include <linux/workqueue.h> 67 #include <linux/rcupdate.h> 68 #include <linux/interrupt.h> 69 #include <linux/uaccess.h> 70 71 #include <vm/vm_pager.h> 72 73 MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 74 75 #include <linux/rbtree.h> 76 /* Undo Linux compat changes. */ 77 #undef RB_ROOT 78 #undef file 79 #undef cdev 80 #define RB_ROOT(head) (head)->rbh_root 81 82 struct kobject linux_class_root; 83 struct device linux_root_device; 84 struct class linux_class_misc; 85 struct list_head pci_drivers; 86 struct list_head pci_devices; 87 struct net init_net; 88 spinlock_t pci_lock; 89 struct sx linux_global_rcu_lock; 90 91 unsigned long linux_timer_hz_mask; 92 93 int 94 panic_cmp(struct rb_node *one, struct rb_node *two) 95 { 96 panic("no cmp"); 97 } 98 99 RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 100 101 int 102 kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args) 103 { 104 va_list tmp_va; 105 int len; 106 char *old; 107 char *name; 108 char dummy; 109 110 old = kobj->name; 111 112 if (old && fmt == NULL) 113 return (0); 114 115 /* compute length of string */ 116 va_copy(tmp_va, args); 117 len = vsnprintf(&dummy, 0, fmt, tmp_va); 118 va_end(tmp_va); 119 120 /* account for zero termination */ 121 len++; 122 123 /* check for error */ 124 if (len < 1) 125 return (-EINVAL); 126 127 /* allocate memory for string */ 128 name = kzalloc(len, GFP_KERNEL); 129 if (name == NULL) 130 return (-ENOMEM); 131 vsnprintf(name, len, fmt, args); 132 kobj->name = name; 133 134 /* free old string */ 135 kfree(old); 136 137 /* filter new string */ 138 for (; *name != '\0'; name++) 139 if (*name == '/') 140 *name = '!'; 141 return (0); 142 } 143 144 int 145 kobject_set_name(struct kobject *kobj, const char *fmt, ...) 146 { 147 va_list args; 148 int error; 149 150 va_start(args, fmt); 151 error = kobject_set_name_vargs(kobj, fmt, args); 152 va_end(args); 153 154 return (error); 155 } 156 157 static int 158 kobject_add_complete(struct kobject *kobj, struct kobject *parent) 159 { 160 const struct kobj_type *t; 161 int error; 162 163 kobj->parent = parent; 164 error = sysfs_create_dir(kobj); 165 if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { 166 struct attribute **attr; 167 t = kobj->ktype; 168 169 for (attr = t->default_attrs; *attr != NULL; attr++) { 170 error = sysfs_create_file(kobj, *attr); 171 if (error) 172 break; 173 } 174 if (error) 175 sysfs_remove_dir(kobj); 176 177 } 178 return (error); 179 } 180 181 int 182 kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) 183 { 184 va_list args; 185 int error; 186 187 va_start(args, fmt); 188 error = kobject_set_name_vargs(kobj, fmt, args); 189 va_end(args); 190 if (error) 191 return (error); 192 193 return kobject_add_complete(kobj, parent); 194 } 195 196 void 197 linux_kobject_release(struct kref *kref) 198 { 199 struct kobject *kobj; 200 char *name; 201 202 kobj = container_of(kref, struct kobject, kref); 203 sysfs_remove_dir(kobj); 204 name = kobj->name; 205 if (kobj->ktype && kobj->ktype->release) 206 kobj->ktype->release(kobj); 207 kfree(name); 208 } 209 210 static void 211 linux_kobject_kfree(struct kobject *kobj) 212 { 213 kfree(kobj); 214 } 215 216 static void 217 linux_kobject_kfree_name(struct kobject *kobj) 218 { 219 if (kobj) { 220 kfree(kobj->name); 221 } 222 } 223 224 const struct kobj_type linux_kfree_type = { 225 .release = linux_kobject_kfree 226 }; 227 228 static void 229 linux_device_release(struct device *dev) 230 { 231 pr_debug("linux_device_release: %s\n", dev_name(dev)); 232 kfree(dev); 233 } 234 235 static ssize_t 236 linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf) 237 { 238 struct class_attribute *dattr; 239 ssize_t error; 240 241 dattr = container_of(attr, struct class_attribute, attr); 242 error = -EIO; 243 if (dattr->show) 244 error = dattr->show(container_of(kobj, struct class, kobj), 245 dattr, buf); 246 return (error); 247 } 248 249 static ssize_t 250 linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf, 251 size_t count) 252 { 253 struct class_attribute *dattr; 254 ssize_t error; 255 256 dattr = container_of(attr, struct class_attribute, attr); 257 error = -EIO; 258 if (dattr->store) 259 error = dattr->store(container_of(kobj, struct class, kobj), 260 dattr, buf, count); 261 return (error); 262 } 263 264 static void 265 linux_class_release(struct kobject *kobj) 266 { 267 struct class *class; 268 269 class = container_of(kobj, struct class, kobj); 270 if (class->class_release) 271 class->class_release(class); 272 } 273 274 static const struct sysfs_ops linux_class_sysfs = { 275 .show = linux_class_show, 276 .store = linux_class_store, 277 }; 278 279 const struct kobj_type linux_class_ktype = { 280 .release = linux_class_release, 281 .sysfs_ops = &linux_class_sysfs 282 }; 283 284 static void 285 linux_dev_release(struct kobject *kobj) 286 { 287 struct device *dev; 288 289 dev = container_of(kobj, struct device, kobj); 290 /* This is the precedence defined by linux. */ 291 if (dev->release) 292 dev->release(dev); 293 else if (dev->class && dev->class->dev_release) 294 dev->class->dev_release(dev); 295 } 296 297 static ssize_t 298 linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf) 299 { 300 struct device_attribute *dattr; 301 ssize_t error; 302 303 dattr = container_of(attr, struct device_attribute, attr); 304 error = -EIO; 305 if (dattr->show) 306 error = dattr->show(container_of(kobj, struct device, kobj), 307 dattr, buf); 308 return (error); 309 } 310 311 static ssize_t 312 linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf, 313 size_t count) 314 { 315 struct device_attribute *dattr; 316 ssize_t error; 317 318 dattr = container_of(attr, struct device_attribute, attr); 319 error = -EIO; 320 if (dattr->store) 321 error = dattr->store(container_of(kobj, struct device, kobj), 322 dattr, buf, count); 323 return (error); 324 } 325 326 static const struct sysfs_ops linux_dev_sysfs = { 327 .show = linux_dev_show, 328 .store = linux_dev_store, 329 }; 330 331 const struct kobj_type linux_dev_ktype = { 332 .release = linux_dev_release, 333 .sysfs_ops = &linux_dev_sysfs 334 }; 335 336 struct device * 337 device_create(struct class *class, struct device *parent, dev_t devt, 338 void *drvdata, const char *fmt, ...) 339 { 340 struct device *dev; 341 va_list args; 342 343 dev = kzalloc(sizeof(*dev), M_WAITOK); 344 dev->parent = parent; 345 dev->class = class; 346 dev->devt = devt; 347 dev->driver_data = drvdata; 348 dev->release = linux_device_release; 349 va_start(args, fmt); 350 kobject_set_name_vargs(&dev->kobj, fmt, args); 351 va_end(args); 352 device_register(dev); 353 354 return (dev); 355 } 356 357 int 358 kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, 359 struct kobject *parent, const char *fmt, ...) 360 { 361 va_list args; 362 int error; 363 364 kobject_init(kobj, ktype); 365 kobj->ktype = ktype; 366 kobj->parent = parent; 367 kobj->name = NULL; 368 369 va_start(args, fmt); 370 error = kobject_set_name_vargs(kobj, fmt, args); 371 va_end(args); 372 if (error) 373 return (error); 374 return kobject_add_complete(kobj, parent); 375 } 376 377 static void 378 linux_set_current(struct thread *td, struct task_struct *t) 379 { 380 memset(t, 0, sizeof(*t)); 381 task_struct_fill(td, t); 382 task_struct_set(td, t); 383 } 384 385 static void 386 linux_clear_current(struct thread *td) 387 { 388 task_struct_set(td, NULL); 389 } 390 391 static void 392 linux_file_dtor(void *cdp) 393 { 394 struct linux_file *filp; 395 struct task_struct t; 396 struct thread *td; 397 398 td = curthread; 399 filp = cdp; 400 linux_set_current(td, &t); 401 filp->f_op->release(filp->f_vnode, filp); 402 linux_clear_current(td); 403 vdrop(filp->f_vnode); 404 kfree(filp); 405 } 406 407 static int 408 linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 409 { 410 struct linux_cdev *ldev; 411 struct linux_file *filp; 412 struct task_struct t; 413 struct file *file; 414 int error; 415 416 file = td->td_fpop; 417 ldev = dev->si_drv1; 418 if (ldev == NULL) 419 return (ENODEV); 420 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 421 filp->f_dentry = &filp->f_dentry_store; 422 filp->f_op = ldev->ops; 423 filp->f_flags = file->f_flag; 424 vhold(file->f_vnode); 425 filp->f_vnode = file->f_vnode; 426 linux_set_current(td, &t); 427 if (filp->f_op->open) { 428 error = -filp->f_op->open(file->f_vnode, filp); 429 if (error) { 430 kfree(filp); 431 goto done; 432 } 433 } 434 error = devfs_set_cdevpriv(filp, linux_file_dtor); 435 if (error) { 436 filp->f_op->release(file->f_vnode, filp); 437 kfree(filp); 438 } 439 done: 440 linux_clear_current(td); 441 return (error); 442 } 443 444 static int 445 linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 446 { 447 struct linux_cdev *ldev; 448 struct linux_file *filp; 449 struct file *file; 450 int error; 451 452 file = td->td_fpop; 453 ldev = dev->si_drv1; 454 if (ldev == NULL) 455 return (0); 456 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 457 return (error); 458 filp->f_flags = file->f_flag; 459 devfs_clear_cdevpriv(); 460 461 462 return (0); 463 } 464 465 #define LINUX_IOCTL_MIN_PTR 0x10000UL 466 #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX) 467 468 static inline int 469 linux_remap_address(void **uaddr, size_t len) 470 { 471 uintptr_t uaddr_val = (uintptr_t)(*uaddr); 472 473 if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR && 474 uaddr_val < LINUX_IOCTL_MAX_PTR)) { 475 struct task_struct *pts = current; 476 if (pts == NULL) { 477 *uaddr = NULL; 478 return (1); 479 } 480 481 /* compute data offset */ 482 uaddr_val -= LINUX_IOCTL_MIN_PTR; 483 484 /* check that length is within bounds */ 485 if ((len > IOCPARM_MAX) || 486 (uaddr_val + len) > pts->bsd_ioctl_len) { 487 *uaddr = NULL; 488 return (1); 489 } 490 491 /* re-add kernel buffer address */ 492 uaddr_val += (uintptr_t)pts->bsd_ioctl_data; 493 494 /* update address location */ 495 *uaddr = (void *)uaddr_val; 496 return (1); 497 } 498 return (0); 499 } 500 501 int 502 linux_copyin(const void *uaddr, void *kaddr, size_t len) 503 { 504 if (linux_remap_address(__DECONST(void **, &uaddr), len)) { 505 if (uaddr == NULL) 506 return (-EFAULT); 507 memcpy(kaddr, uaddr, len); 508 return (0); 509 } 510 return (-copyin(uaddr, kaddr, len)); 511 } 512 513 int 514 linux_copyout(const void *kaddr, void *uaddr, size_t len) 515 { 516 if (linux_remap_address(&uaddr, len)) { 517 if (uaddr == NULL) 518 return (-EFAULT); 519 memcpy(uaddr, kaddr, len); 520 return (0); 521 } 522 return (-copyout(kaddr, uaddr, len)); 523 } 524 525 static int 526 linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 527 struct thread *td) 528 { 529 struct linux_cdev *ldev; 530 struct linux_file *filp; 531 struct task_struct t; 532 struct file *file; 533 unsigned size; 534 int error; 535 536 file = td->td_fpop; 537 ldev = dev->si_drv1; 538 if (ldev == NULL) 539 return (0); 540 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 541 return (error); 542 filp->f_flags = file->f_flag; 543 linux_set_current(td, &t); 544 size = IOCPARM_LEN(cmd); 545 /* refer to logic in sys_ioctl() */ 546 if (size > 0) { 547 /* 548 * Setup hint for linux_copyin() and linux_copyout(). 549 * 550 * Background: Linux code expects a user-space address 551 * while FreeBSD supplies a kernel-space address. 552 */ 553 t.bsd_ioctl_data = data; 554 t.bsd_ioctl_len = size; 555 data = (void *)LINUX_IOCTL_MIN_PTR; 556 } else { 557 /* fetch user-space pointer */ 558 data = *(void **)data; 559 } 560 if (filp->f_op->unlocked_ioctl) 561 error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); 562 else 563 error = ENOTTY; 564 linux_clear_current(td); 565 566 return (error); 567 } 568 569 static int 570 linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) 571 { 572 struct linux_cdev *ldev; 573 struct linux_file *filp; 574 struct task_struct t; 575 struct thread *td; 576 struct file *file; 577 ssize_t bytes; 578 int error; 579 580 td = curthread; 581 file = td->td_fpop; 582 ldev = dev->si_drv1; 583 if (ldev == NULL) 584 return (0); 585 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 586 return (error); 587 filp->f_flags = file->f_flag; 588 /* XXX no support for I/O vectors currently */ 589 if (uio->uio_iovcnt != 1) 590 return (EOPNOTSUPP); 591 linux_set_current(td, &t); 592 if (filp->f_op->read) { 593 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 594 uio->uio_iov->iov_len, &uio->uio_offset); 595 if (bytes >= 0) { 596 uio->uio_iov->iov_base = 597 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 598 uio->uio_iov->iov_len -= bytes; 599 uio->uio_resid -= bytes; 600 } else 601 error = -bytes; 602 } else 603 error = ENXIO; 604 linux_clear_current(td); 605 606 return (error); 607 } 608 609 static int 610 linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) 611 { 612 struct linux_cdev *ldev; 613 struct linux_file *filp; 614 struct task_struct t; 615 struct thread *td; 616 struct file *file; 617 ssize_t bytes; 618 int error; 619 620 td = curthread; 621 file = td->td_fpop; 622 ldev = dev->si_drv1; 623 if (ldev == NULL) 624 return (0); 625 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 626 return (error); 627 filp->f_flags = file->f_flag; 628 /* XXX no support for I/O vectors currently */ 629 if (uio->uio_iovcnt != 1) 630 return (EOPNOTSUPP); 631 linux_set_current(td, &t); 632 if (filp->f_op->write) { 633 bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, 634 uio->uio_iov->iov_len, &uio->uio_offset); 635 if (bytes >= 0) { 636 uio->uio_iov->iov_base = 637 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 638 uio->uio_iov->iov_len -= bytes; 639 uio->uio_resid -= bytes; 640 } else 641 error = -bytes; 642 } else 643 error = ENXIO; 644 linux_clear_current(td); 645 646 return (error); 647 } 648 649 static int 650 linux_dev_poll(struct cdev *dev, int events, struct thread *td) 651 { 652 struct linux_cdev *ldev; 653 struct linux_file *filp; 654 struct task_struct t; 655 struct file *file; 656 int revents; 657 int error; 658 659 file = td->td_fpop; 660 ldev = dev->si_drv1; 661 if (ldev == NULL) 662 return (0); 663 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 664 return (error); 665 filp->f_flags = file->f_flag; 666 linux_set_current(td, &t); 667 if (filp->f_op->poll) 668 revents = filp->f_op->poll(filp, NULL) & events; 669 else 670 revents = 0; 671 linux_clear_current(td); 672 673 return (revents); 674 } 675 676 static int 677 linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, 678 vm_size_t size, struct vm_object **object, int nprot) 679 { 680 struct linux_cdev *ldev; 681 struct linux_file *filp; 682 struct thread *td; 683 struct task_struct t; 684 struct file *file; 685 struct vm_area_struct vma; 686 int error; 687 688 td = curthread; 689 file = td->td_fpop; 690 ldev = dev->si_drv1; 691 if (ldev == NULL) 692 return (ENODEV); 693 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 694 return (error); 695 filp->f_flags = file->f_flag; 696 linux_set_current(td, &t); 697 vma.vm_start = 0; 698 vma.vm_end = size; 699 vma.vm_pgoff = *offset / PAGE_SIZE; 700 vma.vm_pfn = 0; 701 vma.vm_page_prot = VM_MEMATTR_DEFAULT; 702 if (filp->f_op->mmap) { 703 error = -filp->f_op->mmap(filp, &vma); 704 if (error == 0) { 705 struct sglist *sg; 706 707 sg = sglist_alloc(1, M_WAITOK); 708 sglist_append_phys(sg, 709 (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len); 710 *object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len, 711 nprot, 0, td->td_ucred); 712 if (*object == NULL) { 713 sglist_free(sg); 714 error = EINVAL; 715 goto done; 716 } 717 *offset = 0; 718 if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) { 719 VM_OBJECT_WLOCK(*object); 720 vm_object_set_memattr(*object, 721 vma.vm_page_prot); 722 VM_OBJECT_WUNLOCK(*object); 723 } 724 } 725 } else 726 error = ENODEV; 727 done: 728 linux_clear_current(td); 729 return (error); 730 } 731 732 struct cdevsw linuxcdevsw = { 733 .d_version = D_VERSION, 734 .d_flags = D_TRACKCLOSE, 735 .d_open = linux_dev_open, 736 .d_close = linux_dev_close, 737 .d_read = linux_dev_read, 738 .d_write = linux_dev_write, 739 .d_ioctl = linux_dev_ioctl, 740 .d_mmap_single = linux_dev_mmap_single, 741 .d_poll = linux_dev_poll, 742 }; 743 744 static int 745 linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, 746 int flags, struct thread *td) 747 { 748 struct linux_file *filp; 749 struct task_struct t; 750 ssize_t bytes; 751 int error; 752 753 error = 0; 754 filp = (struct linux_file *)file->f_data; 755 filp->f_flags = file->f_flag; 756 /* XXX no support for I/O vectors currently */ 757 if (uio->uio_iovcnt != 1) 758 return (EOPNOTSUPP); 759 linux_set_current(td, &t); 760 if (filp->f_op->read) { 761 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 762 uio->uio_iov->iov_len, &uio->uio_offset); 763 if (bytes >= 0) { 764 uio->uio_iov->iov_base = 765 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 766 uio->uio_iov->iov_len -= bytes; 767 uio->uio_resid -= bytes; 768 } else 769 error = -bytes; 770 } else 771 error = ENXIO; 772 linux_clear_current(td); 773 774 return (error); 775 } 776 777 static int 778 linux_file_poll(struct file *file, int events, struct ucred *active_cred, 779 struct thread *td) 780 { 781 struct linux_file *filp; 782 struct task_struct t; 783 int revents; 784 785 filp = (struct linux_file *)file->f_data; 786 filp->f_flags = file->f_flag; 787 linux_set_current(td, &t); 788 if (filp->f_op->poll) 789 revents = filp->f_op->poll(filp, NULL) & events; 790 else 791 revents = 0; 792 linux_clear_current(td); 793 794 return (revents); 795 } 796 797 static int 798 linux_file_close(struct file *file, struct thread *td) 799 { 800 struct linux_file *filp; 801 struct task_struct t; 802 int error; 803 804 filp = (struct linux_file *)file->f_data; 805 filp->f_flags = file->f_flag; 806 linux_set_current(td, &t); 807 error = -filp->f_op->release(NULL, filp); 808 linux_clear_current(td); 809 funsetown(&filp->f_sigio); 810 kfree(filp); 811 812 return (error); 813 } 814 815 static int 816 linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 817 struct thread *td) 818 { 819 struct linux_file *filp; 820 struct task_struct t; 821 int error; 822 823 filp = (struct linux_file *)fp->f_data; 824 filp->f_flags = fp->f_flag; 825 error = 0; 826 827 linux_set_current(td, &t); 828 switch (cmd) { 829 case FIONBIO: 830 break; 831 case FIOASYNC: 832 if (filp->f_op->fasync == NULL) 833 break; 834 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 835 break; 836 case FIOSETOWN: 837 error = fsetown(*(int *)data, &filp->f_sigio); 838 if (error == 0) 839 error = filp->f_op->fasync(0, filp, 840 fp->f_flag & FASYNC); 841 break; 842 case FIOGETOWN: 843 *(int *)data = fgetown(&filp->f_sigio); 844 break; 845 default: 846 error = ENOTTY; 847 break; 848 } 849 linux_clear_current(td); 850 return (error); 851 } 852 853 static int 854 linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 855 struct thread *td) 856 { 857 858 return (EOPNOTSUPP); 859 } 860 861 static int 862 linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, 863 struct filedesc *fdp) 864 { 865 866 return (0); 867 } 868 869 struct fileops linuxfileops = { 870 .fo_read = linux_file_read, 871 .fo_write = invfo_rdwr, 872 .fo_truncate = invfo_truncate, 873 .fo_kqfilter = invfo_kqfilter, 874 .fo_stat = linux_file_stat, 875 .fo_fill_kinfo = linux_file_fill_kinfo, 876 .fo_poll = linux_file_poll, 877 .fo_close = linux_file_close, 878 .fo_ioctl = linux_file_ioctl, 879 .fo_chmod = invfo_chmod, 880 .fo_chown = invfo_chown, 881 .fo_sendfile = invfo_sendfile, 882 }; 883 884 /* 885 * Hash of vmmap addresses. This is infrequently accessed and does not 886 * need to be particularly large. This is done because we must store the 887 * caller's idea of the map size to properly unmap. 888 */ 889 struct vmmap { 890 LIST_ENTRY(vmmap) vm_next; 891 void *vm_addr; 892 unsigned long vm_size; 893 }; 894 895 struct vmmaphd { 896 struct vmmap *lh_first; 897 }; 898 #define VMMAP_HASH_SIZE 64 899 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 900 #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 901 static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 902 static struct mtx vmmaplock; 903 904 static void 905 vmmap_add(void *addr, unsigned long size) 906 { 907 struct vmmap *vmmap; 908 909 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 910 mtx_lock(&vmmaplock); 911 vmmap->vm_size = size; 912 vmmap->vm_addr = addr; 913 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 914 mtx_unlock(&vmmaplock); 915 } 916 917 static struct vmmap * 918 vmmap_remove(void *addr) 919 { 920 struct vmmap *vmmap; 921 922 mtx_lock(&vmmaplock); 923 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 924 if (vmmap->vm_addr == addr) 925 break; 926 if (vmmap) 927 LIST_REMOVE(vmmap, vm_next); 928 mtx_unlock(&vmmaplock); 929 930 return (vmmap); 931 } 932 933 #if defined(__i386__) || defined(__amd64__) 934 void * 935 _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 936 { 937 void *addr; 938 939 addr = pmap_mapdev_attr(phys_addr, size, attr); 940 if (addr == NULL) 941 return (NULL); 942 vmmap_add(addr, size); 943 944 return (addr); 945 } 946 #endif 947 948 void 949 iounmap(void *addr) 950 { 951 struct vmmap *vmmap; 952 953 vmmap = vmmap_remove(addr); 954 if (vmmap == NULL) 955 return; 956 #if defined(__i386__) || defined(__amd64__) 957 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 958 #endif 959 kfree(vmmap); 960 } 961 962 963 void * 964 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 965 { 966 vm_offset_t off; 967 size_t size; 968 969 size = count * PAGE_SIZE; 970 off = kva_alloc(size); 971 if (off == 0) 972 return (NULL); 973 vmmap_add((void *)off, size); 974 pmap_qenter(off, pages, count); 975 976 return ((void *)off); 977 } 978 979 void 980 vunmap(void *addr) 981 { 982 struct vmmap *vmmap; 983 984 vmmap = vmmap_remove(addr); 985 if (vmmap == NULL) 986 return; 987 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 988 kva_free((vm_offset_t)addr, vmmap->vm_size); 989 kfree(vmmap); 990 } 991 992 char * 993 kvasprintf(gfp_t gfp, const char *fmt, va_list ap) 994 { 995 unsigned int len; 996 char *p; 997 va_list aq; 998 999 va_copy(aq, ap); 1000 len = vsnprintf(NULL, 0, fmt, aq); 1001 va_end(aq); 1002 1003 p = kmalloc(len + 1, gfp); 1004 if (p != NULL) 1005 vsnprintf(p, len + 1, fmt, ap); 1006 1007 return (p); 1008 } 1009 1010 char * 1011 kasprintf(gfp_t gfp, const char *fmt, ...) 1012 { 1013 va_list ap; 1014 char *p; 1015 1016 va_start(ap, fmt); 1017 p = kvasprintf(gfp, fmt, ap); 1018 va_end(ap); 1019 1020 return (p); 1021 } 1022 1023 static void 1024 linux_timer_callback_wrapper(void *context) 1025 { 1026 struct timer_list *timer; 1027 1028 timer = context; 1029 timer->function(timer->data); 1030 } 1031 1032 void 1033 mod_timer(struct timer_list *timer, unsigned long expires) 1034 { 1035 1036 timer->expires = expires; 1037 callout_reset(&timer->timer_callout, 1038 linux_timer_jiffies_until(expires), 1039 &linux_timer_callback_wrapper, timer); 1040 } 1041 1042 void 1043 add_timer(struct timer_list *timer) 1044 { 1045 1046 callout_reset(&timer->timer_callout, 1047 linux_timer_jiffies_until(timer->expires), 1048 &linux_timer_callback_wrapper, timer); 1049 } 1050 1051 static void 1052 linux_timer_init(void *arg) 1053 { 1054 1055 /* 1056 * Compute an internal HZ value which can divide 2**32 to 1057 * avoid timer rounding problems when the tick value wraps 1058 * around 2**32: 1059 */ 1060 linux_timer_hz_mask = 1; 1061 while (linux_timer_hz_mask < (unsigned long)hz) 1062 linux_timer_hz_mask *= 2; 1063 linux_timer_hz_mask--; 1064 } 1065 SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL); 1066 1067 void 1068 linux_complete_common(struct completion *c, int all) 1069 { 1070 int wakeup_swapper; 1071 1072 sleepq_lock(c); 1073 c->done++; 1074 if (all) 1075 wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); 1076 else 1077 wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); 1078 sleepq_release(c); 1079 if (wakeup_swapper) 1080 kick_proc0(); 1081 } 1082 1083 /* 1084 * Indefinite wait for done != 0 with or without signals. 1085 */ 1086 long 1087 linux_wait_for_common(struct completion *c, int flags) 1088 { 1089 1090 if (flags != 0) 1091 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1092 else 1093 flags = SLEEPQ_SLEEP; 1094 for (;;) { 1095 sleepq_lock(c); 1096 if (c->done) 1097 break; 1098 sleepq_add(c, NULL, "completion", flags, 0); 1099 if (flags & SLEEPQ_INTERRUPTIBLE) { 1100 if (sleepq_wait_sig(c, 0) != 0) 1101 return (-ERESTARTSYS); 1102 } else 1103 sleepq_wait(c, 0); 1104 } 1105 c->done--; 1106 sleepq_release(c); 1107 1108 return (0); 1109 } 1110 1111 /* 1112 * Time limited wait for done != 0 with or without signals. 1113 */ 1114 long 1115 linux_wait_for_timeout_common(struct completion *c, long timeout, int flags) 1116 { 1117 long end = jiffies + timeout; 1118 1119 if (flags != 0) 1120 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1121 else 1122 flags = SLEEPQ_SLEEP; 1123 for (;;) { 1124 int ret; 1125 1126 sleepq_lock(c); 1127 if (c->done) 1128 break; 1129 sleepq_add(c, NULL, "completion", flags, 0); 1130 sleepq_set_timeout(c, linux_timer_jiffies_until(end)); 1131 if (flags & SLEEPQ_INTERRUPTIBLE) 1132 ret = sleepq_timedwait_sig(c, 0); 1133 else 1134 ret = sleepq_timedwait(c, 0); 1135 if (ret != 0) { 1136 /* check for timeout or signal */ 1137 if (ret == EWOULDBLOCK) 1138 return (0); 1139 else 1140 return (-ERESTARTSYS); 1141 } 1142 } 1143 c->done--; 1144 sleepq_release(c); 1145 1146 /* return how many jiffies are left */ 1147 return (linux_timer_jiffies_until(end)); 1148 } 1149 1150 int 1151 linux_try_wait_for_completion(struct completion *c) 1152 { 1153 int isdone; 1154 1155 isdone = 1; 1156 sleepq_lock(c); 1157 if (c->done) 1158 c->done--; 1159 else 1160 isdone = 0; 1161 sleepq_release(c); 1162 return (isdone); 1163 } 1164 1165 int 1166 linux_completion_done(struct completion *c) 1167 { 1168 int isdone; 1169 1170 isdone = 1; 1171 sleepq_lock(c); 1172 if (c->done == 0) 1173 isdone = 0; 1174 sleepq_release(c); 1175 return (isdone); 1176 } 1177 1178 void 1179 linux_delayed_work_fn(void *arg) 1180 { 1181 struct delayed_work *work; 1182 1183 work = arg; 1184 taskqueue_enqueue(work->work.taskqueue, &work->work.work_task); 1185 } 1186 1187 void 1188 linux_work_fn(void *context, int pending) 1189 { 1190 struct work_struct *work; 1191 1192 work = context; 1193 work->fn(work); 1194 } 1195 1196 void 1197 linux_flush_fn(void *context, int pending) 1198 { 1199 } 1200 1201 struct workqueue_struct * 1202 linux_create_workqueue_common(const char *name, int cpus) 1203 { 1204 struct workqueue_struct *wq; 1205 1206 wq = kmalloc(sizeof(*wq), M_WAITOK); 1207 wq->taskqueue = taskqueue_create(name, M_WAITOK, 1208 taskqueue_thread_enqueue, &wq->taskqueue); 1209 atomic_set(&wq->draining, 0); 1210 taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name); 1211 1212 return (wq); 1213 } 1214 1215 void 1216 destroy_workqueue(struct workqueue_struct *wq) 1217 { 1218 taskqueue_free(wq->taskqueue); 1219 kfree(wq); 1220 } 1221 1222 static void 1223 linux_cdev_release(struct kobject *kobj) 1224 { 1225 struct linux_cdev *cdev; 1226 struct kobject *parent; 1227 1228 cdev = container_of(kobj, struct linux_cdev, kobj); 1229 parent = kobj->parent; 1230 if (cdev->cdev) 1231 destroy_dev(cdev->cdev); 1232 kfree(cdev); 1233 kobject_put(parent); 1234 } 1235 1236 static void 1237 linux_cdev_static_release(struct kobject *kobj) 1238 { 1239 struct linux_cdev *cdev; 1240 struct kobject *parent; 1241 1242 cdev = container_of(kobj, struct linux_cdev, kobj); 1243 parent = kobj->parent; 1244 if (cdev->cdev) 1245 destroy_dev(cdev->cdev); 1246 kobject_put(parent); 1247 } 1248 1249 const struct kobj_type linux_cdev_ktype = { 1250 .release = linux_cdev_release, 1251 }; 1252 1253 const struct kobj_type linux_cdev_static_ktype = { 1254 .release = linux_cdev_static_release, 1255 }; 1256 1257 static void 1258 linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate) 1259 { 1260 struct notifier_block *nb; 1261 1262 nb = arg; 1263 if (linkstate == LINK_STATE_UP) 1264 nb->notifier_call(nb, NETDEV_UP, ifp); 1265 else 1266 nb->notifier_call(nb, NETDEV_DOWN, ifp); 1267 } 1268 1269 static void 1270 linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp) 1271 { 1272 struct notifier_block *nb; 1273 1274 nb = arg; 1275 nb->notifier_call(nb, NETDEV_REGISTER, ifp); 1276 } 1277 1278 static void 1279 linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp) 1280 { 1281 struct notifier_block *nb; 1282 1283 nb = arg; 1284 nb->notifier_call(nb, NETDEV_UNREGISTER, ifp); 1285 } 1286 1287 static void 1288 linux_handle_iflladdr_event(void *arg, struct ifnet *ifp) 1289 { 1290 struct notifier_block *nb; 1291 1292 nb = arg; 1293 nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp); 1294 } 1295 1296 static void 1297 linux_handle_ifaddr_event(void *arg, struct ifnet *ifp) 1298 { 1299 struct notifier_block *nb; 1300 1301 nb = arg; 1302 nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp); 1303 } 1304 1305 int 1306 register_netdevice_notifier(struct notifier_block *nb) 1307 { 1308 1309 nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER( 1310 ifnet_link_event, linux_handle_ifnet_link_event, nb, 0); 1311 nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER( 1312 ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0); 1313 nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER( 1314 ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0); 1315 nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER( 1316 iflladdr_event, linux_handle_iflladdr_event, nb, 0); 1317 1318 return (0); 1319 } 1320 1321 int 1322 register_inetaddr_notifier(struct notifier_block *nb) 1323 { 1324 1325 nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER( 1326 ifaddr_event, linux_handle_ifaddr_event, nb, 0); 1327 return (0); 1328 } 1329 1330 int 1331 unregister_netdevice_notifier(struct notifier_block *nb) 1332 { 1333 1334 EVENTHANDLER_DEREGISTER(ifnet_link_event, 1335 nb->tags[NETDEV_UP]); 1336 EVENTHANDLER_DEREGISTER(ifnet_arrival_event, 1337 nb->tags[NETDEV_REGISTER]); 1338 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 1339 nb->tags[NETDEV_UNREGISTER]); 1340 EVENTHANDLER_DEREGISTER(iflladdr_event, 1341 nb->tags[NETDEV_CHANGEADDR]); 1342 1343 return (0); 1344 } 1345 1346 int 1347 unregister_inetaddr_notifier(struct notifier_block *nb) 1348 { 1349 1350 EVENTHANDLER_DEREGISTER(ifaddr_event, 1351 nb->tags[NETDEV_CHANGEIFADDR]); 1352 1353 return (0); 1354 } 1355 1356 void 1357 linux_irq_handler(void *ent) 1358 { 1359 struct irq_ent *irqe; 1360 1361 irqe = ent; 1362 irqe->handler(irqe->irq, irqe->arg); 1363 } 1364 1365 static void 1366 linux_compat_init(void *arg) 1367 { 1368 struct sysctl_oid *rootoid; 1369 int i; 1370 1371 sx_init(&linux_global_rcu_lock, "LinuxGlobalRCU"); 1372 1373 rootoid = SYSCTL_ADD_ROOT_NODE(NULL, 1374 OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); 1375 kobject_init(&linux_class_root, &linux_class_ktype); 1376 kobject_set_name(&linux_class_root, "class"); 1377 linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), 1378 OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); 1379 kobject_init(&linux_root_device.kobj, &linux_dev_ktype); 1380 kobject_set_name(&linux_root_device.kobj, "device"); 1381 linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL, 1382 SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, 1383 "device"); 1384 linux_root_device.bsddev = root_bus; 1385 linux_class_misc.name = "misc"; 1386 class_register(&linux_class_misc); 1387 INIT_LIST_HEAD(&pci_drivers); 1388 INIT_LIST_HEAD(&pci_devices); 1389 spin_lock_init(&pci_lock); 1390 mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); 1391 for (i = 0; i < VMMAP_HASH_SIZE; i++) 1392 LIST_INIT(&vmmaphead[i]); 1393 } 1394 SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); 1395 1396 static void 1397 linux_compat_uninit(void *arg) 1398 { 1399 linux_kobject_kfree_name(&linux_class_root); 1400 linux_kobject_kfree_name(&linux_root_device.kobj); 1401 linux_kobject_kfree_name(&linux_class_misc.kobj); 1402 1403 synchronize_rcu(); 1404 sx_destroy(&linux_global_rcu_lock); 1405 } 1406 SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); 1407 1408 /* 1409 * NOTE: Linux frequently uses "unsigned long" for pointer to integer 1410 * conversion and vice versa, where in FreeBSD "uintptr_t" would be 1411 * used. Assert these types have the same size, else some parts of the 1412 * LinuxKPI may not work like expected: 1413 */ 1414 CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t)); 1415