/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/sleepqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/rwlock.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/stdarg.h>

#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#endif

#include <linux/kobject.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/sysfs.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>

#include <vm/vm_pager.h>

MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat");

#include <linux/rbtree.h>
/* Undo Linux compat changes. */
#undef RB_ROOT
#undef file
#undef cdev
#define	RB_ROOT(head)	(head)->rbh_root

struct kobject linux_class_root;
struct device linux_root_device;
struct class linux_class_misc;
struct list_head pci_drivers;
struct list_head pci_devices;
struct net init_net;
spinlock_t pci_lock;
struct sx linux_global_rcu_lock;

unsigned long linux_timer_hz_mask;

int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
	panic("no cmp");
}

RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);

int
kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
{
	va_list tmp_va;
	int len;
	char *old;
	char *name;
	char dummy;

	old = kobj->name;

	if (old && fmt == NULL)
		return (0);

	/* compute length of string */
	va_copy(tmp_va, args);
	len = vsnprintf(&dummy, 0, fmt, tmp_va);
	va_end(tmp_va);

	/* account for zero termination */
	len++;

	/* check for error */
	if (len < 1)
		return (-EINVAL);

	/* allocate memory for string */
	name = kzalloc(len, GFP_KERNEL);
	if (name == NULL)
		return (-ENOMEM);
	vsnprintf(name, len, fmt, args);
	kobj->name = name;

	/* free old string */
	kfree(old);

	/* filter new string */
	for (; *name != '\0'; name++)
		if (*name == '/')
			*name = '!';
	return (0);
}

int
kobject_set_name(struct kobject *kobj, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);

	return (error);
}

static int
kobject_add_complete(struct kobject *kobj, struct kobject *parent)
{
	const struct kobj_type *t;
	int error;

	kobj->parent = parent;
	error = sysfs_create_dir(kobj);
	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
		struct attribute **attr;
		t = kobj->ktype;

		for (attr = t->default_attrs; *attr != NULL; attr++) {
			error = sysfs_create_file(kobj, *attr);
			if (error)
				break;
		}
		if (error)
			sysfs_remove_dir(kobj);

	}
	return (error);
}
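/*
 * Illustrative example (not part of the compat layer): because
 * kobject_set_name_vargs() replaces every '/' in the formatted name
 * with '!', a hypothetical call such as
 *
 *	kobject_set_name(kobj, "port/%d", 1);
 *
 * leaves kobj->name as "port!1", which is a valid sysfs directory
 * name.
 */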
int
kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);

	return kobject_add_complete(kobj, parent);
}

void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}

static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}

static void
linux_kobject_kfree_name(struct kobject *kobj)
{
	if (kobj) {
		kfree(kobj->name);
	}
}

const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};

static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}

static ssize_t
linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct class, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct class, kobj),
		    dattr, buf, count);
	return (error);
}

static void
linux_class_release(struct kobject *kobj)
{
	struct class *class;

	class = container_of(kobj, struct class, kobj);
	if (class->class_release)
		class->class_release(class);
}

static const struct sysfs_ops linux_class_sysfs = {
	.show = linux_class_show,
	.store = linux_class_store,
};

const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};

static void
linux_dev_release(struct kobject *kobj)
{
	struct device *dev;

	dev = container_of(kobj, struct device, kobj);
	/* This is the precedence defined by linux. */
	if (dev->release)
		dev->release(dev);
	else if (dev->class && dev->class->dev_release)
		dev->class->dev_release(dev);
}

static ssize_t
linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct device, kobj),
		    dattr, buf);
	return (error);
}

static ssize_t
linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct device, kobj),
		    dattr, buf, count);
	return (error);
}

static const struct sysfs_ops linux_dev_sysfs = {
	.show = linux_dev_show,
	.store = linux_dev_store,
};

const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};

struct device *
device_create(struct class *class, struct device *parent, dev_t devt,
    void *drvdata, const char *fmt, ...)
{
	struct device *dev;
	va_list args;

	dev = kzalloc(sizeof(*dev), M_WAITOK);
	dev->parent = parent;
	dev->class = class;
	dev->devt = devt;
	dev->driver_data = drvdata;
	dev->release = linux_device_release;
	va_start(args, fmt);
	kobject_set_name_vargs(&dev->kobj, fmt, args);
	va_end(args);
	device_register(dev);

	return (dev);
}
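/*
 * Illustrative example (not part of the compat layer): a driver that
 * has registered a class would typically create a device node with a
 * call along the lines of
 *
 *	dev = device_create(&my_class, NULL, 0, softc, "foo%d", 0);
 *
 * where "my_class", "softc" and the "foo%d" name are hypothetical.
 * The device is freed by linux_device_release() once its last
 * reference is dropped.
 */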
int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	kobject_init(kobj, ktype);
	kobj->ktype = ktype;
	kobj->parent = parent;
	kobj->name = NULL;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);
	return kobject_add_complete(kobj, parent);
}

static void
linux_set_current(struct thread *td, struct task_struct *t)
{
	memset(t, 0, sizeof(*t));
	task_struct_fill(td, t);
	task_struct_set(td, t);
}

static void
linux_clear_current(struct thread *td)
{
	task_struct_set(td, NULL);
}

static void
linux_file_dtor(void *cdp)
{
	struct linux_file *filp;
	struct task_struct t;
	struct thread *td;

	td = curthread;
	filp = cdp;
	linux_set_current(td, &t);
	filp->f_op->release(filp->f_vnode, filp);
	linux_clear_current(td);
	vdrop(filp->f_vnode);
	kfree(filp);
}

static int
linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct task_struct t;
	struct file *file;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (ENODEV);
	filp = kzalloc(sizeof(*filp), GFP_KERNEL);
	filp->f_dentry = &filp->f_dentry_store;
	filp->f_op = ldev->ops;
	filp->f_flags = file->f_flag;
	vhold(file->f_vnode);
	filp->f_vnode = file->f_vnode;
	linux_set_current(td, &t);
	if (filp->f_op->open) {
		error = -filp->f_op->open(file->f_vnode, filp);
		if (error) {
			kfree(filp);
			goto done;
		}
	}
	error = devfs_set_cdevpriv(filp, linux_file_dtor);
	if (error) {
		filp->f_op->release(file->f_vnode, filp);
		kfree(filp);
	}
done:
	linux_clear_current(td);
	return (error);
}

static int
linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	devfs_clear_cdevpriv();

	return (0);
}

#define	LINUX_IOCTL_MIN_PTR 0x10000UL
#define	LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX)

static inline int
linux_remap_address(void **uaddr, size_t len)
{
	uintptr_t uaddr_val = (uintptr_t)(*uaddr);

	if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR &&
	    uaddr_val < LINUX_IOCTL_MAX_PTR)) {
		struct task_struct *pts = current;
		if (pts == NULL) {
			*uaddr = NULL;
			return (1);
		}

		/* compute data offset */
		uaddr_val -= LINUX_IOCTL_MIN_PTR;

		/* check that length is within bounds */
		if ((len > IOCPARM_MAX) ||
		    (uaddr_val + len) > pts->bsd_ioctl_len) {
			*uaddr = NULL;
			return (1);
		}

		/* re-add kernel buffer address */
		uaddr_val += (uintptr_t)pts->bsd_ioctl_data;

		/* update address location */
		*uaddr = (void *)uaddr_val;
		return (1);
	}
	return (0);
}

int
linux_copyin(const void *uaddr, void *kaddr, size_t len)
{
	if (linux_remap_address(__DECONST(void **, &uaddr), len)) {
		if (uaddr == NULL)
			return (-EFAULT);
		memcpy(kaddr, uaddr, len);
		return (0);
	}
	return (-copyin(uaddr, kaddr, len));
}
int
linux_copyout(const void *kaddr, void *uaddr, size_t len)
{
	if (linux_remap_address(&uaddr, len)) {
		if (uaddr == NULL)
			return (-EFAULT);
		memcpy(uaddr, kaddr, len);
		return (0);
	}
	return (-copyout(kaddr, uaddr, len));
}

static int
linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct task_struct t;
	struct file *file;
	unsigned size;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	linux_set_current(td, &t);
	size = IOCPARM_LEN(cmd);
	/* refer to logic in sys_ioctl() */
	if (size > 0) {
		/*
		 * Setup hint for linux_copyin() and linux_copyout().
		 *
		 * Background: Linux code expects a user-space address
		 * while FreeBSD supplies a kernel-space address.
		 */
		t.bsd_ioctl_data = data;
		t.bsd_ioctl_len = size;
		data = (void *)LINUX_IOCTL_MIN_PTR;
	} else {
		/* fetch user-space pointer */
		data = *(void **)data;
	}
	if (filp->f_op->unlocked_ioctl)
		error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data);
	else
		error = ENOTTY;
	linux_clear_current(td);

	return (error);
}
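/*
 * Illustrative example (not part of the compat layer): for an ioctl
 * command with IOCPARM_LEN(cmd) > 0, the driver's unlocked_ioctl()
 * callback receives LINUX_IOCTL_MIN_PTR instead of the kernel buffer
 * that devfs already copied in.  A hypothetical Linux-style handler
 *
 *	static long
 *	foo_ioctl(struct linux_file *filp, unsigned int cmd, unsigned long arg)
 *	{
 *		struct foo_args args;
 *
 *		if (copy_from_user(&args, (void *)arg, sizeof(args)))
 *			return (-EFAULT);
 *		...
 *	}
 *
 * ends up in linux_copyin(), where linux_remap_address() translates
 * the fake user address back into the kernel buffer recorded in
 * t.bsd_ioctl_data/t.bsd_ioctl_len above.
 */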
static int
linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct task_struct t;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td, &t);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;
	linux_clear_current(td);

	return (error);
}

static int
linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct task_struct t;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td, &t);
	if (filp->f_op->write) {
		bytes = filp->f_op->write(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;
	linux_clear_current(td);

	return (error);
}

static int
linux_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct task_struct t;
	struct file *file;
	int revents;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	linux_set_current(td, &t);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp, NULL) & events;
	else
		revents = 0;
	linux_clear_current(td);

	return (revents);
}

static int
linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct thread *td;
	struct task_struct t;
	struct file *file;
	struct vm_area_struct vma;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (ENODEV);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	linux_set_current(td, &t);
	vma.vm_start = 0;
	vma.vm_end = size;
	vma.vm_pgoff = *offset / PAGE_SIZE;
	vma.vm_pfn = 0;
	vma.vm_page_prot = VM_MEMATTR_DEFAULT;
	if (filp->f_op->mmap) {
		error = -filp->f_op->mmap(filp, &vma);
		if (error == 0) {
			struct sglist *sg;

			sg = sglist_alloc(1, M_WAITOK);
			sglist_append_phys(sg,
			    (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len);
			*object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len,
			    nprot, 0, td->td_ucred);
			if (*object == NULL) {
				sglist_free(sg);
				error = EINVAL;
				goto done;
			}
			*offset = 0;
			if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) {
				VM_OBJECT_WLOCK(*object);
				vm_object_set_memattr(*object,
				    vma.vm_page_prot);
				VM_OBJECT_WUNLOCK(*object);
			}
		}
	} else
		error = ENODEV;
done:
	linux_clear_current(td);
	return (error);
}

struct cdevsw linuxcdevsw = {
	.d_version = D_VERSION,
	.d_flags = D_TRACKCLOSE,
	.d_open = linux_dev_open,
	.d_close = linux_dev_close,
	.d_read = linux_dev_read,
	.d_write = linux_dev_write,
	.d_ioctl = linux_dev_ioctl,
	.d_mmap_single = linux_dev_mmap_single,
	.d_poll = linux_dev_poll,
};

static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	struct task_struct t;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td, &t);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;
	linux_clear_current(td);

	return (error);
}
static int
linux_file_poll(struct file *file, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct linux_file *filp;
	struct task_struct t;
	int revents;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td, &t);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp, NULL) & events;
	else
		revents = 0;
	linux_clear_current(td);

	return (revents);
}

static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	struct task_struct t;
	int error;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td, &t);
	error = -filp->f_op->release(NULL, filp);
	linux_clear_current(td);
	funsetown(&filp->f_sigio);
	kfree(filp);

	return (error);
}

static int
linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
    struct thread *td)
{
	struct linux_file *filp;
	struct task_struct t;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;
	error = 0;

	linux_set_current(td, &t);
	switch (cmd) {
	case FIONBIO:
		break;
	case FIOASYNC:
		if (filp->f_op->fasync == NULL)
			break;
		error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC);
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &filp->f_sigio);
		if (error == 0)
			error = filp->f_op->fasync(0, filp,
			    fp->f_flag & FASYNC);
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(&filp->f_sigio);
		break;
	default:
		error = ENOTTY;
		break;
	}
	linux_clear_current(td);
	return (error);
}

static int
linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EOPNOTSUPP);
}

static int
linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{

	return (0);
}

struct fileops linuxfileops = {
	.fo_read = linux_file_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_kqfilter = invfo_kqfilter,
	.fo_stat = linux_file_stat,
	.fo_fill_kinfo = linux_file_fill_kinfo,
	.fo_poll = linux_file_poll,
	.fo_close = linux_file_close,
	.fo_ioctl = linux_file_ioctl,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
};
/*
 * Hash of vmmap addresses.  This is infrequently accessed and does not
 * need to be particularly large.  This is done because we must store the
 * caller's idea of the map size to properly unmap.
 */
struct vmmap {
	LIST_ENTRY(vmmap)	vm_next;
	void		*vm_addr;
	unsigned long	vm_size;
};

struct vmmaphd {
	struct vmmap *lh_first;
};
#define	VMMAP_HASH_SIZE	64
#define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
#define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
static struct mtx vmmaplock;

static void
vmmap_add(void *addr, unsigned long size)
{
	struct vmmap *vmmap;

	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
	mtx_lock(&vmmaplock);
	vmmap->vm_size = size;
	vmmap->vm_addr = addr;
	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
	mtx_unlock(&vmmaplock);
}

static struct vmmap *
vmmap_remove(void *addr)
{
	struct vmmap *vmmap;

	mtx_lock(&vmmaplock);
	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
		if (vmmap->vm_addr == addr)
			break;
	if (vmmap)
		LIST_REMOVE(vmmap, vm_next);
	mtx_unlock(&vmmaplock);

	return (vmmap);
}

#if defined(__i386__) || defined(__amd64__)
void *
_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
{
	void *addr;

	addr = pmap_mapdev_attr(phys_addr, size, attr);
	if (addr == NULL)
		return (NULL);
	vmmap_add(addr, size);

	return (addr);
}
#endif

void
iounmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
#if defined(__i386__) || defined(__amd64__)
	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
#endif
	kfree(vmmap);
}

void *
vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
{
	vm_offset_t off;
	size_t size;

	size = count * PAGE_SIZE;
	off = kva_alloc(size);
	if (off == 0)
		return (NULL);
	vmmap_add((void *)off, size);
	pmap_qenter(off, pages, count);

	return ((void *)off);
}

void
vunmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
	kva_free((vm_offset_t)addr, vmmap->vm_size);
	kfree(vmmap);
}
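/*
 * Illustrative example (not part of the compat layer): vmap() and
 * ioremap() only record the start address and size in the hash above,
 * which is what lets a caller tear a mapping down by address alone:
 *
 *	void *va = vmap(pages, 4, 0, 0);
 *	...
 *	vunmap(va);	(* the 4 * PAGE_SIZE size is looked up in the hash *)
 */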
char *
kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
{
	unsigned int len;
	char *p;
	va_list aq;

	va_copy(aq, ap);
	len = vsnprintf(NULL, 0, fmt, aq);
	va_end(aq);

	p = kmalloc(len + 1, gfp);
	if (p != NULL)
		vsnprintf(p, len + 1, fmt, ap);

	return (p);
}

char *
kasprintf(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(gfp, fmt, ap);
	va_end(ap);

	return (p);
}

static void
linux_timer_callback_wrapper(void *context)
{
	struct timer_list *timer;

	timer = context;
	timer->function(timer->data);
}

void
mod_timer(struct timer_list *timer, unsigned long expires)
{

	timer->expires = expires;
	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(expires),
	    &linux_timer_callback_wrapper, timer);
}

void
add_timer(struct timer_list *timer)
{

	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer);
}

static void
linux_timer_init(void *arg)
{

	/*
	 * Compute an internal HZ value which can divide 2**32 to
	 * avoid timer rounding problems when the tick value wraps
	 * around 2**32:
	 */
	linux_timer_hz_mask = 1;
	while (linux_timer_hz_mask < (unsigned long)hz)
		linux_timer_hz_mask *= 2;
	linux_timer_hz_mask--;
}
SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);
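/*
 * Worked example (illustrative only): with hz = 1000 the loop in
 * linux_timer_init() stops at 1024, so linux_timer_hz_mask becomes
 * 1023 (0x3ff), the smallest power-of-two mask that covers the tick
 * rate and divides 2**32 evenly.
 */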
void
linux_complete_common(struct completion *c, int all)
{
	int wakeup_swapper;

	sleepq_lock(c);
	c->done++;
	if (all)
		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
	else
		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(c);
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Indefinite wait for done != 0 with or without signals.
 */
long
linux_wait_for_common(struct completion *c, int flags)
{

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		if (flags & SLEEPQ_INTERRUPTIBLE) {
			if (sleepq_wait_sig(c, 0) != 0)
				return (-ERESTARTSYS);
		} else
			sleepq_wait(c, 0);
	}
	c->done--;
	sleepq_release(c);

	return (0);
}

/*
 * Time limited wait for done != 0 with or without signals.
 */
long
linux_wait_for_timeout_common(struct completion *c, long timeout, int flags)
{
	long end = jiffies + timeout;

	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		int ret;

		sleepq_lock(c);
		if (c->done)
			break;
		sleepq_add(c, NULL, "completion", flags, 0);
		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
		if (flags & SLEEPQ_INTERRUPTIBLE)
			ret = sleepq_timedwait_sig(c, 0);
		else
			ret = sleepq_timedwait(c, 0);
		if (ret != 0) {
			/* check for timeout or signal */
			if (ret == EWOULDBLOCK)
				return (0);
			else
				return (-ERESTARTSYS);
		}
	}
	c->done--;
	sleepq_release(c);

	/* return how many jiffies are left */
	return (linux_timer_jiffies_until(end));
}

int
linux_try_wait_for_completion(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done)
		c->done--;
	else
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

int
linux_completion_done(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done == 0)
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

void
linux_delayed_work_fn(void *arg)
{
	struct delayed_work *work;

	work = arg;
	taskqueue_enqueue(work->work.taskqueue, &work->work.work_task);
}

void
linux_work_fn(void *context, int pending)
{
	struct work_struct *work;

	work = context;
	work->fn(work);
}

void
linux_flush_fn(void *context, int pending)
{
}

struct workqueue_struct *
linux_create_workqueue_common(const char *name, int cpus)
{
	struct workqueue_struct *wq;

	wq = kmalloc(sizeof(*wq), M_WAITOK);
	wq->taskqueue = taskqueue_create(name, M_WAITOK,
	    taskqueue_thread_enqueue, &wq->taskqueue);
	atomic_set(&wq->draining, 0);
	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);

	return (wq);
}

void
destroy_workqueue(struct workqueue_struct *wq)
{
	taskqueue_free(wq->taskqueue);
	kfree(wq);
}

static void
linux_cdev_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kfree(cdev);
	kobject_put(parent);
}

static void
linux_cdev_static_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kobject_put(parent);
}

const struct kobj_type linux_cdev_ktype = {
	.release = linux_cdev_release,
};

const struct kobj_type linux_cdev_static_ktype = {
	.release = linux_cdev_static_release,
};
static void
linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate)
{
	struct notifier_block *nb;

	nb = arg;
	if (linkstate == LINK_STATE_UP)
		nb->notifier_call(nb, NETDEV_UP, ifp);
	else
		nb->notifier_call(nb, NETDEV_DOWN, ifp);
}

static void
linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_REGISTER, ifp);
}

static void
linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_UNREGISTER, ifp);
}

static void
linux_handle_iflladdr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp);
}

static void
linux_handle_ifaddr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
}

int
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}

int
register_inetaddr_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
	return (0);
}

int
unregister_netdevice_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifnet_link_event,
	    nb->tags[NETDEV_UP]);
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
	    nb->tags[NETDEV_REGISTER]);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    nb->tags[NETDEV_UNREGISTER]);
	EVENTHANDLER_DEREGISTER(iflladdr_event,
	    nb->tags[NETDEV_CHANGEADDR]);

	return (0);
}

int
unregister_inetaddr_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifaddr_event,
	    nb->tags[NETDEV_CHANGEIFADDR]);

	return (0);
}
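/*
 * Illustrative example (not part of the compat layer): a consumer
 * would hook interface events through a hypothetical notifier along
 * the lines of
 *
 *	static int
 *	foo_netdev_event(struct notifier_block *nb, unsigned long event,
 *	    void *ptr)
 *	{
 *		if (event == NETDEV_UP)
 *			...;
 *		return (0);
 *	}
 *
 *	static struct notifier_block foo_nb = {
 *		.notifier_call = foo_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&foo_nb);
 *
 * Each Linux event is backed by the FreeBSD eventhandler registered
 * above and is torn down again by unregister_netdevice_notifier().
 */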
void
linux_irq_handler(void *ent)
{
	struct irq_ent *irqe;

	irqe = ent;
	irqe->handler(irqe->irq, irqe->arg);
}

#if defined(__i386__) || defined(__amd64__)
bool linux_cpu_has_clflush;
#endif

static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

#if defined(__i386__) || defined(__amd64__)
	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif
	sx_init(&linux_global_rcu_lock, "LinuxGlobalRCU");

	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);

static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);

	synchronize_rcu();
	sx_destroy(&linux_global_rcu_lock);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);

/*
 * NOTE: Linux frequently uses "unsigned long" for pointer to integer
 * conversion and vice versa, where in FreeBSD "uintptr_t" would be
 * used. Assert these types have the same size, else some parts of the
 * LinuxKPI may not work like expected:
 */
CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));