1 /*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice unmodified, this list of conditions, and the following 13 * disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/kernel.h> 37 #include <sys/sysctl.h> 38 #include <sys/proc.h> 39 #include <sys/sglist.h> 40 #include <sys/sleepqueue.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/bus.h> 44 #include <sys/fcntl.h> 45 #include <sys/file.h> 46 #include <sys/filio.h> 47 #include <sys/rwlock.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 52 #include <machine/stdarg.h> 53 54 #if defined(__i386__) || defined(__amd64__) 55 #include <machine/md_var.h> 56 #endif 57 58 #include <linux/kobject.h> 59 #include <linux/device.h> 60 #include <linux/slab.h> 61 #include <linux/module.h> 62 #include <linux/cdev.h> 63 #include <linux/file.h> 64 #include <linux/sysfs.h> 65 #include <linux/mm.h> 66 #include <linux/io.h> 67 #include <linux/vmalloc.h> 68 #include <linux/netdevice.h> 69 #include <linux/timer.h> 70 #include <linux/workqueue.h> 71 #include <linux/rcupdate.h> 72 #include <linux/interrupt.h> 73 #include <linux/uaccess.h> 74 #include <linux/kernel.h> 75 #include <linux/list.h> 76 77 #include <vm/vm_pager.h> 78 79 MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 80 81 #include <linux/rbtree.h> 82 /* Undo Linux compat changes. */ 83 #undef RB_ROOT 84 #undef file 85 #undef cdev 86 #define RB_ROOT(head) (head)->rbh_root 87 88 struct kobject linux_class_root; 89 struct device linux_root_device; 90 struct class linux_class_misc; 91 struct list_head pci_drivers; 92 struct list_head pci_devices; 93 struct net init_net; 94 spinlock_t pci_lock; 95 struct sx linux_global_rcu_lock; 96 97 unsigned long linux_timer_hz_mask; 98 99 int 100 panic_cmp(struct rb_node *one, struct rb_node *two) 101 { 102 panic("no cmp"); 103 } 104 105 RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 106 107 int 108 kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args) 109 { 110 va_list tmp_va; 111 int len; 112 char *old; 113 char *name; 114 char dummy; 115 116 old = kobj->name; 117 118 if (old && fmt == NULL) 119 return (0); 120 121 /* compute length of string */ 122 va_copy(tmp_va, args); 123 len = vsnprintf(&dummy, 0, fmt, tmp_va); 124 va_end(tmp_va); 125 126 /* account for zero termination */ 127 len++; 128 129 /* check for error */ 130 if (len < 1) 131 return (-EINVAL); 132 133 /* allocate memory for string */ 134 name = kzalloc(len, GFP_KERNEL); 135 if (name == NULL) 136 return (-ENOMEM); 137 vsnprintf(name, len, fmt, args); 138 kobj->name = name; 139 140 /* free old string */ 141 kfree(old); 142 143 /* filter new string */ 144 for (; *name != '\0'; name++) 145 if (*name == '/') 146 *name = '!'; 147 return (0); 148 } 149 150 int 151 kobject_set_name(struct kobject *kobj, const char *fmt, ...) 152 { 153 va_list args; 154 int error; 155 156 va_start(args, fmt); 157 error = kobject_set_name_vargs(kobj, fmt, args); 158 va_end(args); 159 160 return (error); 161 } 162 163 static int 164 kobject_add_complete(struct kobject *kobj, struct kobject *parent) 165 { 166 const struct kobj_type *t; 167 int error; 168 169 kobj->parent = parent; 170 error = sysfs_create_dir(kobj); 171 if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { 172 struct attribute **attr; 173 t = kobj->ktype; 174 175 for (attr = t->default_attrs; *attr != NULL; attr++) { 176 error = sysfs_create_file(kobj, *attr); 177 if (error) 178 break; 179 } 180 if (error) 181 sysfs_remove_dir(kobj); 182 183 } 184 return (error); 185 } 186 187 int 188 kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) 189 { 190 va_list args; 191 int error; 192 193 va_start(args, fmt); 194 error = kobject_set_name_vargs(kobj, fmt, args); 195 va_end(args); 196 if (error) 197 return (error); 198 199 return kobject_add_complete(kobj, parent); 200 } 201 202 void 203 linux_kobject_release(struct kref *kref) 204 { 205 struct kobject *kobj; 206 char *name; 207 208 kobj = container_of(kref, struct kobject, kref); 209 sysfs_remove_dir(kobj); 210 name = kobj->name; 211 if (kobj->ktype && kobj->ktype->release) 212 kobj->ktype->release(kobj); 213 kfree(name); 214 } 215 216 static void 217 linux_kobject_kfree(struct kobject *kobj) 218 { 219 kfree(kobj); 220 } 221 222 static void 223 linux_kobject_kfree_name(struct kobject *kobj) 224 { 225 if (kobj) { 226 kfree(kobj->name); 227 } 228 } 229 230 const struct kobj_type linux_kfree_type = { 231 .release = linux_kobject_kfree 232 }; 233 234 static void 235 linux_device_release(struct device *dev) 236 { 237 pr_debug("linux_device_release: %s\n", dev_name(dev)); 238 kfree(dev); 239 } 240 241 static ssize_t 242 linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf) 243 { 244 struct class_attribute *dattr; 245 ssize_t error; 246 247 dattr = container_of(attr, struct class_attribute, attr); 248 error = -EIO; 249 if (dattr->show) 250 error = dattr->show(container_of(kobj, struct class, kobj), 251 dattr, buf); 252 return (error); 253 } 254 255 static ssize_t 256 linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf, 257 size_t count) 258 { 259 struct class_attribute *dattr; 260 ssize_t error; 261 262 dattr = container_of(attr, struct class_attribute, attr); 263 error = -EIO; 264 if (dattr->store) 265 error = dattr->store(container_of(kobj, struct class, kobj), 266 dattr, buf, count); 267 return (error); 268 } 269 270 static void 271 linux_class_release(struct kobject *kobj) 272 { 273 struct class *class; 274 275 class = container_of(kobj, struct class, kobj); 276 if (class->class_release) 277 class->class_release(class); 278 } 279 280 static const struct sysfs_ops linux_class_sysfs = { 281 .show = linux_class_show, 282 .store = linux_class_store, 283 }; 284 285 const struct kobj_type linux_class_ktype = { 286 .release = linux_class_release, 287 .sysfs_ops = &linux_class_sysfs 288 }; 289 290 static void 291 linux_dev_release(struct kobject *kobj) 292 { 293 struct device *dev; 294 295 dev = container_of(kobj, struct device, kobj); 296 /* This is the precedence defined by linux. */ 297 if (dev->release) 298 dev->release(dev); 299 else if (dev->class && dev->class->dev_release) 300 dev->class->dev_release(dev); 301 } 302 303 static ssize_t 304 linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf) 305 { 306 struct device_attribute *dattr; 307 ssize_t error; 308 309 dattr = container_of(attr, struct device_attribute, attr); 310 error = -EIO; 311 if (dattr->show) 312 error = dattr->show(container_of(kobj, struct device, kobj), 313 dattr, buf); 314 return (error); 315 } 316 317 static ssize_t 318 linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf, 319 size_t count) 320 { 321 struct device_attribute *dattr; 322 ssize_t error; 323 324 dattr = container_of(attr, struct device_attribute, attr); 325 error = -EIO; 326 if (dattr->store) 327 error = dattr->store(container_of(kobj, struct device, kobj), 328 dattr, buf, count); 329 return (error); 330 } 331 332 static const struct sysfs_ops linux_dev_sysfs = { 333 .show = linux_dev_show, 334 .store = linux_dev_store, 335 }; 336 337 const struct kobj_type linux_dev_ktype = { 338 .release = linux_dev_release, 339 .sysfs_ops = &linux_dev_sysfs 340 }; 341 342 struct device * 343 device_create(struct class *class, struct device *parent, dev_t devt, 344 void *drvdata, const char *fmt, ...) 345 { 346 struct device *dev; 347 va_list args; 348 349 dev = kzalloc(sizeof(*dev), M_WAITOK); 350 dev->parent = parent; 351 dev->class = class; 352 dev->devt = devt; 353 dev->driver_data = drvdata; 354 dev->release = linux_device_release; 355 va_start(args, fmt); 356 kobject_set_name_vargs(&dev->kobj, fmt, args); 357 va_end(args); 358 device_register(dev); 359 360 return (dev); 361 } 362 363 int 364 kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, 365 struct kobject *parent, const char *fmt, ...) 366 { 367 va_list args; 368 int error; 369 370 kobject_init(kobj, ktype); 371 kobj->ktype = ktype; 372 kobj->parent = parent; 373 kobj->name = NULL; 374 375 va_start(args, fmt); 376 error = kobject_set_name_vargs(kobj, fmt, args); 377 va_end(args); 378 if (error) 379 return (error); 380 return kobject_add_complete(kobj, parent); 381 } 382 383 static void 384 linux_set_current(struct thread *td, struct task_struct *t) 385 { 386 memset(t, 0, sizeof(*t)); 387 task_struct_fill(td, t); 388 task_struct_set(td, t); 389 } 390 391 static void 392 linux_clear_current(struct thread *td) 393 { 394 task_struct_set(td, NULL); 395 } 396 397 static void 398 linux_file_dtor(void *cdp) 399 { 400 struct linux_file *filp; 401 struct task_struct t; 402 struct thread *td; 403 404 td = curthread; 405 filp = cdp; 406 linux_set_current(td, &t); 407 filp->f_op->release(filp->f_vnode, filp); 408 linux_clear_current(td); 409 vdrop(filp->f_vnode); 410 kfree(filp); 411 } 412 413 static int 414 linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 415 { 416 struct linux_cdev *ldev; 417 struct linux_file *filp; 418 struct task_struct t; 419 struct file *file; 420 int error; 421 422 file = td->td_fpop; 423 ldev = dev->si_drv1; 424 if (ldev == NULL) 425 return (ENODEV); 426 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 427 filp->f_dentry = &filp->f_dentry_store; 428 filp->f_op = ldev->ops; 429 filp->f_flags = file->f_flag; 430 vhold(file->f_vnode); 431 filp->f_vnode = file->f_vnode; 432 linux_set_current(td, &t); 433 if (filp->f_op->open) { 434 error = -filp->f_op->open(file->f_vnode, filp); 435 if (error) { 436 kfree(filp); 437 goto done; 438 } 439 } 440 error = devfs_set_cdevpriv(filp, linux_file_dtor); 441 if (error) { 442 filp->f_op->release(file->f_vnode, filp); 443 kfree(filp); 444 } 445 done: 446 linux_clear_current(td); 447 return (error); 448 } 449 450 static int 451 linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 452 { 453 struct linux_cdev *ldev; 454 struct linux_file *filp; 455 struct file *file; 456 int error; 457 458 file = td->td_fpop; 459 ldev = dev->si_drv1; 460 if (ldev == NULL) 461 return (0); 462 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 463 return (error); 464 filp->f_flags = file->f_flag; 465 devfs_clear_cdevpriv(); 466 467 468 return (0); 469 } 470 471 #define LINUX_IOCTL_MIN_PTR 0x10000UL 472 #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + IOCPARM_MAX) 473 474 static inline int 475 linux_remap_address(void **uaddr, size_t len) 476 { 477 uintptr_t uaddr_val = (uintptr_t)(*uaddr); 478 479 if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR && 480 uaddr_val < LINUX_IOCTL_MAX_PTR)) { 481 struct task_struct *pts = current; 482 if (pts == NULL) { 483 *uaddr = NULL; 484 return (1); 485 } 486 487 /* compute data offset */ 488 uaddr_val -= LINUX_IOCTL_MIN_PTR; 489 490 /* check that length is within bounds */ 491 if ((len > IOCPARM_MAX) || 492 (uaddr_val + len) > pts->bsd_ioctl_len) { 493 *uaddr = NULL; 494 return (1); 495 } 496 497 /* re-add kernel buffer address */ 498 uaddr_val += (uintptr_t)pts->bsd_ioctl_data; 499 500 /* update address location */ 501 *uaddr = (void *)uaddr_val; 502 return (1); 503 } 504 return (0); 505 } 506 507 int 508 linux_copyin(const void *uaddr, void *kaddr, size_t len) 509 { 510 if (linux_remap_address(__DECONST(void **, &uaddr), len)) { 511 if (uaddr == NULL) 512 return (-EFAULT); 513 memcpy(kaddr, uaddr, len); 514 return (0); 515 } 516 return (-copyin(uaddr, kaddr, len)); 517 } 518 519 int 520 linux_copyout(const void *kaddr, void *uaddr, size_t len) 521 { 522 if (linux_remap_address(&uaddr, len)) { 523 if (uaddr == NULL) 524 return (-EFAULT); 525 memcpy(uaddr, kaddr, len); 526 return (0); 527 } 528 return (-copyout(kaddr, uaddr, len)); 529 } 530 531 static int 532 linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 533 struct thread *td) 534 { 535 struct linux_cdev *ldev; 536 struct linux_file *filp; 537 struct task_struct t; 538 struct file *file; 539 unsigned size; 540 int error; 541 542 file = td->td_fpop; 543 ldev = dev->si_drv1; 544 if (ldev == NULL) 545 return (0); 546 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 547 return (error); 548 filp->f_flags = file->f_flag; 549 linux_set_current(td, &t); 550 size = IOCPARM_LEN(cmd); 551 /* refer to logic in sys_ioctl() */ 552 if (size > 0) { 553 /* 554 * Setup hint for linux_copyin() and linux_copyout(). 555 * 556 * Background: Linux code expects a user-space address 557 * while FreeBSD supplies a kernel-space address. 558 */ 559 t.bsd_ioctl_data = data; 560 t.bsd_ioctl_len = size; 561 data = (void *)LINUX_IOCTL_MIN_PTR; 562 } else { 563 /* fetch user-space pointer */ 564 data = *(void **)data; 565 } 566 if (filp->f_op->unlocked_ioctl) 567 error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); 568 else 569 error = ENOTTY; 570 linux_clear_current(td); 571 572 return (error); 573 } 574 575 static int 576 linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) 577 { 578 struct linux_cdev *ldev; 579 struct linux_file *filp; 580 struct task_struct t; 581 struct thread *td; 582 struct file *file; 583 ssize_t bytes; 584 int error; 585 586 td = curthread; 587 file = td->td_fpop; 588 ldev = dev->si_drv1; 589 if (ldev == NULL) 590 return (0); 591 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 592 return (error); 593 filp->f_flags = file->f_flag; 594 /* XXX no support for I/O vectors currently */ 595 if (uio->uio_iovcnt != 1) 596 return (EOPNOTSUPP); 597 linux_set_current(td, &t); 598 if (filp->f_op->read) { 599 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 600 uio->uio_iov->iov_len, &uio->uio_offset); 601 if (bytes >= 0) { 602 uio->uio_iov->iov_base = 603 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 604 uio->uio_iov->iov_len -= bytes; 605 uio->uio_resid -= bytes; 606 } else 607 error = -bytes; 608 } else 609 error = ENXIO; 610 linux_clear_current(td); 611 612 return (error); 613 } 614 615 static int 616 linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) 617 { 618 struct linux_cdev *ldev; 619 struct linux_file *filp; 620 struct task_struct t; 621 struct thread *td; 622 struct file *file; 623 ssize_t bytes; 624 int error; 625 626 td = curthread; 627 file = td->td_fpop; 628 ldev = dev->si_drv1; 629 if (ldev == NULL) 630 return (0); 631 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 632 return (error); 633 filp->f_flags = file->f_flag; 634 /* XXX no support for I/O vectors currently */ 635 if (uio->uio_iovcnt != 1) 636 return (EOPNOTSUPP); 637 linux_set_current(td, &t); 638 if (filp->f_op->write) { 639 bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, 640 uio->uio_iov->iov_len, &uio->uio_offset); 641 if (bytes >= 0) { 642 uio->uio_iov->iov_base = 643 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 644 uio->uio_iov->iov_len -= bytes; 645 uio->uio_resid -= bytes; 646 } else 647 error = -bytes; 648 } else 649 error = ENXIO; 650 linux_clear_current(td); 651 652 return (error); 653 } 654 655 static int 656 linux_dev_poll(struct cdev *dev, int events, struct thread *td) 657 { 658 struct linux_cdev *ldev; 659 struct linux_file *filp; 660 struct task_struct t; 661 struct file *file; 662 int revents; 663 int error; 664 665 file = td->td_fpop; 666 ldev = dev->si_drv1; 667 if (ldev == NULL) 668 return (0); 669 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 670 return (error); 671 filp->f_flags = file->f_flag; 672 linux_set_current(td, &t); 673 if (filp->f_op->poll) 674 revents = filp->f_op->poll(filp, NULL) & events; 675 else 676 revents = 0; 677 linux_clear_current(td); 678 679 return (revents); 680 } 681 682 static int 683 linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, 684 vm_size_t size, struct vm_object **object, int nprot) 685 { 686 struct linux_cdev *ldev; 687 struct linux_file *filp; 688 struct thread *td; 689 struct task_struct t; 690 struct file *file; 691 struct vm_area_struct vma; 692 int error; 693 694 td = curthread; 695 file = td->td_fpop; 696 ldev = dev->si_drv1; 697 if (ldev == NULL) 698 return (ENODEV); 699 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 700 return (error); 701 filp->f_flags = file->f_flag; 702 linux_set_current(td, &t); 703 vma.vm_start = 0; 704 vma.vm_end = size; 705 vma.vm_pgoff = *offset / PAGE_SIZE; 706 vma.vm_pfn = 0; 707 vma.vm_page_prot = VM_MEMATTR_DEFAULT; 708 if (filp->f_op->mmap) { 709 error = -filp->f_op->mmap(filp, &vma); 710 if (error == 0) { 711 struct sglist *sg; 712 713 sg = sglist_alloc(1, M_WAITOK); 714 sglist_append_phys(sg, 715 (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len); 716 *object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len, 717 nprot, 0, td->td_ucred); 718 if (*object == NULL) { 719 sglist_free(sg); 720 error = EINVAL; 721 goto done; 722 } 723 *offset = 0; 724 if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) { 725 VM_OBJECT_WLOCK(*object); 726 vm_object_set_memattr(*object, 727 vma.vm_page_prot); 728 VM_OBJECT_WUNLOCK(*object); 729 } 730 } 731 } else 732 error = ENODEV; 733 done: 734 linux_clear_current(td); 735 return (error); 736 } 737 738 struct cdevsw linuxcdevsw = { 739 .d_version = D_VERSION, 740 .d_flags = D_TRACKCLOSE, 741 .d_open = linux_dev_open, 742 .d_close = linux_dev_close, 743 .d_read = linux_dev_read, 744 .d_write = linux_dev_write, 745 .d_ioctl = linux_dev_ioctl, 746 .d_mmap_single = linux_dev_mmap_single, 747 .d_poll = linux_dev_poll, 748 }; 749 750 static int 751 linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, 752 int flags, struct thread *td) 753 { 754 struct linux_file *filp; 755 struct task_struct t; 756 ssize_t bytes; 757 int error; 758 759 error = 0; 760 filp = (struct linux_file *)file->f_data; 761 filp->f_flags = file->f_flag; 762 /* XXX no support for I/O vectors currently */ 763 if (uio->uio_iovcnt != 1) 764 return (EOPNOTSUPP); 765 linux_set_current(td, &t); 766 if (filp->f_op->read) { 767 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 768 uio->uio_iov->iov_len, &uio->uio_offset); 769 if (bytes >= 0) { 770 uio->uio_iov->iov_base = 771 ((uint8_t *)uio->uio_iov->iov_base) + bytes; 772 uio->uio_iov->iov_len -= bytes; 773 uio->uio_resid -= bytes; 774 } else 775 error = -bytes; 776 } else 777 error = ENXIO; 778 linux_clear_current(td); 779 780 return (error); 781 } 782 783 static int 784 linux_file_poll(struct file *file, int events, struct ucred *active_cred, 785 struct thread *td) 786 { 787 struct linux_file *filp; 788 struct task_struct t; 789 int revents; 790 791 filp = (struct linux_file *)file->f_data; 792 filp->f_flags = file->f_flag; 793 linux_set_current(td, &t); 794 if (filp->f_op->poll) 795 revents = filp->f_op->poll(filp, NULL) & events; 796 else 797 revents = 0; 798 linux_clear_current(td); 799 800 return (revents); 801 } 802 803 static int 804 linux_file_close(struct file *file, struct thread *td) 805 { 806 struct linux_file *filp; 807 struct task_struct t; 808 int error; 809 810 filp = (struct linux_file *)file->f_data; 811 filp->f_flags = file->f_flag; 812 linux_set_current(td, &t); 813 error = -filp->f_op->release(NULL, filp); 814 linux_clear_current(td); 815 funsetown(&filp->f_sigio); 816 kfree(filp); 817 818 return (error); 819 } 820 821 static int 822 linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 823 struct thread *td) 824 { 825 struct linux_file *filp; 826 struct task_struct t; 827 int error; 828 829 filp = (struct linux_file *)fp->f_data; 830 filp->f_flags = fp->f_flag; 831 error = 0; 832 833 linux_set_current(td, &t); 834 switch (cmd) { 835 case FIONBIO: 836 break; 837 case FIOASYNC: 838 if (filp->f_op->fasync == NULL) 839 break; 840 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 841 break; 842 case FIOSETOWN: 843 error = fsetown(*(int *)data, &filp->f_sigio); 844 if (error == 0) 845 error = filp->f_op->fasync(0, filp, 846 fp->f_flag & FASYNC); 847 break; 848 case FIOGETOWN: 849 *(int *)data = fgetown(&filp->f_sigio); 850 break; 851 default: 852 error = ENOTTY; 853 break; 854 } 855 linux_clear_current(td); 856 return (error); 857 } 858 859 static int 860 linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 861 struct thread *td) 862 { 863 864 return (EOPNOTSUPP); 865 } 866 867 static int 868 linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif, 869 struct filedesc *fdp) 870 { 871 872 return (0); 873 } 874 875 struct fileops linuxfileops = { 876 .fo_read = linux_file_read, 877 .fo_write = invfo_rdwr, 878 .fo_truncate = invfo_truncate, 879 .fo_kqfilter = invfo_kqfilter, 880 .fo_stat = linux_file_stat, 881 .fo_fill_kinfo = linux_file_fill_kinfo, 882 .fo_poll = linux_file_poll, 883 .fo_close = linux_file_close, 884 .fo_ioctl = linux_file_ioctl, 885 .fo_chmod = invfo_chmod, 886 .fo_chown = invfo_chown, 887 .fo_sendfile = invfo_sendfile, 888 }; 889 890 /* 891 * Hash of vmmap addresses. This is infrequently accessed and does not 892 * need to be particularly large. This is done because we must store the 893 * caller's idea of the map size to properly unmap. 894 */ 895 struct vmmap { 896 LIST_ENTRY(vmmap) vm_next; 897 void *vm_addr; 898 unsigned long vm_size; 899 }; 900 901 struct vmmaphd { 902 struct vmmap *lh_first; 903 }; 904 #define VMMAP_HASH_SIZE 64 905 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 906 #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 907 static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 908 static struct mtx vmmaplock; 909 910 static void 911 vmmap_add(void *addr, unsigned long size) 912 { 913 struct vmmap *vmmap; 914 915 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 916 mtx_lock(&vmmaplock); 917 vmmap->vm_size = size; 918 vmmap->vm_addr = addr; 919 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 920 mtx_unlock(&vmmaplock); 921 } 922 923 static struct vmmap * 924 vmmap_remove(void *addr) 925 { 926 struct vmmap *vmmap; 927 928 mtx_lock(&vmmaplock); 929 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 930 if (vmmap->vm_addr == addr) 931 break; 932 if (vmmap) 933 LIST_REMOVE(vmmap, vm_next); 934 mtx_unlock(&vmmaplock); 935 936 return (vmmap); 937 } 938 939 #if defined(__i386__) || defined(__amd64__) 940 void * 941 _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 942 { 943 void *addr; 944 945 addr = pmap_mapdev_attr(phys_addr, size, attr); 946 if (addr == NULL) 947 return (NULL); 948 vmmap_add(addr, size); 949 950 return (addr); 951 } 952 #endif 953 954 void 955 iounmap(void *addr) 956 { 957 struct vmmap *vmmap; 958 959 vmmap = vmmap_remove(addr); 960 if (vmmap == NULL) 961 return; 962 #if defined(__i386__) || defined(__amd64__) 963 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 964 #endif 965 kfree(vmmap); 966 } 967 968 969 void * 970 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 971 { 972 vm_offset_t off; 973 size_t size; 974 975 size = count * PAGE_SIZE; 976 off = kva_alloc(size); 977 if (off == 0) 978 return (NULL); 979 vmmap_add((void *)off, size); 980 pmap_qenter(off, pages, count); 981 982 return ((void *)off); 983 } 984 985 void 986 vunmap(void *addr) 987 { 988 struct vmmap *vmmap; 989 990 vmmap = vmmap_remove(addr); 991 if (vmmap == NULL) 992 return; 993 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 994 kva_free((vm_offset_t)addr, vmmap->vm_size); 995 kfree(vmmap); 996 } 997 998 char * 999 kvasprintf(gfp_t gfp, const char *fmt, va_list ap) 1000 { 1001 unsigned int len; 1002 char *p; 1003 va_list aq; 1004 1005 va_copy(aq, ap); 1006 len = vsnprintf(NULL, 0, fmt, aq); 1007 va_end(aq); 1008 1009 p = kmalloc(len + 1, gfp); 1010 if (p != NULL) 1011 vsnprintf(p, len + 1, fmt, ap); 1012 1013 return (p); 1014 } 1015 1016 char * 1017 kasprintf(gfp_t gfp, const char *fmt, ...) 1018 { 1019 va_list ap; 1020 char *p; 1021 1022 va_start(ap, fmt); 1023 p = kvasprintf(gfp, fmt, ap); 1024 va_end(ap); 1025 1026 return (p); 1027 } 1028 1029 static void 1030 linux_timer_callback_wrapper(void *context) 1031 { 1032 struct timer_list *timer; 1033 1034 timer = context; 1035 timer->function(timer->data); 1036 } 1037 1038 void 1039 mod_timer(struct timer_list *timer, unsigned long expires) 1040 { 1041 1042 timer->expires = expires; 1043 callout_reset(&timer->timer_callout, 1044 linux_timer_jiffies_until(expires), 1045 &linux_timer_callback_wrapper, timer); 1046 } 1047 1048 void 1049 add_timer(struct timer_list *timer) 1050 { 1051 1052 callout_reset(&timer->timer_callout, 1053 linux_timer_jiffies_until(timer->expires), 1054 &linux_timer_callback_wrapper, timer); 1055 } 1056 1057 static void 1058 linux_timer_init(void *arg) 1059 { 1060 1061 /* 1062 * Compute an internal HZ value which can divide 2**32 to 1063 * avoid timer rounding problems when the tick value wraps 1064 * around 2**32: 1065 */ 1066 linux_timer_hz_mask = 1; 1067 while (linux_timer_hz_mask < (unsigned long)hz) 1068 linux_timer_hz_mask *= 2; 1069 linux_timer_hz_mask--; 1070 } 1071 SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL); 1072 1073 void 1074 linux_complete_common(struct completion *c, int all) 1075 { 1076 int wakeup_swapper; 1077 1078 sleepq_lock(c); 1079 c->done++; 1080 if (all) 1081 wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); 1082 else 1083 wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); 1084 sleepq_release(c); 1085 if (wakeup_swapper) 1086 kick_proc0(); 1087 } 1088 1089 /* 1090 * Indefinite wait for done != 0 with or without signals. 1091 */ 1092 long 1093 linux_wait_for_common(struct completion *c, int flags) 1094 { 1095 1096 if (flags != 0) 1097 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1098 else 1099 flags = SLEEPQ_SLEEP; 1100 for (;;) { 1101 sleepq_lock(c); 1102 if (c->done) 1103 break; 1104 sleepq_add(c, NULL, "completion", flags, 0); 1105 if (flags & SLEEPQ_INTERRUPTIBLE) { 1106 if (sleepq_wait_sig(c, 0) != 0) 1107 return (-ERESTARTSYS); 1108 } else 1109 sleepq_wait(c, 0); 1110 } 1111 c->done--; 1112 sleepq_release(c); 1113 1114 return (0); 1115 } 1116 1117 /* 1118 * Time limited wait for done != 0 with or without signals. 1119 */ 1120 long 1121 linux_wait_for_timeout_common(struct completion *c, long timeout, int flags) 1122 { 1123 long end = jiffies + timeout; 1124 1125 if (flags != 0) 1126 flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; 1127 else 1128 flags = SLEEPQ_SLEEP; 1129 for (;;) { 1130 int ret; 1131 1132 sleepq_lock(c); 1133 if (c->done) 1134 break; 1135 sleepq_add(c, NULL, "completion", flags, 0); 1136 sleepq_set_timeout(c, linux_timer_jiffies_until(end)); 1137 if (flags & SLEEPQ_INTERRUPTIBLE) 1138 ret = sleepq_timedwait_sig(c, 0); 1139 else 1140 ret = sleepq_timedwait(c, 0); 1141 if (ret != 0) { 1142 /* check for timeout or signal */ 1143 if (ret == EWOULDBLOCK) 1144 return (0); 1145 else 1146 return (-ERESTARTSYS); 1147 } 1148 } 1149 c->done--; 1150 sleepq_release(c); 1151 1152 /* return how many jiffies are left */ 1153 return (linux_timer_jiffies_until(end)); 1154 } 1155 1156 int 1157 linux_try_wait_for_completion(struct completion *c) 1158 { 1159 int isdone; 1160 1161 isdone = 1; 1162 sleepq_lock(c); 1163 if (c->done) 1164 c->done--; 1165 else 1166 isdone = 0; 1167 sleepq_release(c); 1168 return (isdone); 1169 } 1170 1171 int 1172 linux_completion_done(struct completion *c) 1173 { 1174 int isdone; 1175 1176 isdone = 1; 1177 sleepq_lock(c); 1178 if (c->done == 0) 1179 isdone = 0; 1180 sleepq_release(c); 1181 return (isdone); 1182 } 1183 1184 void 1185 linux_delayed_work_fn(void *arg) 1186 { 1187 struct delayed_work *work; 1188 1189 work = arg; 1190 taskqueue_enqueue(work->work.taskqueue, &work->work.work_task); 1191 } 1192 1193 void 1194 linux_work_fn(void *context, int pending) 1195 { 1196 struct work_struct *work; 1197 1198 work = context; 1199 work->fn(work); 1200 } 1201 1202 void 1203 linux_flush_fn(void *context, int pending) 1204 { 1205 } 1206 1207 struct workqueue_struct * 1208 linux_create_workqueue_common(const char *name, int cpus) 1209 { 1210 struct workqueue_struct *wq; 1211 1212 wq = kmalloc(sizeof(*wq), M_WAITOK); 1213 wq->taskqueue = taskqueue_create(name, M_WAITOK, 1214 taskqueue_thread_enqueue, &wq->taskqueue); 1215 atomic_set(&wq->draining, 0); 1216 taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name); 1217 1218 return (wq); 1219 } 1220 1221 void 1222 destroy_workqueue(struct workqueue_struct *wq) 1223 { 1224 taskqueue_free(wq->taskqueue); 1225 kfree(wq); 1226 } 1227 1228 static void 1229 linux_cdev_release(struct kobject *kobj) 1230 { 1231 struct linux_cdev *cdev; 1232 struct kobject *parent; 1233 1234 cdev = container_of(kobj, struct linux_cdev, kobj); 1235 parent = kobj->parent; 1236 if (cdev->cdev) 1237 destroy_dev(cdev->cdev); 1238 kfree(cdev); 1239 kobject_put(parent); 1240 } 1241 1242 static void 1243 linux_cdev_static_release(struct kobject *kobj) 1244 { 1245 struct linux_cdev *cdev; 1246 struct kobject *parent; 1247 1248 cdev = container_of(kobj, struct linux_cdev, kobj); 1249 parent = kobj->parent; 1250 if (cdev->cdev) 1251 destroy_dev(cdev->cdev); 1252 kobject_put(parent); 1253 } 1254 1255 const struct kobj_type linux_cdev_ktype = { 1256 .release = linux_cdev_release, 1257 }; 1258 1259 const struct kobj_type linux_cdev_static_ktype = { 1260 .release = linux_cdev_static_release, 1261 }; 1262 1263 static void 1264 linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate) 1265 { 1266 struct notifier_block *nb; 1267 1268 nb = arg; 1269 if (linkstate == LINK_STATE_UP) 1270 nb->notifier_call(nb, NETDEV_UP, ifp); 1271 else 1272 nb->notifier_call(nb, NETDEV_DOWN, ifp); 1273 } 1274 1275 static void 1276 linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp) 1277 { 1278 struct notifier_block *nb; 1279 1280 nb = arg; 1281 nb->notifier_call(nb, NETDEV_REGISTER, ifp); 1282 } 1283 1284 static void 1285 linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp) 1286 { 1287 struct notifier_block *nb; 1288 1289 nb = arg; 1290 nb->notifier_call(nb, NETDEV_UNREGISTER, ifp); 1291 } 1292 1293 static void 1294 linux_handle_iflladdr_event(void *arg, struct ifnet *ifp) 1295 { 1296 struct notifier_block *nb; 1297 1298 nb = arg; 1299 nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp); 1300 } 1301 1302 static void 1303 linux_handle_ifaddr_event(void *arg, struct ifnet *ifp) 1304 { 1305 struct notifier_block *nb; 1306 1307 nb = arg; 1308 nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp); 1309 } 1310 1311 int 1312 register_netdevice_notifier(struct notifier_block *nb) 1313 { 1314 1315 nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER( 1316 ifnet_link_event, linux_handle_ifnet_link_event, nb, 0); 1317 nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER( 1318 ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0); 1319 nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER( 1320 ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0); 1321 nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER( 1322 iflladdr_event, linux_handle_iflladdr_event, nb, 0); 1323 1324 return (0); 1325 } 1326 1327 int 1328 register_inetaddr_notifier(struct notifier_block *nb) 1329 { 1330 1331 nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER( 1332 ifaddr_event, linux_handle_ifaddr_event, nb, 0); 1333 return (0); 1334 } 1335 1336 int 1337 unregister_netdevice_notifier(struct notifier_block *nb) 1338 { 1339 1340 EVENTHANDLER_DEREGISTER(ifnet_link_event, 1341 nb->tags[NETDEV_UP]); 1342 EVENTHANDLER_DEREGISTER(ifnet_arrival_event, 1343 nb->tags[NETDEV_REGISTER]); 1344 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 1345 nb->tags[NETDEV_UNREGISTER]); 1346 EVENTHANDLER_DEREGISTER(iflladdr_event, 1347 nb->tags[NETDEV_CHANGEADDR]); 1348 1349 return (0); 1350 } 1351 1352 int 1353 unregister_inetaddr_notifier(struct notifier_block *nb) 1354 { 1355 1356 EVENTHANDLER_DEREGISTER(ifaddr_event, 1357 nb->tags[NETDEV_CHANGEIFADDR]); 1358 1359 return (0); 1360 } 1361 1362 struct list_sort_thunk { 1363 int (*cmp)(void *, struct list_head *, struct list_head *); 1364 void *priv; 1365 }; 1366 1367 static inline int 1368 linux_le_cmp(void *priv, const void *d1, const void *d2) 1369 { 1370 struct list_head *le1, *le2; 1371 struct list_sort_thunk *thunk; 1372 1373 thunk = priv; 1374 le1 = *(__DECONST(struct list_head **, d1)); 1375 le2 = *(__DECONST(struct list_head **, d2)); 1376 return ((thunk->cmp)(thunk->priv, le1, le2)); 1377 } 1378 1379 void 1380 list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv, 1381 struct list_head *a, struct list_head *b)) 1382 { 1383 struct list_sort_thunk thunk; 1384 struct list_head **ar, *le; 1385 size_t count, i; 1386 1387 count = 0; 1388 list_for_each(le, head) 1389 count++; 1390 ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK); 1391 i = 0; 1392 list_for_each(le, head) 1393 ar[i++] = le; 1394 thunk.cmp = cmp; 1395 thunk.priv = priv; 1396 qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp); 1397 INIT_LIST_HEAD(head); 1398 for (i = 0; i < count; i++) 1399 list_add_tail(ar[i], head); 1400 free(ar, M_KMALLOC); 1401 } 1402 1403 void 1404 linux_irq_handler(void *ent) 1405 { 1406 struct irq_ent *irqe; 1407 1408 irqe = ent; 1409 irqe->handler(irqe->irq, irqe->arg); 1410 } 1411 1412 #if defined(__i386__) || defined(__amd64__) 1413 bool linux_cpu_has_clflush; 1414 #endif 1415 1416 static void 1417 linux_compat_init(void *arg) 1418 { 1419 struct sysctl_oid *rootoid; 1420 int i; 1421 1422 #if defined(__i386__) || defined(__amd64__) 1423 linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH); 1424 #endif 1425 sx_init(&linux_global_rcu_lock, "LinuxGlobalRCU"); 1426 1427 rootoid = SYSCTL_ADD_ROOT_NODE(NULL, 1428 OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); 1429 kobject_init(&linux_class_root, &linux_class_ktype); 1430 kobject_set_name(&linux_class_root, "class"); 1431 linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), 1432 OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); 1433 kobject_init(&linux_root_device.kobj, &linux_dev_ktype); 1434 kobject_set_name(&linux_root_device.kobj, "device"); 1435 linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL, 1436 SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, 1437 "device"); 1438 linux_root_device.bsddev = root_bus; 1439 linux_class_misc.name = "misc"; 1440 class_register(&linux_class_misc); 1441 INIT_LIST_HEAD(&pci_drivers); 1442 INIT_LIST_HEAD(&pci_devices); 1443 spin_lock_init(&pci_lock); 1444 mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); 1445 for (i = 0; i < VMMAP_HASH_SIZE; i++) 1446 LIST_INIT(&vmmaphead[i]); 1447 } 1448 SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); 1449 1450 static void 1451 linux_compat_uninit(void *arg) 1452 { 1453 linux_kobject_kfree_name(&linux_class_root); 1454 linux_kobject_kfree_name(&linux_root_device.kobj); 1455 linux_kobject_kfree_name(&linux_class_misc.kobj); 1456 1457 synchronize_rcu(); 1458 sx_destroy(&linux_global_rcu_lock); 1459 } 1460 SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); 1461 1462 /* 1463 * NOTE: Linux frequently uses "unsigned long" for pointer to integer 1464 * conversion and vice versa, where in FreeBSD "uintptr_t" would be 1465 * used. Assert these types have the same size, else some parts of the 1466 * LinuxKPI may not work like expected: 1467 */ 1468 CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t)); 1469